def upload_media(self, media):
    """Uploads one or more images or videos from web URLs.

    https://docs.joinmastodon.org/api/rest/media/

    Each distinct URL is fetched as a stream and posted to API_MEDIA. If an
    object has a displayName, it is sent along as the upload's description,
    ellipsized to MAX_ALT_LENGTH characters.

    Args:
      media: sequence of AS image or stream objects, eg:
        [{'url': 'http://picture', 'displayName': 'a thing'}, ...]

    Returns:
      list of string media ids for uploaded files
    """
    uploaded = set()  # URLs uploaded so far; for de-duping
    ids = []

    for obj in media:
        url = util.get_url(obj, key='stream') or util.get_url(obj)
        if not url or url in uploaded:
            continue

        data = {}
        alt = obj.get('displayName')
        if alt:
            data['description'] = util.ellipsize(alt, chars=MAX_ALT_LENGTH)

        # TODO: mime type check?
        with util.requests_get(url, stream=True) as fetch:
            fetch.raise_for_status()
            # Send data too; otherwise the description built above is dropped.
            # NOTE(review): assumes _post forwards keyword args to the
            # underlying HTTP call - confirm.
            upload = self._post(API_MEDIA, files={'file': fetch.raw}, data=data)

        logging.info('Got: %s', upload)
        ids.append(upload['id'])
        uploaded.add(url)

    return ids
def postprocess_object(self, obj):
    """Does source-independent post-processing of an object, in place.

    Fills in content for verbs that have an entry in RSVP_CONTENTS when the
    object has no content of its own, then populates displayName if it's
    missing.

    Args:
      obj: object dict. Modified in place.

    Returns:
      the return value of util.trim_nulls(obj)
    """
    verb = obj.get('verb')
    text = obj.get('content')
    rsvp_text = RSVP_CONTENTS.get(verb)

    if rsvp_text and not text:
        if verb.startswith('rsvp-'):
            # eg rsvp-yes => value="yes"
            text = '<data class="p-rsvp" value="%s">%s</data>' % (
                verb.split('-')[1], rsvp_text)
        else:
            text = rsvp_text
        obj['content'] = text

    # nothing to build a displayName from, or one is already set
    if not text or obj.get('displayName'):
        return util.trim_nulls(obj)

    who = self.actor_name(obj.get('author') or obj.get('actor'))
    if verb in ('like', 'share'):
        name = '%s %s' % (who, text)
    elif rsvp_text:
        if verb == 'invite':
            # invites use the name of the activity's object instead of its
            # author/actor
            who = self.actor_name(obj.get('object'))
        name = '%s %s' % (who, rsvp_text)
    else:
        name = util.ellipsize(text)
    obj['displayName'] = name

    return util.trim_nulls(obj)
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    Activities are modified in place: each one gets an 'object' dict if it
    didn't have one, a rendered_content field on that object, a title, and
    attachment images normalized to lists.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url) if host_url
                else 'https://github.com/snarfed/activitystreams-unofficial')
    request_url = _remove_query_params(request_url) if request_url else host_url

    for a in activities:
        # setdefault rather than get: the fields set below must be stored on
        # the activity itself even when it has no object, and the 'updated'
        # lookup at the end indexes activities[0]['object'] directly.
        obj = a.setdefault('object', {})

        # Render content as HTML
        content = obj.get('content')
        obj['rendered_content'] = microformats2.render_content(obj)

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(
                a.get('displayName') or a.get('content') or obj.get('title') or
                obj.get('displayName') or content or 'Untitled')

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    return template.render(
        ATOM_TEMPLATE_FILE,
        {
            'items': activities,
            'host_url': host_url,
            'request_url': request_url,
            'title': title or 'User feed for ' + source.Source.actor_name(actor),
            # the first activity's published time doubles as the feed's
            # updated time
            'updated': (activities[0]['object'].get('published')
                        if activities else ''),
            'actor': actor,
        })
def get_title(mf2):
    """Returns an mf2 object's title, ie its name.

    Only the first line of the name is used.

    Args:
      mf2: dict, parsed mf2 object (ie return value from mf2py.parse())

    Returns:
      string title, possibly ellipsized, or '' if the name has no lines
    """
    name = mf2util.interpret_feed(mf2, '').get('name', '')
    first_line = next(iter(name.splitlines()), None)
    if first_line is None:
        return ''
    return util.ellipsize(first_line)
def get_title(mf2):
    """Returns an mf2 object's title, ie the first line of its name.

    Args:
      mf2: dict, parsed mf2 object (ie return value from mf2py.parse())

    Returns:
      string title, possibly ellipsized. Empty string if the name has no lines.
    """
    feed = mf2util.interpret_feed(mf2, '')
    name_lines = feed.get('name', '').splitlines()
    return util.ellipsize(name_lines[0]) if name_lines else ''
def get_title(mf2):
    """Returns an mf2 object's title, ie its name.

    Only the first line of the name is used.

    Args:
      mf2: dict, parsed mf2 object (ie return value from mf2py.parse())

    Returns:
      string title, possibly ellipsized, or '' if there is no name
    """
    # `or ''` also covers a name that is present but None
    name = mf2util.interpret_feed(mf2, '').get('name') or ''
    lines = name.splitlines()
    if lines:
        return util.ellipsize(lines[0])
    return ''
def _prepare_activity(a, reader=True):
    """Preprocesses an activity to prepare it to be rendered as Atom.

    Modifies a in place: makes sure it has an 'object' dict and a plain text
    title, and sets rendered_content, rendered_children and normalized
    published/updated timestamps on the object.

    Args:
      a: ActivityStreams 1 activity dict
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.
    """
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
        primary = a.get('object', {})
    else:
        primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(
        microformats2.render_content(primary, include_location=reader,
                                     render_attachments=True))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
        a['title'] = util.ellipsize(_encode_ampersands(
            a.get('displayName') or a.get('content') or obj.get('title') or
            obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    a['title'] = xml.sax.saxutils.escape(
        BeautifulSoup(a['title']).get_text(''))

    children = []
    image_urls_seen = set()
    image_atts = []

    # normalize attachments, render attached notes/articles
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
        att['stream'] = util.get_first(att, 'stream')
        att_type = att.get('objectType')

        if att_type == 'image':
            # image attachments are rendered below, after de-duping
            image_atts.append(util.get_first(att, 'image'))
            continue

        image_urls_seen |= set(util.get_urls(att, 'image'))
        if att_type in ('note', 'article'):
            rendered = microformats2.render_content(
                att, include_location=reader, render_attachments=True)
            author = att.get('author')
            if author:
                name = microformats2.maybe_linked_name(
                    microformats2.object_to_json(author).get('properties') or {})
                rendered = '%s: %s' % (name.strip(), rendered)
            children.append(rendered)

    # render image(s) that we haven't already seen
    for image in image_atts + util.get_list(obj, 'image'):
        if not image:
            continue
        url = image.get('url')
        # Skip before parsing: a missing URL would otherwise go through
        # urlparse(None), which returns bytes and yields a nonsense regex.
        if not url or url in image_urls_seen:
            continue

        # Match the URL inside an existing src attribute, with or without its
        # scheme and host.
        parsed = urllib.parse.urlparse(url)
        rest = urllib.parse.urlunparse(('', '') + parsed[2:])
        img_src_re = re.compile(
            r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
            (re.escape(parsed.netloc), re.escape(rest)))
        if not img_src_re.search(obj['rendered_content']):
            children.append(microformats2.img(url))
            image_urls_seen.add(url)

    obj['rendered_children'] = [_encode_ampersands(child) for child in children]

    # make sure published and updated are strict RFC 3339 timestamps
    for prop in 'published', 'updated':
        val = obj.get(prop)
        if val:
            obj[prop] = util.maybe_iso8601_to_rfc3339(val)
            # Atom timestamps are even stricter than RFC 3339: they can't be
            # naive ie time zone unaware. They must have either an offset or
            # the Z suffix.
            # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
            if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
                obj[prop] += 'Z'
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
    """Converts ActivityStreams activities to an Atom feed.

    Activities are modified in place: each gets an 'object' dict if missing,
    rendered_content and rendered_children on that object, a plain text
    title, and attachment images normalized to lists.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.
      xml_base: the base URL, if any. Used in the top-level xml:base attribute.
      rels: rel links to include. dict mapping string rel value to string URL.
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = 'https://github.com/snarfed/granary'
    if request_url is None:
        request_url = host_url

    for activity in activities:
        kind = source.object_type(activity)
        if kind and kind != 'post':
            primary = activity
        else:
            primary = activity.get('object', {})
        obj = activity.setdefault('object', {})

        # Render content as HTML; escape &s
        rendered = microformats2.render_content(primary, include_location=reader)
        obj['rendered_content'] = _encode_ampersands(rendered)

        # Atom <entry> requires the title element, so make sure there is one.
        if not activity.get('title'):
            fallback = (activity.get('displayName') or activity.get('content') or
                        obj.get('title') or obj.get('displayName') or
                        obj.get('content') or 'Untitled')
            activity['title'] = util.ellipsize(_encode_ampersands(fallback))

        # strip HTML tags. the Atom spec says title is plain text:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = BeautifulSoup(activity['title']).get_text('')
        activity['title'] = xml.sax.saxutils.escape(plain_title)

        # Normalize attachments.image to always be a list.
        attachments = activity.get('attachments') or obj.get('attachments') or []
        for att in attachments:
            att['image'] = util.get_list(att, 'image')

        # Render attached notes and articles, prefixed with their author.
        children = obj['rendered_children'] = []
        for att in attachments:
            if att.get('objectType') not in ('note', 'article'):
                continue
            child = microformats2.render_content(att, include_location=reader)
            author = att.get('author')
            if author:
                props = microformats2.object_to_json(author).get('properties', [])
                name = microformats2.maybe_linked_name(props)
                child = '%s: %s' % (name.strip(), child)
            children.append(_encode_ampersands(child))

    # Emulate Django template behavior that returns a special default value that
    # can continue to be referenced when an attribute or item lookup fails. Helps
    # avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # wrap nested dicts too so that lookups can be chained
            wrapped = {}
            for key, val in kwargs.items():
                wrapped[key] = Defaulter(**val) if isinstance(val, dict) else val
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            return super(Defaulter, self).__unicode__() if self else u''

    loader = jinja2.PackageLoader(__package__, 'templates')
    env = jinja2.Environment(loader=loader, autoescape=True)
    if actor is None:
        actor = {}

    feed_template = env.get_template(ATOM_TEMPLATE_FILE)
    return feed_template.render(
        items=[Defaulter(**activity) for activity in activities],
        host_url=host_url,
        request_url=request_url,
        xml_base=xml_base,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        updated=activities[0]['object'].get('published', '') if activities else '',
        actor=Defaulter(**actor),
        rels=rels or {},
    )
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
    """Creates a new issue or comment.

    Also handles reactions (comments whose content is in REACTIONS_GRAPHQL),
    stars (likes of a repo) and adding labels (tag posts).

    When creating a new issue, if the authenticated user is a collaborator on
    the repo, tags that match existing labels are converted to those labels and
    included.

    https://developer.github.com/v4/guides/forming-calls/#about-mutations
    https://developer.github.com/v4/mutation/addcomment/
    https://developer.github.com/v4/mutation/addreaction/
    https://developer.github.com/v3/issues/#create-an-issue

    Args:
      obj: ActivityStreams object
      preview: boolean
      include_link: string
      ignore_formatting: boolean

    Returns:
      a CreationResult

      If preview is True, the contents will be a unicode string HTML
      snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created GitHub object.
    """
    assert preview in (False, True)

    # NOTE: locals() is passed to self.graphql() below, so the local names in
    # this method (owner, repo, number, ...) may be referenced by the query
    # templates. Check those before renaming any of them.
    type = source.object_type(obj)
    if type and type not in ('issue', 'comment', 'activity', 'note', 'article',
                             'like', 'tag'):
        return source.creation_result(
            abort=False, error_plain='Cannot publish %s to GitHub' % type)

    base_obj = self.base_object(obj)
    base_url = base_obj.get('url')
    if not base_url:
        return source.creation_result(
            abort=True,
            error_plain='You need an in-reply-to GitHub repo, issue, PR, or comment URL.')

    content = orig_content = html.escape(
        self._content_for_create(obj, ignore_formatting=ignore_formatting),
        quote=False)
    url = obj.get('url')
    if include_link == source.INCLUDE_LINK and url:
        content += '\n\n(Originally published at: %s)' % url

    parsed = urllib.parse.urlparse(base_url)
    path = parsed.path.strip('/').split('/')
    if len(path) < 2:
        # No owner/repo in the URL. Without this check the unpacking below
        # raises ValueError instead of reporting a usable error.
        return source.creation_result(
            abort=False,
            error_plain="%s doesn't look like a GitHub repo, issue, or PR URL." % base_url)

    owner, repo = path[:2]
    # number is only bound for 4-segment paths, eg owner/repo/issues/123. Every
    # branch below that uses it checks len(path) == 4 first.
    if len(path) == 4:
        number = path[3]

    comment_id = re.match(r'^issuecomment-([0-9]+)$', parsed.fragment)
    if comment_id:
        comment_id = comment_id.group(1)
    elif parsed.fragment:
        return source.creation_result(
            abort=True,
            error_plain='Please remove the fragment #%s from your in-reply-to URL.' %
            parsed.fragment)

    if type == 'comment':  # comment or reaction
        if not (len(path) == 4 and path[2] in ('issues', 'pull')):
            return source.creation_result(
                abort=True,
                error_plain='GitHub comment requires in-reply-to issue or PR URL.')

        is_reaction = orig_content in REACTIONS_GRAPHQL
        if preview:
            if comment_id:
                comment = self.rest(
                    REST_API_COMMENT % (owner, repo, comment_id)).json()
                target_link = '<a href="%s">a comment on %s/%s#%s, <em>%s</em></a>' % (
                    base_url, owner, repo, number, util.ellipsize(comment['body']))
            else:
                resp = self.graphql(GRAPHQL_ISSUE_OR_PR, locals())
                issue = (resp.get('repository') or {}).get('issueOrPullRequest')
                target_link = '<a href="%s">%s/%s#%s%s</a>' % (
                    base_url, owner, repo, number,
                    (', <em>%s</em>' % issue['title']) if issue else '')

            if is_reaction:
                preview_content = None
                desc = u'<span class="verb">react %s</span> to %s.' % (
                    orig_content, target_link)
            else:
                preview_content = self.render_markdown(content, owner, repo)
                desc = '<span class="verb">comment</span> on %s:' % target_link
            return source.creation_result(content=preview_content, description=desc)

        else:  # create
            # we originally used the GraphQL API to create issue comments and
            # reactions, but it often gets rejected against org repos due to
            # access controls. oddly, the REST API works fine in those same
            # cases.
            # https://github.com/snarfed/bridgy/issues/824
            if is_reaction:
                if comment_id:
                    api_url = REST_API_COMMENT_REACTIONS % (owner, repo, comment_id)
                    reacted = self.rest(api_url, data={
                        'content': REACTIONS_REST.get(orig_content),
                    }).json()
                    url = base_url
                else:
                    api_url = REST_API_REACTIONS % (owner, repo, number)
                    reacted = self.rest(api_url, data={
                        'content': REACTIONS_REST.get(orig_content),
                    }).json()
                    url = '%s#%s-by-%s' % (base_url, reacted['content'].lower(),
                                           reacted['user']['login'])

                return source.creation_result({
                    'id': reacted.get('id'),
                    'url': url,
                    'type': 'react',
                })

            else:
                try:
                    api_url = REST_API_COMMENTS % (owner, repo, number)
                    commented = self.rest(api_url, data={'body': content}).json()
                    return source.creation_result({
                        'id': commented.get('id'),
                        'url': commented.get('html_url'),
                        'type': 'comment',
                    })
                except ValueError as e:
                    return source.creation_result(abort=True, error_plain=str(e))

    elif type == 'like':  # star
        if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
            return source.creation_result(
                abort=True,
                error_plain='GitHub like requires in-reply-to repo URL.')

        if preview:
            return source.creation_result(
                description='<span class="verb">star</span> <a href="%s">%s/%s</a>.' %
                (base_url, owner, repo))
        else:
            issue = self.graphql(GRAPHQL_REPO, locals())
            self.graphql(GRAPHQL_ADD_STAR, {
                'starrable_id': issue['repository']['id'],
            })
            return source.creation_result({
                'url': base_url + '/stargazers',
            })

    elif type == 'tag':  # add label
        if not (len(path) == 4 and path[2] in ('issues', 'pull')):
            return source.creation_result(
                abort=True,
                error_plain='GitHub tag post requires tag-of issue or PR URL.')

        tags = set(util.trim_nulls(t.get('displayName', '').strip()
                                   for t in util.get_list(obj, 'object')))
        if not tags:
            return source.creation_result(
                abort=True, error_plain='No tags found in tag post!')

        # only labels that already exist on the repo are added
        existing_labels = self.existing_labels(owner, repo)
        labels = sorted(tags & existing_labels)
        issue_link = '<a href="%s">%s/%s#%s</a>' % (base_url, owner, repo, number)
        if not labels:
            return source.creation_result(
                abort=True,
                error_html="No tags in [%s] matched %s's existing labels [%s]." %
                (', '.join(sorted(tags)), issue_link,
                 ', '.join(sorted(existing_labels))))

        if preview:
            return source.creation_result(
                description='add label%s <span class="verb">%s</span> to %s.' % (
                    ('s' if len(labels) > 1 else ''), ', '.join(labels), issue_link))
        else:
            # .json() is still called so that a non-JSON response raises here
            self.rest(REST_API_ISSUE_LABELS % (owner, repo, number), labels).json()
            return source.creation_result({
                'url': base_url,
                'type': 'tag',
                'tags': labels,
            })

    else:  # new issue
        if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
            return source.creation_result(
                abort=True,
                error_plain='New GitHub issue requires in-reply-to repo URL')

        title = util.ellipsize(obj.get('displayName') or obj.get('title') or
                               orig_content)
        tags = set(util.trim_nulls(t.get('displayName', '').strip()
                                   for t in util.get_list(obj, 'tags')))
        labels = sorted(tags & self.existing_labels(owner, repo))

        if preview:
            preview_content = '<b>%s</b><hr>%s' % (
                title, self.render_markdown(content, owner, repo))
            preview_labels = ''
            if labels:
                preview_labels = ' and attempt to add label%s <span class="verb">%s</span>' % (
                    's' if len(labels) > 1 else '', ', '.join(labels))
            return source.creation_result(
                content=preview_content,
                description='<span class="verb">create a new issue</span> on <a href="%s">%s/%s</a>%s:' %
                (base_url, owner, repo, preview_labels))
        else:
            resp = self.rest(REST_API_CREATE_ISSUE % (owner, repo), {
                'title': title,
                'body': content,
                'labels': labels,
            }).json()
            resp['url'] = resp.pop('html_url')
            return source.creation_result(resp)
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    Activities are modified in place: each one gets an 'object' dict if it
    didn't have one, a rendered_content field on that object, a title, and
    attachment images normalized to lists.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url) if host_url
                else 'https://github.com/snarfed/granary')
    request_url = _remove_query_params(request_url) if request_url else host_url

    for a in activities:
        # setdefault rather than get: the fields set below must be stored on
        # the activity itself even when it has no object, and the 'updated'
        # lookup at the end indexes activities[0]['object'] directly.
        obj = a.setdefault('object', {})

        # Render content as HTML; escape &s
        content = obj.get('content')
        obj['rendered_content'] = _encode_ampersands(
            microformats2.render_content(obj))

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(_encode_ampersands(
                a.get('displayName') or a.get('content') or obj.get('title') or
                obj.get('displayName') or content or 'Untitled'))

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    # Emulate Django template behavior that returns a special default value that
    # can continue to be referenced when an attribute or item lookup fails. Helps
    # avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            super(Defaulter, self).__init__(Defaulter, **{
                k: (Defaulter(**v) if isinstance(v, dict) else v)
                for k, v in kwargs.items()})

        def __unicode__(self):
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, 'templates'), autoescape=True)
    if actor is None:
        actor = {}
    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        # the first activity's published time doubles as the feed's updated time
        updated=activities[0]['object'].get('published', '') if activities else '',
        actor=Defaulter(**actor),
    )
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Activities whose object has objectType 'person' are skipped. At most one
    enclosure is emitted per item; if any item has one, the podcast extension
    is loaded on the feed as well.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML

    Raises:
      TypeError: if activities isn't iterable, or is a dict or string
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, str)):
        raise TypeError('activities may not be a dict or string')

    feed = FeedGenerator()
    feed.id(feed_url)
    assert feed_url
    feed.link(href=feed_url, rel='self')
    if home_page_url:
        feed.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    feed.language('en')
    feed.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    # prefer the h-feed's photo, fall back to the actor's image
    image = util.get_url(hfeed.get('properties', {}), 'photo')
    if not image:
        image = util.get_url(actor, 'image')
    if image:
        feed.image(image)

    props = hfeed.get('properties') or {}
    feed_text = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    desc = feed_text or summary or '-'
    feed.description(desc)  # required
    feed.title(title or util.ellipsize(desc))  # required

    latest = None
    feed_has_enclosure = False
    skipped_verbs = ('like', 'react', 'share')
    skipped_types = ('article', 'person', 'mention')

    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        item = feed.add_entry()
        link = obj.get('url')
        item_id = obj.get('id') or link
        item.id(item_id)
        item.link(href=link)
        item.guid(link, permalink=True)

        # title (required)
        item_title = (obj.get('title') or obj.get('displayName') or
                      util.ellipsize(obj.get('content', '-')))
        # strip HTML tags
        item.title(util.parse_html(item_title).get_text('').strip())

        body = microformats2.render_content(
            obj, include_location=True, render_attachments=True,
            render_image=True)
        if not body:
            body = obj.get('summary')
        if body:
            item.content(body, type='CDATA')

        categories = []
        for tag in obj.get('tags', []):
            if (tag.get('displayName')
                    and tag.get('verb') not in skipped_verbs
                    and tag.get('objectType') not in skipped_types):
                categories.append({'term': tag['displayName']})
        item.category(categories)

        who = obj.get('author', {})
        author = {
            'name': who.get('displayName') or who.get('username'),
            'uri': who.get('url'),
            'email': who.get('email') or '-',
        }
        item.author(author)

        published = obj.get('published') or obj.get('updated')
        if published and isinstance(published, str):
            try:
                dt = mf2util.parse_datetime(published)
                if not isinstance(dt, datetime):
                    # not a datetime, so combine it with midnight
                    dt = datetime.combine(dt, time.min)
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        item_has_enclosure = False
        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            att_url = stream.get('url') or ''
            mime = mimetypes.guess_type(att_url)[0] or ''
            is_enclosure = (att.get('objectType') in ENCLOSURE_TYPES or
                            mime and mime.split('/')[0] in ENCLOSURE_TYPES)
            if not is_enclosure:
                continue

            if item_has_enclosure:
                logging.info(
                    'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s',
                    item_id, att_url)
                continue

            item_has_enclosure = feed_has_enclosure = True
            item.enclosure(url=att_url, type=mime,
                           length=str(stream.get('size', '')))
            item.load_extension('podcast')
            duration = stream.get('duration')
            if duration:
                item.podcast.itunes_duration(duration)

    if feed_has_enclosure:
        feed.load_extension('podcast')
        feed.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            feed.podcast.itunes_summary(summary)
        feed.podcast.itunes_explicit('no')
        feed.podcast.itunes_block(False)
        # author and categories here are the ones from the last item processed
        # in the loop above
        name = author.get('name')
        if name:
            feed.podcast.itunes_author(name)
        if image:
            feed.podcast.itunes_image(image)
        feed.podcast.itunes_category(categories)

    if latest:
        feed.lastBuildDate(latest)

    return feed.rss_str(pretty=True).decode('utf-8')
def _prepare_activity(a, reader=True):
    """Preprocesses an activity to prepare it to be rendered as Atom.

    Modifies a in place: sets a plain text title, normalizes actor images,
    attachment streams and image attachments to single values, and sets
    rendered_content, rendered_children and normalized published/updated
    timestamps on the object.

    Args:
      a: ActivityStreams 1 activity dict
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.
    """
    act_type = source.object_type(a)
    obj = util.get_first(a, 'object', default={})
    primary = obj if (not act_type or act_type == 'post') else a

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
        primary, include_location=reader, render_attachments=True))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
        a['title'] = util.ellipsize(_encode_ampersands(
            a.get('displayName') or a.get('content') or obj.get('title') or
            obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

    children = []
    image_urls_seen = set()
    image_atts = []

    # normalize actor images
    for elem in a, obj:
        actor = elem.get('actor')
        if actor:
            actor['image'] = util.get_first(actor, 'image')

    # normalize attachments, render attached notes/articles
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
        att['stream'] = util.get_first(att, 'stream')
        att_type = att.get('objectType')

        if att_type == 'image':
            # image attachments are rendered below, after de-duping
            att['image'] = util.get_first(att, 'image')
            image_atts.append(att['image'])
            continue

        image_urls_seen |= set(util.get_urls(att, 'image'))
        if att_type in ('note', 'article'):
            rendered = microformats2.render_content(
                att, include_location=reader, render_attachments=True)
            author = att.get('author')
            if author:
                name = microformats2.maybe_linked_name(
                    microformats2.object_to_json(author).get('properties') or {})
                rendered = '%s: %s' % (name.strip(), rendered)
            children.append(rendered)

    # render image(s) that we haven't already seen
    for image in image_atts + util.get_list(obj, 'image'):
        if not image:
            continue
        url = image.get('url')
        # Skip before parsing: a missing URL would otherwise go through
        # urlparse(None), which returns bytes and yields a nonsense regex.
        if not url or url in image_urls_seen:
            continue

        # Match the URL inside an existing src attribute, with or without its
        # scheme and host.
        parsed = urllib.parse.urlparse(url)
        rest = urllib.parse.urlunparse(('', '') + parsed[2:])
        img_src_re = re.compile(
            r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
            (re.escape(parsed.netloc), re.escape(rest)))
        if not img_src_re.search(obj['rendered_content']):
            children.append(microformats2.img(url))
            image_urls_seen.add(url)

    obj['rendered_children'] = [_encode_ampersands(child) for child in children]

    # make sure published and updated are strict RFC 3339 timestamps
    for prop in 'published', 'updated':
        val = obj.get(prop)
        if val:
            obj[prop] = util.maybe_iso8601_to_rfc3339(val)
            # Atom timestamps are even stricter than RFC 3339: they can't be
            # naive ie time zone unaware. They must have either an offset or
            # the Z suffix.
            # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
            if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
                obj[prop] += 'Z'
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    Activities are modified in place: each one gets an 'object' dict if it
    didn't have one, a rendered_content field on that object, a title, and
    attachment images normalized to lists.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url) if host_url
                else 'https://github.com/snarfed/granary')
    request_url = _remove_query_params(request_url) if request_url else host_url

    for a in activities:
        # Attach an empty object when there is none, instead of working on a
        # throwaway dict: the rendered fields would otherwise be lost, and
        # activities[0]['object'] below would raise KeyError.
        obj = a.setdefault('object', {})

        # Render content as HTML; escape &s
        content = obj.get('content')
        obj['rendered_content'] = _encode_ampersands(
            microformats2.render_content(obj))

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(_encode_ampersands(
                a.get('displayName') or a.get('content') or obj.get('title') or
                obj.get('displayName') or content or 'Untitled'))

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    # Emulate Django template behavior that returns a special default value that
    # can continue to be referenced when an attribute or item lookup fails. Helps
    # avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            super(Defaulter, self).__init__(Defaulter, **{
                k: (Defaulter(**v) if isinstance(v, dict) else v)
                for k, v in kwargs.items()})

        def __unicode__(self):
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, 'templates'), autoescape=True)
    if actor is None:
        actor = {}
    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        # the first activity's published time doubles as the feed's updated time
        updated=activities[0]['object'].get('published', '') if activities else '',
        actor=Defaulter(**actor),
    )
def activities_to_atom(activities, actor, title=None, request_url=None, host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    Activities are modified in place: each gets an "object" dict if missing,
    rendered_content on that object, a plain text title, and attachment
    images normalized to lists.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = "https://github.com/snarfed/granary"
    if request_url:
        request_url = _remove_query_params(request_url)
    else:
        request_url = host_url

    for activity in activities:
        kind = source.object_type(activity)
        if kind and kind != "post":
            primary = activity
        else:
            primary = activity.get("object", {})
        obj = activity.setdefault("object", {})

        # Render content as HTML; escape &s
        parts = [microformats2.render_content(primary)]
        obj["rendered_content"] = _encode_ampersands("\n".join(parts))

        # Atom <entry> requires the title element, so make sure there is one.
        if not activity.get("title"):
            fallback = (
                activity.get("displayName")
                or activity.get("content")
                or obj.get("title")
                or obj.get("displayName")
                or obj.get("content")
                or "Untitled"
            )
            activity["title"] = util.ellipsize(_encode_ampersands(fallback))

        # strip HTML tags. the Atom spec says title is plain text:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = BeautifulSoup(activity["title"], "html.parser").get_text("")
        activity["title"] = xml.sax.saxutils.escape(plain_title)

        # Normalize attachments.image to always be a list.
        for att in primary.get("attachments", []):
            image = att.get("image")
            if not image or isinstance(image, list):
                continue
            att["image"] = [image]

    # Emulate Django template behavior that returns a special default value that
    # can continue to be referenced when an attribute or item lookup fails. Helps
    # avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # wrap nested dicts too so that lookups can be chained
            wrapped = {}
            for key, val in kwargs.items():
                wrapped[key] = Defaulter(**val) if isinstance(val, dict) else val
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            return super(Defaulter, self).__unicode__() if self else u""

    loader = jinja2.PackageLoader(__package__, "templates")
    env = jinja2.Environment(loader=loader, autoescape=True)
    if actor is None:
        actor = {}

    feed_template = env.get_template(ATOM_TEMPLATE_FILE)
    return feed_template.render(
        items=[Defaulter(**activity) for activity in activities],
        host_url=host_url,
        request_url=request_url,
        title=title or "User feed for " + source.Source.actor_name(actor),
        updated=activities[0]["object"].get("published", "") if activities else "",
        actor=Defaulter(**actor),
    )
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed. Required.
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML

    Raises:
      TypeError: if activities is not iterable, or is a dict or string
      AssertionError: if feed_url is empty
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, basestring)):
        raise TypeError('activities may not be a dict or string')

    # Check the required URL before it is handed to the feed generator.
    assert feed_url, 'feed_url is required'

    fg = FeedGenerator()
    fg.id(feed_url)
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    image = util.get_url(hfeed, 'image') or util.get_url(actor, 'image')
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None  # most recent item datetime seen so far
    enclosures = False  # whether any item in the feed has an enclosure
    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        item.id(obj.get('id') or url)
        item.link(href=url)
        item.guid(url, permalink=True)

        item.title(obj.get('title') or obj.get('displayName') or '-')  # required
        content = microformats2.render_content(
            obj, include_location=True, render_attachments=False) or obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        # Likes, reactions and shares are stored as tags too; they aren't
        # categories. Tolerate an explicit null tags value.
        item.category(
            [{'term': t['displayName']} for t in obj.get('tags') or []
             if t.get('displayName') and t.get('verb') not in ('like', 'react', 'share')])

        # Tolerate an explicit null author as well as a missing one.
        author = obj.get('author') or {}
        item.author({
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
        })

        published = obj.get('published') or obj.get('updated')
        if published:
            try:
                dt = mf2util.parse_datetime(published)
                if not isinstance(dt, datetime):
                    # Not a full datetime (eg date only); use midnight.
                    dt = datetime.combine(dt, time.min)
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        for att in obj.get('attachments') or []:
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            if (att.get('objectType') in ENCLOSURE_TYPES or
                    mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                enclosures = True
                # The placeholder length attribute is stripped from the
                # serialized XML below.
                item.enclosure(url=url, type=mime, length='REMOVEME')  # TODO: length (bytes)
                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)
                # An RSS 2.0 item carries a single enclosure, and loading the
                # podcast extension a second time on the same entry is expected
                # to fail (NOTE(review): feedgen raises ImportError for an
                # already loaded extension - confirm). Stop at the first match.
                break

    if enclosures:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)

    if latest:
        fg.lastBuildDate(latest)

    return fg.rss_str(pretty=True).decode('utf-8').replace(' length="REMOVEME"', '')
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None):
    """Renders ActivityStreams activities as an Atom feed.

    The activity dicts are modified in place: each one ends up with an escaped,
    plain text title, and an object dict (created if missing) holding
    rendered_content and rendered_children fields.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed. May be None.
      title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link rel="self".
        Defaults to host_url.
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.
      xml_base: the base URL, if any. Used in the top-level xml:base attribute.
      rels: rel links to include. dict mapping string rel value to string URL.

    Returns:
      unicode string with Atom XML
    """
    # Drop query params from the host URL so that access tokens etc. don't end
    # up in the feed.
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = 'https://github.com/snarfed/granary'

    if request_url is None:
        request_url = host_url

    for activity in activities:
        kind = source.object_type(activity)
        # Posts and untyped activities are rendered from their inner object;
        # everything else is rendered from the activity itself.
        primary = activity if (kind and kind != 'post') else activity.get('object', {})
        inner = activity.setdefault('object', {})

        # Render content as HTML, then escape ampersands.
        html = microformats2.render_content(primary)
        inner['rendered_content'] = _encode_ampersands(html)

        # Atom <entry> requires a title element, so synthesize one if needed.
        if not activity.get('title'):
            fallback = (activity.get('displayName') or activity.get('content') or
                        inner.get('title') or inner.get('displayName') or
                        inner.get('content') or 'Untitled')
            activity['title'] = util.ellipsize(_encode_ampersands(fallback))

        # The Atom spec says title is plain text, so strip HTML tags:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = source.strip_html_tags(activity['title'])
        activity['title'] = xml.sax.saxutils.escape(plain_title)

        # Make every attachment's image field a list.
        attachments = activity.get('attachments') or inner.get('attachments') or []
        for attachment in attachments:
            attachment['image'] = util.get_list(attachment, 'image')

        # Note and article attachments are rendered as child entries.
        children = []
        for attachment in attachments:
            if attachment.get('objectType') in ('note', 'article'):
                children.append(
                    _encode_ampersands(microformats2.render_content(attachment)))
        inner['rendered_children'] = children

    # Mimics Django templates, where a failed attribute or item lookup yields a
    # special default value that can itself keep being dereferenced. This lets
    # the template skip most conditionals.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Wrap nested dicts recursively so lookups default at every level.
            wrapped = {}
            for key, value in kwargs.items():
                wrapped[key] = Defaulter(**value) if isinstance(value, dict) else value
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            # An empty Defaulter is converted to the empty string.
            if self:
                return super(Defaulter, self).__unicode__()
            return u''

    env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                             autoescape=True)
    if actor is None:
        actor = {}

    feed_template = env.get_template(ATOM_TEMPLATE_FILE)
    items = [Defaulter(**activity) for activity in activities]
    feed_title = title or 'User feed for ' + source.Source.actor_name(actor)
    updated = activities[0]['object'].get('published', '') if activities else ''

    return feed_template.render(
        items=items,
        host_url=host_url,
        request_url=request_url,
        xml_base=xml_base,
        title=feed_title,
        updated=updated,
        actor=Defaulter(**actor),
        rels=rels or {},
    )