def _targets(self):
  """
  Returns: list of string URLs, the source's inReplyTos or objects
    (if appropriate)
  """
  targets = util.get_urls(self.source_obj, 'inReplyTo')
  if targets:
    return targets

  if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT:
    return util.get_urls(self.source_obj, 'object')
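# Usage sketch (added illustration, not part of the original source). _targets
# is a method that reads self.source_obj, so this standalone adaptation of its
# two code paths is hypothetical: the helper name targets_for, the sample
# dicts, and the verbs_with_object parameter (standing in for
# source.VERBS_WITH_OBJECT) are all assumptions made for the example.
def targets_for(source_obj, verbs_with_object=('like', 'share')):
  """Simplified stand-in: inReplyTo URLs first, else the object's URLs."""
  def urls(field):
    vals = source_obj.get(field) or []
    if not isinstance(vals, list):
      vals = [vals]
    return [v.get('url') if isinstance(v, dict) else v for v in vals if v]

  reply_targets = urls('inReplyTo')
  if reply_targets:
    return reply_targets
  if source_obj.get('verb') in verbs_with_object:
    return urls('object')
  return []

# a reply: inReplyTo wins
assert targets_for({
  'inReplyTo': [{'url': 'https://example.com/original'}],
}) == ['https://example.com/original']

# a share with no inReplyTo: falls back to the object's URL
assert targets_for({
  'verb': 'share',
  'object': {'url': 'https://example.com/shared'},
}) == ['https://example.com/shared']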
def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  if not act_type or act_type == 'post':
    primary = a.get('object', {})
  else:
    primary = a
  obj = a.setdefault('object', {})

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  a['title'] = xml.sax.saxutils.escape(
    BeautifulSoup(a['title']).get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    type = att.get('objectType')

    if type == 'image':
      image_atts.append(util.get_first(att, 'image'))
      continue

    image_urls_seen |= set(util.get_urls(att, 'image'))
    if type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties', []))
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue
    url = image.get('url')
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if (url and url not in image_urls_seen and
        not img_src_re.search(obj['rendered_content'])):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'
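# Usage sketch (added illustration, not part of the original source). It calls
# the _prepare_activity defined just above, which mutates the activity in
# place. The _demo_* helper name and the sample activity are hypothetical, and
# the expected values in comments are rough, not exact.
def _demo_prepare_activity():
  activity = {
    'verb': 'post',
    'object': {
      'objectType': 'note',
      'content': 'got drenched by a passing bus',
      'published': '2024-01-02T03:04:05',  # naive timestamp, no offset
    },
  }
  _prepare_activity(activity, reader=False)
  # roughly:
  # activity['title']                        -> plain-text title from content
  # activity['object']['rendered_content']   -> HTML rendering of the note
  # activity['object']['published']          -> '2024-01-02T03:04:05Z'
  return activity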
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get(
    'inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': dedupe_urls(get_urls(attachments, 'image', 'image') +
                           get_urls(primary, 'image')),
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': [object_to_json(a, trim_nulls=False,
                                entry_class=['u-quotation-of', 'h-cite'])
                 for a in attachments['note'] + attachments['article']]
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in ('favorite', 'like'), ('like', 'like'), ('share', 'repost'):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
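# Usage sketch (added illustration, not part of the original source). It calls
# the object_to_json defined just above on a small AS1 note. The _demo_* helper
# name and the sample note are hypothetical; the expected shape in comments is
# rough, not exact.
def _demo_object_to_json():
  note = {
    'objectType': 'note',
    'id': 'tag:example.com,2024:123',
    'content': 'hello world',
    'url': 'https://example.com/123',
    'tags': [{'objectType': 'hashtag', 'displayName': 'indieweb'}],
  }
  mf2 = object_to_json(note)
  # roughly:
  # {'type': ['h-entry'],
  #  'properties': {'uid': ['tag:example.com,2024:123'],
  #                 'url': ['https://example.com/123'],
  #                 'category': ['indieweb'],
  #                 ...}}
  return mf2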
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get(
    'inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False,
                      entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')

  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
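# Usage sketch (added illustration, not part of the original source) for the
# photo/alt handling in the version defined just above: an image attachment
# whose image has a displayName should surface as a {'value': ..., 'alt': ...}
# photo entry. The _demo_* helper name and the sample object are hypothetical;
# expected values in comments are rough, not exact.
def _demo_photo_alt():
  note = {
    'objectType': 'note',
    'content': 'a photo post',
    'attachments': [{
      'objectType': 'image',
      'image': {'url': 'https://example.com/pic.jpg',
                'displayName': 'a leafy park bench'},
    }],
  }
  mf2 = object_to_json(note)
  # roughly:
  # mf2['properties']['photo'] == [{'value': 'https://example.com/pic.jpg',
  #                                 'alt': 'a leafy park bench'}]
  return mf2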
def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  obj = util.get_first(a, 'object', default={})
  primary = obj if (not act_type or act_type == 'post') else a

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize actor images
  for elem in a, obj:
    actor = elem.get('actor')
    if actor:
      actor['image'] = util.get_first(actor, 'image')

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    type = att.get('objectType')

    if type == 'image':
      att['image'] = util.get_first(att, 'image')
      image_atts.append(att['image'])
      continue

    image_urls_seen |= set(util.get_urls(att, 'image'))
    if type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties') or {})
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue
    url = image.get('url')
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if (url and url not in image_urls_seen and
        not img_src_re.search(obj['rendered_content'])):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'
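# Usage sketch (added illustration, not part of the original source) for the
# actor-image normalization in the version defined just above: an actor whose
# 'image' is a list gets flattened in place to its first entry. The _demo_*
# helper name and the sample activity are hypothetical; expected values in
# comments are rough, not exact.
def _demo_actor_image():
  activity = {
    'verb': 'post',
    'actor': {
      'displayName': 'Alice',
      'image': [{'url': 'https://example.com/alice-1.jpg'},
                {'url': 'https://example.com/alice-2.jpg'}],
    },
    'object': {'objectType': 'note', 'content': 'hi'},
  }
  _prepare_activity(activity)
  # roughly:
  # activity['actor']['image'] == {'url': 'https://example.com/alice-1.jpg'}
  return activity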
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then
  rendered in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: boolean, whether to render location, if provided
    synthesize_content: boolean, whether to generate synthetic content if the
      object doesn't have its own, e.g. 'likes this.' or 'shared this.'
    render_attachments: boolean, whether to render attachments, eg links,
      images, audio, and video
    render_image: boolean, whether to render the object's image(s)
    white_space_pre: boolean, whether to wrap in CSS white-space: pre. If
      False, newlines will be converted to <br> tags instead. Background:
      https://indiewebcamp.com/note#Indieweb_whitespace_thinking

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)

    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end
    content += orig[last_end:]

  # is whitespace in this content meaningful? standard heuristic: if there are
  # no HTML tags in it, and it has a newline, then assume yes.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  # https://github.com/snarfed/granary/issues/80
  if content and not obj.get('content_is_html') and '\n' in content:
    if white_space_pre:
      content = '<div style="white-space: pre">%s</div>' % content
    else:
      content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py,
  # then uncomment this.
  # if content:
  #   content = util.linkify(content)

  # the image field. may be multiply valued.
  rendered_urls = set()
  if render_image:
    urls = get_urls(obj, 'image')
    content += _render_attachments([{
      'objectType': 'image',
      'image': {'url': url},
    } for url in urls], obj)
    rendered_urls = set(urls)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')
            and get_url(a, 'image') not in rendered_urls]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
            r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content,
                                  white_space_pre=white_space_pre)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  if render_attachments and obj.get('verb') == 'share':
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
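# Usage sketch (added illustration, not part of the original source). It calls
# the render_content defined just above on a note with an inline mention tag
# (startIndex/length/url) and a plain hashtag: the mention is linkified in
# place, the hashtag is appended as a p-category link. The _demo_* helper name
# and the sample note are hypothetical; the markup shown in comments is rough.
def _demo_render_content():
  note = {
    'objectType': 'note',
    'content': 'hello @alice',
    'tags': [
      {'objectType': 'person', 'displayName': '@alice',
       'url': 'https://example.com/alice', 'startIndex': 6, 'length': 6},
      {'objectType': 'hashtag', 'displayName': 'greetings',
       'url': 'https://example.com/tag/greetings'},
    ],
  }
  html = render_content(note, white_space_pre=False)
  # roughly: 'hello <a href="https://example.com/alice">@alice</a>' followed
  # by the hashtag rendered via tags_to_html(..., 'p-category')
  return html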
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo') or []
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = context.get('inReplyTo') or []

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then
  # first audio
  stream = {}
  for candidate in [obj] + attachments['video'] + attachments['audio']:
    for stream in get_list(candidate, 'stream'):
      if stream:
        break

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      logging.warning('Ignoring duration %r; expected int, got %s',
                      duration, duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, str)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'duration': [duration],
      'size': sizes,
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False,
                      entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML
  # tags, otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')

  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
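# Usage sketch (added illustration, not part of the original source) for the
# duration/size handling in the version defined just above: a video attachment
# whose stream carries integer duration and size should surface as stringified
# mf2 'duration' and 'size' properties. The _demo_* helper name and the sample
# post are hypothetical; expected values in comments are rough, not exact.
def _demo_duration_size():
  video_post = {
    'objectType': 'note',
    'content': 'new clip',
    'attachments': [{
      'objectType': 'video',
      'stream': {'url': 'https://example.com/clip.mp4',
                 'duration': 42, 'size': 123456},
    }],
  }
  mf2 = object_to_json(video_post)
  # roughly:
  # mf2['properties']['duration'] == ['42']
  # mf2['properties']['size'] == ['123456']
  # mf2['properties']['video'] == ['https://example.com/clip.mp4']
  return mf2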