def send_webmentions(handler, activity, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    handler: RequestHandler
    activity: dict, AS1 activity
    response_props: passed through to the newly created Responses
  """
  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(handler, '%s activities are not supported yet.' % verb)

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    if not source:
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))
  if verb in ('like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error(handler, "Couldn't find original post URL")
  if not targets:
    error(handler, "Couldn't find target URLs (inReplyTo or object)")

  # send webmentions and store Responses
  errors = []
  for target in targets:
    if not target:
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()

    wm_source = response.proxy_url() if verb in ('like', 'share') else source
    logging.info('Sending webmention from %s to %s', wm_source, target)

    wm = send.WebmentionSend(wm_source, target)
    if wm.send(headers=HEADERS):
      logging.info('Success: %s', wm.response)
      response.status = 'complete'
    else:
      logging.warning('Failed: %s', wm.error)
      errors.append(wm.error)
      response.status = 'error'
    response.put()

  if errors:
    msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
    error(handler, msg, status=errors[0].get('http_status'))
def append_in_reply_to(before, after):
  """Appends the inReplyTos from the before object to the after object, in place.

  Args:
    before, after: dicts, ActivityStreams activities or objects
  """
  obj_b = before.get('object', before)
  obj_a = after.get('object', after)
  if obj_b and obj_a:
    reply_b = util.get_list(obj_b, 'inReplyTo')
    reply_a = util.get_list(obj_a, 'inReplyTo')
    obj_a['inReplyTo'] = util.dedupe_urls(reply_a + reply_b)
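# A minimal usage sketch for append_in_reply_to(), with made-up dicts. Assumes
# util is oauth_dropins.webutil.util, which the function above already uses;
# its dedupe_urls() accepts both URL strings and dicts with a 'url' key.
before = {'object': {'inReplyTo': [{'url': 'http://orig.example/post'}]}}
after = {'object': {'inReplyTo': [{'url': 'http://other.example/post'}]}}
append_in_reply_to(before, after)
# after['object']['inReplyTo'] now holds both targets, deduped, after's own
# entries first:
# [{'url': 'http://other.example/post'}, {'url': 'http://orig.example/post'}]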
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews a status (aka toot), reply, boost (aka reblog), or favorite.

  https://docs.joinmastodon.org/api/rest/statuses/

  Based on :meth:`Twitter._create`.

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: string
    ignore_formatting: boolean

  Returns:
    CreationResult. If preview is True, the content will be a unicode string
    HTML snippet. If False, it will be a dict with 'id' and 'url' keys for the
    newly created object.
  """
  assert preview in (False, True)

  type = obj.get('objectType')
  verb = obj.get('verb')

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or obj.get('inReplyTo')
  is_rsvp = (verb and verb.startswith('rsvp-')) or verb == 'invite'
  atts = obj.get('attachments', [])
  images = util.dedupe_urls(
      util.get_list(obj, 'image') +
      [a for a in atts if a.get('objectType') == 'image'])
  videos = util.dedupe_urls(
      [obj] + [a for a in atts if a.get('objectType') == 'video'],
      key='stream')
  has_media = (images or videos) and (type in ('note', 'article') or is_reply)

  # prefer displayName over content for articles
  #
  # TODO: handle activities as well as objects? ie pull out ['object'] here if
  # necessary?
  prefer_content = type == 'note' or (base_url and is_reply)
  preview_description = ''
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     prefer_name=not prefer_content)

  if not content:
    if type == 'activity' and not is_rsvp:
      content = verb
    elif has_media:
      content = ''
    else:
      return source.creation_result(
          abort=False,  # keep looking for things to publish,
          error_plain='No content text found.',
          error_html='No content text found.')

  post_label = '%s %s' % (self.NAME, self.TYPE_LABELS['post'])
  if is_reply and not base_url:
    return source.creation_result(
        abort=True,
        error_plain='Could not find a %s to reply to.' % post_label,
        error_html='Could not find a %s to '
        '<a href="http://indiewebcamp.com/reply">reply to</a>. Check that '
        'your post has the right '
        '<a href="http://indiewebcamp.com/comment">in-reply-to</a> link.'
        % post_label)

  # truncate and ellipsize content if necessary
  # TODO: don't count domains in remote mentions.
  # https://docs.joinmastodon.org/usage/basics/#text
  content = self.truncate(content, obj.get('url'), include_link, type)

  # linkify user mentions
  def linkify_mention(match):
    split = match.group(1).split('@')
    username = split[0]
    instance = ('https://' + split[1]) if len(split) > 1 else self.instance
    url = urllib.parse.urljoin(instance, '/@' + username)
    return '<a href="%s">@%s</a>' % (url, username)

  preview_content = MENTION_RE.sub(linkify_mention, content)

  # linkify (defaults to twitter's behavior)
  preview_content = util.linkify(preview_content, pretty=True,
                                 skip_bare_cc_tlds=True)
  tags_url = urllib.parse.urljoin(self.instance, '/tags')
  preview_content = HASHTAG_RE.sub(r'\1<a href="%s/\2">#\2</a>' % tags_url,
                                   preview_content)

  # switch on activity type
  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
          abort=True,
          error_plain='Could not find a %s to %s.' %
          (post_label, self.TYPE_LABELS['like']),
          error_html='Could not find a %s to '
          '<a href="http://indiewebcamp.com/like">%s</a>. Check that your '
          'post has the right '
          '<a href="http://indiewebcamp.com/like">u-like-of link</a>.'
          % (post_label, self.TYPE_LABELS['like']))

    if preview:
      preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
          self.TYPE_LABELS['like'], base_url, self.TYPE_LABELS['post'],
          self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      resp = self._post(API_FAVORITE % base_id)
      resp['type'] = 'like'

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
          abort=True,
          error_plain='Could not find a %s to %s.' %
          (post_label, self.TYPE_LABELS['repost']),
          error_html='Could not find a %s to '
          '<a href="http://indiewebcamp.com/repost">%s</a>. Check that your '
          'post has the right '
          '<a href="http://indiewebcamp.com/repost">repost-of</a> link.'
          % (post_label, self.TYPE_LABELS['repost']))

    if preview:
      preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
          self.TYPE_LABELS['repost'], base_url, self.TYPE_LABELS['post'],
          self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      resp = self._post(API_REBLOG % base_id)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply or is_rsvp:  # a post
    data = {'status': content}

    if is_reply:
      preview_description += 'add a <span class="verb">%s</span> to <a href="%s">this %s</a>: %s' % (
          self.TYPE_LABELS['comment'], base_url, self.TYPE_LABELS['post'],
          self.embed_post(base_obj))
      data['in_reply_to_id'] = base_id
    else:
      preview_description += '<span class="verb">%s</span>:' % self.TYPE_LABELS['post']

    num_media = len(videos) + len(images)
    if num_media > MAX_MEDIA:
      videos = videos[:MAX_MEDIA]
      images = images[:max(MAX_MEDIA - len(videos), 0)]
      logging.warning('Found %d media! Only using the first %d: %r',
                      num_media, MAX_MEDIA, videos + images)

    if preview:
      media_previews = [
          '<video controls src="%s"><a href="%s">%s</a></video>' % (
              util.get_url(vid, key='stream'), util.get_url(vid, key='stream'),
              vid.get('displayName') or 'this video')
          for vid in videos
      ] + [
          '<img src="%s" alt="%s" />' % (util.get_url(img),
                                         img.get('displayName') or '')
          for img in images
      ]
      if media_previews:
        preview_content += '<br /><br />' + ' '.join(media_previews)
      return source.creation_result(content=preview_content,
                                    description=preview_description)
    else:
      ids = self.upload_media(videos + images)
      if ids:
        data['media_ids'] = ids
      resp = self._post(API_STATUSES, json=data)

  else:
    return source.creation_result(
        abort=False,
        error_plain='Cannot publish type=%s, verb=%s to Mastodon' % (type, verb),
        error_html='Cannot publish type=%s, verb=%s to Mastodon' % (type, verb))

  if 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
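# A hedged sketch of driving _create() through granary's public Source API,
# which wraps it in create() and preview_create(). The instance URL and access
# token are placeholders; check granary's Mastodon constructor for the exact
# signature before relying on this.
from granary.mastodon import Mastodon

masto = Mastodon('https://mastodon.example', access_token='...')
note = {'objectType': 'note', 'content': 'Hello fediverse! #indieweb'}

preview = masto.preview_create(note)  # calls _create(note, preview=True)
print(preview.description, preview.content)

result = masto.create(note)           # calls _create(note, preview=False)
print(result.content.get('url'))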
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if
      necessary, e.g. to determine authorship:
      https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    def fetch(url):
      return mf2py.parse(util.requests_get(url).text, url=url)

    author = mf2util.find_author(
        {'items': [mf2]}, hentry=mf2,
        fetch_mf2_func=fetch if fetch_mf2 else None)
    if author:
      author = {
          'objectType': 'person',
          'url': author.get('url'),
          'displayName': author.get('name'),
          'image': [{'url': author.get('photo')}],
      }

  if not author:
    author = actor

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def absolute_urls(prop):
    return [url for url in get_string_urls(props.get(prop, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urllib.parse.urlparse(url).netloc]

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
      json_to_object(quote)
      for quote in mf2.get('children', []) + props.get('quotation-of', [])
      if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  for type in 'audio', 'video':
    attachments.extend({'objectType': type, 'stream': {'url': url}}
                       for url in get_string_urls(props.get(type, [])))

  obj = {
      'id': prop.get('uid'),
      'objectType': as_type,
      'verb': as_verb,
      'published': prop.get('published', ''),
      'updated': prop.get('updated', ''),
      'startTime': prop.get('start'),
      'endTime': prop.get('end'),
      'displayName': get_text(prop.get('name')),
      'summary': get_text(prop.get('summary')),
      'content': get_html(prop.get('content')),
      'url': urls[0] if urls else None,
      'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
      'image': [{'url': url} for url in dedupe_urls(
          absolute_urls('photo') + absolute_urls('featured'))],
      'stream': [{'url': url} for url in absolute_urls('video')],
      'location': json_to_object(prop.get('location')),
      'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
      'tags': [{'objectType': 'hashtag', 'displayName': cat}
               if isinstance(cat, str) else json_to_object(cat)
               for cat in props.get('category', [])],
      'attachments': attachments,
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location
  # properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
              'latitude': float(lat),
              'longitude': float(lng),
          })
        except ValueError:
          logging.warning(
              'Could not convert latitude/longitude (%s, %s) to decimal',
              lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
            'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
            'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
        'object': objects[0] if len(objects) == 1 else objects,
        'actor': author,
    })
  else:
    obj.update({
        'inReplyTo': [{'url': url} for url in in_reply_tos],
        'author': author,
    })

  return source.Source.postprocess_object(obj)
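# A minimal sketch, assuming this json_to_object() is the one in
# granary.microformats2 and the URLs are placeholders.
from granary import microformats2

mf2 = {
    'type': ['h-entry'],
    'properties': {
        'content': [{'html': 'Hello <em>world</em>', 'value': 'Hello world'}],
        'url': ['http://example.com/post'],
    },
}
obj = microformats2.json_to_object(mf2)
# obj is roughly:
# {'objectType': 'note', 'content': 'Hello <em>world</em>',
#  'url': 'http://example.com/post'}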
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo',
                         obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
      'type': (AS_TO_MF2_TYPE.get(obj_type) or
               [entry_class] if isinstance(entry_class, basestring)
               else list(entry_class)),
      'properties': {
          'uid': [obj.get('id') or ''],
          'numeric-id': [obj.get('numeric_id') or ''],
          'name': [name],
          'nickname': [obj.get('username') or ''],
          'summary': [summary],
          'url': (list(object_urls(obj) or object_urls(primary)) +
                  obj.get('upstreamDuplicates', [])),
          'photo': dedupe_urls(get_urls(attachments, 'image', 'image') +
                               get_urls(primary, 'image')),
          'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                               get_urls(primary, 'stream')),
          'audio': get_urls(attachments, 'audio', 'stream'),
          'published': [obj.get('published', primary.get('published', ''))],
          'updated': [obj.get('updated', primary.get('updated', ''))],
          'content': [{
              'value': xml.sax.saxutils.unescape(primary.get('content', '')),
              'html': render_content(primary, include_location=False,
                                     synthesize_content=synthesize_content),
          }],
          'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
          'author': [object_to_json(
              author, trim_nulls=False, default_object_type='person')],
          'location': [object_to_json(
              primary.get('location', {}), trim_nulls=False,
              default_object_type='place')],
          'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                      for c in obj.get('replies', {}).get('items', [])],
          'start': [primary.get('startTime')],
          'end': [primary.get('endTime')],
      },
      'children': [
          object_to_json(a, trim_nulls=False,
                         entry_class=['u-quotation-of', 'h-cite'])
          for a in attachments['note'] + attachments['article']
      ],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
          object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in ('favorite', 'like'), ('like', 'like'), ('share', 'repost'):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
          # flatten contexts that are just a url
          o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
          else object_to_json(o, trim_nulls=False, entry_class='h-cite')
          for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
          object_to_json(t, trim_nulls=False, entry_class='h-cite')
          for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
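# The inverse direction, as a hedged sketch against
# granary.microformats2.object_to_json. A like whose object is just a URL gets
# flattened into a u-like-of property.
from granary import microformats2

like = {
    'objectType': 'activity',
    'verb': 'like',
    'object': {'url': 'http://example.com/original'},
}
print(microformats2.object_to_json(like))
# roughly: {'type': ['h-entry'],
#           'properties': {'like-of': ['http://example.com/original']}}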
def original_post_discovery(activity, domains=None, cache=None,
                            include_redirect_sources=True, **kwargs):
  """Discovers original post links.

  This is a variation on http://indiewebcamp.com/original-post-discovery . It
  differs in that it finds multiple candidate links instead of one, and it
  doesn't bother looking for MF2 (etc) markup because the silos don't let you
  input it. More background:
  https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

  Original post candidates come from the upstreamDuplicates, attachments, and
  tags fields, as well as links and permashortlinks/permashortcitations in the
  text content.

  Args:
    activity: activity dict
    domains: optional sequence of domains. If provided, only links to these
      domains will be considered original and stored in upstreamDuplicates.
      (Permashortcitations are exempt.)
    cache: optional, a cache object for storing resolved URL redirects. Passed
      to follow_redirects().
    include_redirect_sources: boolean, whether to include URLs that redirect
      as well as their final destination URLs
    kwargs: passed to requests.head() when following redirects

  Returns:
    ([string original post URLs], [string mention URLs]) tuple
  """
  obj = activity.get('object') or activity
  content = obj.get('content', '').strip()

  # find all candidate URLs
  tags = [t.get('url') for t in obj.get('attachments', []) + obj.get('tags', [])
          if t.get('objectType') in ('article', 'mention', None)]
  candidates = tags + util.extract_links(content) + obj.get(
      'upstreamDuplicates', [])

  # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
  # references to canonical copies of a given (usually syndicated) post, of
  # the form (DOMAIN PATH). We consider them an explicit original post link.
  candidates += [match.expand(r'http://\1/\2')
                 for match in Source._PERMASHORTCITATION_RE.finditer(content)]

  candidates = set(filter(None, (
      util.clean_url(url) for url in candidates
      # heuristic: ellipsized URLs are probably incomplete, so omit them.
      if url and not url.endswith('...') and not url.endswith(u'…'))))

  # check for redirects and add their final urls
  redirects = {}  # maps final URL to original URL for redirects
  for url in list(candidates):
    resolved = util.follow_redirects(url, cache=cache, **kwargs)
    if (resolved.url != url and
        resolved.headers.get('content-type', '').startswith('text/html')):
      redirects[resolved.url] = url
      candidates.add(resolved.url)

  # use domains to determine which URLs are original post links vs mentions
  originals = set()
  mentions = set()
  for url in util.dedupe_urls(candidates):
    if url in redirects.values():
      # this is a redirected original URL. postpone and handle it when we hit
      # its final URL so that we know the final domain.
      continue
    domain = util.domain_from_link(url)
    which = (originals if not domains or util.domain_or_parent_in(domain, domains)
             else mentions)
    which.add(url)
    redirected_from = redirects.get(url)
    if redirected_from and include_redirect_sources:
      which.add(redirected_from)

  logging.info('Original post discovery found original posts %s, mentions %s',
               originals, mentions)
  return originals, mentions
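# A hedged example, assuming this lives as granary.source.Source's static
# original_post_discovery method. Note that follow_redirects() makes network
# requests, so these placeholder URLs would need to resolve for the output to
# match exactly.
from granary.source import Source

activity = {'object': {
    'content': 'new post!',
    'upstreamDuplicates': ['http://mysite.example/original'],
    'tags': [{'objectType': 'article', 'url': 'http://other.example/mention'}],
}}
originals, mentions = Source.original_post_discovery(
    activity, domains=['mysite.example'])
# originals: {'http://mysite.example/original'}
# mentions:  {'http://other.example/mention'}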
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo',
                         obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
      'type': (AS_TO_MF2_TYPE.get(obj_type) or
               [entry_class] if isinstance(entry_class, basestring)
               else list(entry_class)),
      'properties': {
          'uid': [obj.get('id') or ''],
          'numeric-id': [obj.get('numeric_id') or ''],
          'name': [name],
          'nickname': [obj.get('username') or ''],
          'summary': [summary],
          'url': (list(object_urls(obj) or object_urls(primary)) +
                  obj.get('upstreamDuplicates', [])),
          # photo is special cased below, to handle alt
          'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                               get_urls(primary, 'stream')),
          'audio': get_urls(attachments, 'audio', 'stream'),
          'published': [obj.get('published', primary.get('published', ''))],
          'updated': [obj.get('updated', primary.get('updated', ''))],
          'content': [{
              'value': xml.sax.saxutils.unescape(primary.get('content', '')),
              'html': render_content(primary, include_location=False,
                                     synthesize_content=synthesize_content),
          }],
          'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
          'author': [object_to_json(
              author, trim_nulls=False, default_object_type='person')],
          'location': [object_to_json(
              primary.get('location', {}), trim_nulls=False,
              default_object_type='place')],
          'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                      for c in obj.get('replies', {}).get('items', [])],
          'start': [primary.get('startTime')],
          'end': [primary.get('endTime')],
      },
      'children': (
          # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
          # something is being "quoted," like in a quote tweet, so i cheat and
          # use extra knowledge here that quoted tweets are converted to note
          # attachments, but URLs in the tweet text are converted to article
          # tags.
          [object_to_json(a, trim_nulls=False,
                          entry_class=['u-quotation-of', 'h-cite'])
           for a in attachments['note'] if 'startIndex' not in a] +
          [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
           for a in attachments['article'] if 'startIndex' not in a]),
  }

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')

  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
          object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
          # flatten contexts that are just a url
          o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
          else object_to_json(o, trim_nulls=False, entry_class='h-cite')
          for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
          object_to_json(t, trim_nulls=False, entry_class='h-cite')
          for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def original_post_discovery(
    activity, domains=None, cache=None, include_redirect_sources=True, **kwargs
):
    """Discovers original post links.

    This is a variation on http://indiewebcamp.com/original-post-discovery . It
    differs in that it finds multiple candidate links instead of one, and it
    doesn't bother looking for MF2 (etc) markup because the silos don't let you
    input it. More background:
    https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

    Original post candidates come from the upstreamDuplicates, attachments, and
    tags fields, as well as links and permashortlinks/permashortcitations in the
    text content.

    Args:
      activity: activity dict
      domains: optional sequence of domains. If provided, only links to these
        domains will be considered original and stored in upstreamDuplicates.
        (Permashortcitations are exempt.)
      cache: optional, a cache object for storing resolved URL redirects. Passed
        to follow_redirects().
      include_redirect_sources: boolean, whether to include URLs that redirect
        as well as their final destination URLs
      kwargs: passed to requests.head() when following redirects

    Returns:
      ([string original post URLs], [string mention URLs]) tuple
    """
    obj = activity.get("object") or activity
    content = obj.get("content", "").strip()

    # find all candidate URLs
    tags = [
        t.get("url")
        for t in obj.get("attachments", []) + obj.get("tags", [])
        if t.get("objectType") in ("article", "mention", None)
    ]
    candidates = tags + util.extract_links(content) + obj.get("upstreamDuplicates", [])

    # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
    # references to canonical copies of a given (usually syndicated) post, of
    # the form (DOMAIN PATH). We consider them an explicit original post link.
    candidates += [
        match.expand(r"http://\1/\2")
        for match in Source._PERMASHORTCITATION_RE.finditer(content)
    ]

    candidates = set(
        filter(
            None,
            (
                util.clean_url(url)
                for url in candidates
                # heuristic: ellipsized URLs are probably incomplete, so omit them.
                if url and not url.endswith("...") and not url.endswith("…")
            ),
        )
    )

    # check for redirects and add their final urls
    redirects = {}  # maps final URL to original URL for redirects
    for url in list(candidates):
        resolved = follow_redirects(url, cache=cache, **kwargs)
        if resolved.url != url and resolved.headers.get(
            "content-type", ""
        ).startswith("text/html"):
            redirects[resolved.url] = url
            candidates.add(resolved.url)

    # use domains to determine which URLs are original post links vs mentions
    originals = set()
    mentions = set()
    for url in util.dedupe_urls(candidates):
        if url in redirects.values():
            # this is a redirected original URL. postpone and handle it when we
            # hit its final URL so that we know the final domain.
            continue
        which = (
            originals
            if not domains or util.domain_from_link(url) in domains
            else mentions
        )
        which.add(url)
        redirected_from = redirects.get(url)
        if redirected_from and include_redirect_sources:
            which.add(redirected_from)

    logging.info(
        "Original post discovery found original posts %s, mentions %s",
        originals,
        mentions,
    )
    return originals, mentions
def postprocess_as2(activity, target=None, key=None):
  """Prepare an AS2 object to be served or sent via ActivityPub.

  Args:
    activity: dict, AS2 object or activity
    target: dict, AS2 object, optional. The target of activity's inReplyTo or
      Like/Announce/etc object, if any.
    key: :class:`models.MagicKey`, optional. populated into publicKey field
      if provided.
  """
  type = activity.get('type')

  # actor objects
  if type == 'Person':
    postprocess_as2_actor(activity)
    if not activity.get('publicKey'):
      # underspecified, inferred from this issue and Mastodon's implementation:
      # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
      # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
      activity.update({
          'publicKey': {
              'id': activity.get('preferredUsername'),
              'publicKeyPem': key.public_pem().decode(),
          },
          '@context': (util.get_list(activity, '@context') +
                       ['https://w3id.org/security/v1']),
      })
    return activity

  for actor in (util.get_list(activity, 'attributedTo') +
                util.get_list(activity, 'actor')):
    postprocess_as2_actor(actor)

  # inReplyTo: singly valued, prefer id over url
  target_id = target.get('id') if target else None
  in_reply_to = activity.get('inReplyTo')
  if in_reply_to:
    if target_id:
      activity['inReplyTo'] = target_id
    elif isinstance(in_reply_to, list):
      if len(in_reply_to) > 1:
        logging.warning("AS2 doesn't support multiple inReplyTo URLs! "
                        'Only using the first: %s' % in_reply_to[0])
      activity['inReplyTo'] = in_reply_to[0]

    # Mastodon evidently requires a Mention tag for replies to generate a
    # notification to the original post's author. not required for likes,
    # reposts, etc. details:
    # https://github.com/snarfed/bridgy-fed/issues/34
    if target:
      for to in (util.get_list(target, 'attributedTo') +
                 util.get_list(target, 'actor')):
        if isinstance(to, dict):
          to = to.get('url') or to.get('id')
        if to:
          activity.setdefault('tag', []).append({
              'type': 'Mention',
              'href': to,
          })

  # activity objects (for Like, Announce, etc): prefer id over url
  obj = activity.get('object')
  if obj:
    if isinstance(obj, dict) and not obj.get('id'):
      obj['id'] = target_id or obj.get('url')
    elif target_id and obj != target_id:
      activity['object'] = target_id

  # id is required for most things. default to url if it's not set.
  if not activity.get('id'):
    activity['id'] = activity.get('url')

  # TODO: find a better way to check this, sometimes or always?
  # removed for now since it fires on posts without u-id or u-url, eg
  # https://chrisbeckstrom.com/2018/12/27/32551/
  # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

  activity['id'] = redirect_wrap(activity.get('id'))
  activity['url'] = redirect_wrap(activity.get('url'))

  # copy image(s) into attachment(s). may be Mastodon-specific.
  # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
  obj_or_activity = obj if isinstance(obj, dict) else activity
  obj_or_activity.setdefault('attachment', []).extend(
      obj_or_activity.get('image', []))

  # cc public and target's author(s) and recipients
  # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
  # https://w3c.github.io/activitypub/#delivery
  if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
    recips = [AS2_PUBLIC_AUDIENCE]
    if target:
      recips += itertools.chain(*(util.get_list(target, field) for field in
                                  ('actor', 'attributedTo', 'to', 'cc')))
    activity['cc'] = util.dedupe_urls(util.get_url(recip) or recip.get('id')
                                      for recip in recips)

  # wrap articles and notes in a Create activity
  if type in ('Article', 'Note'):
    activity = {
        '@context': as2.CONTEXT,
        'type': 'Create',
        'id': f'{activity["id"]}#bridgy-fed-create',
        'object': activity,
    }

  return util.trim_nulls(activity)
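# Illustrative input/output for postprocess_as2(), with placeholder URLs.
# Assumes bridgy-fed's context, where redirect_wrap() rewrites ids/urls
# through the fed.brid.gy redirector; that rewriting is abbreviated to r(...)
# in the comments below.
note = {
    'type': 'Note',
    'id': 'http://example.com/reply',
    'url': 'http://example.com/reply',
    'content': 'Nice post!',
}
create = postprocess_as2(note)
# roughly:
# {'@context': as2.CONTEXT,
#  'type': 'Create',
#  'id': 'r(http://example.com/reply)#bridgy-fed-create',
#  'object': {'type': 'Note', 'id': 'r(http://example.com/reply)',
#             'cc': ['https://www.w3.org/ns/activitystreams#Public'], ...}}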
def send_webmentions(activity_wrapped, proxy=None, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    activity_wrapped: dict, AS1 activity
    proxy: boolean, whether to use the Response's proxy URL as the webmention
      source instead of the original post URL
    response_props: passed through to the newly created Responses
  """
  activity = redirect_unwrap(activity_wrapped)

  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(f'{verb} activities are not supported yet.')

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    if not source or verb in ('create', 'post', 'update'):
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))

  tags = util.get_list(activity_wrapped, 'tags')
  obj_wrapped = activity_wrapped.get('object')
  if isinstance(obj_wrapped, dict):
    tags.extend(util.get_list(obj_wrapped, 'tags'))
  for tag in tags:
    if tag.get('objectType') == 'mention':
      url = tag.get('url')
      if url and url.startswith(request.host_url):
        targets.append(redirect_unwrap(url))

  if verb in ('follow', 'like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error("Couldn't find original post URL")
  if not targets:
    error("Couldn't find any target URLs in inReplyTo, object, or mention tags")

  # send webmentions and store Responses
  errors = []  # stores (code, body) tuples
  for target in targets:
    if util.domain_from_link(target) == util.domain_from_link(source):
      logging.info(f'Skipping same-domain webmention from {source} to {target}')
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()
    wm_source = (response.proxy_url()
                 if verb in ('follow', 'like', 'share') or proxy
                 else source)
    logging.info(f'Sending webmention from {wm_source} to {target}')

    try:
      endpoint = webmention.discover(target, headers=HEADERS).endpoint
      if endpoint:
        webmention.send(endpoint, wm_source, target, headers=HEADERS)
        response.status = 'complete'
        logging.info('Success!')
      else:
        response.status = 'ignored'
        logging.info('Ignoring.')
    except BaseException as e:
      errors.append(util.interpret_http_exception(e))
    response.put()

  if errors:
    msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
    error(msg, status=int(errors[0][0] or 502))
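# The kind of unwrapped AS1 activity this version expects, sketched with
# placeholder URLs. A reply like this would yield one webmention with
# source=http://a.example/reply and target=http://b.example/post, sent to
# whatever endpoint webmention.discover() finds on the target.
reply = {
    'objectType': 'activity',
    'verb': 'post',
    'object': {
        'url': 'http://a.example/reply',
        'inReplyTo': [{'url': 'http://b.example/post'}],
    },
}
# send_webmentions(reply)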
def template_vars(self, domain, url=None):
  assert domain

  if domain.split('.')[-1] in NON_TLDS:
    common.error(self, "%s doesn't look like a domain" % domain, status=404)

  # find representative h-card. try url, then url's home page, then domain
  urls = ['http://%s/' % domain]
  if url:
    urls = [url, urlparse.urljoin(url, '/')] + urls

  for candidate in urls:
    resp = common.requests_get(candidate)
    parsed = common.beautifulsoup_parse(resp.content,
                                        from_encoding=resp.encoding)
    mf2 = mf2py.parse(parsed, url=resp.url, img_with_alt=True)
    # logging.debug('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))
    hcard = mf2util.representative_hcard(mf2, resp.url)
    if hcard:
      logging.info('Representative h-card: %s', json.dumps(hcard, indent=2))
      break
  else:
    common.error(self, """\
Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s""" % resp.url)

  logging.info('Generating WebFinger data for %s', domain)
  key = models.MagicKey.get_or_create(domain)
  props = hcard.get('properties', {})
  urls = util.dedupe_urls(props.get('url', []) + [resp.url])
  canonical_url = urls[0]

  acct = '%s@%s' % (domain, domain)
  for url in urls:
    if url.startswith('acct:'):
      urluser, urldomain = util.parse_acct_uri(url)
      if urldomain == domain:
        acct = '%s@%s' % (urluser, domain)
        logging.info('Found custom username: acct:%s', acct)
        break

  # discover atom feed, if any
  atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
  if atom and atom['href']:
    atom = urlparse.urljoin(resp.url, atom['href'])
  else:
    atom = 'https://granary.io/url?' + urllib.urlencode({
        'input': 'html',
        'output': 'atom',
        'url': resp.url,
        'hub': resp.url,
    })

  # discover PuSH, if any. default to superfeedr; use the first hub link
  # header, if any.
  hub = 'https://bridgy-fed.superfeedr.com/'
  for link in resp.headers.get('Link', '').split(','):
    match = common.LINK_HEADER_RE.match(link)
    if match and match.group(2) == 'hub':
      hub = match.group(1)
      break

  # generate webfinger content
  data = util.trim_nulls({
      'subject': 'acct:' + acct,
      'aliases': urls,
      'magic_keys': [{'value': key.href()}],
      'links': sum(([{
          'rel': 'http://webfinger.net/rel/profile-page',
          'type': 'text/html',
          'href': url,
      }] for url in urls if url.startswith('http')), []) + [{
          'rel': 'http://webfinger.net/rel/avatar',
          'href': url,
      } for url in props.get('photo', [])] + [{
          'rel': 'canonical_uri',
          'type': 'text/html',
          'href': canonical_url,
      },

      # ActivityPub
      {
          'rel': 'self',
          'type': 'application/activity+json',
          # use HOST_URL instead of e.g. request.host_url because it
          # sometimes lost port, e.g. http://localhost:8080 would become
          # just http://localhost. no clue how or why.
          'href': '%s/%s' % (appengine_config.HOST_URL, domain),
      }, {
          'rel': 'inbox',
          'type': 'application/activity+json',
          'href': '%s/%s/inbox' % (appengine_config.HOST_URL, domain),
      },

      # OStatus
      {
          'rel': 'http://schemas.google.com/g/2010#updates-from',
          'type': common.CONTENT_TYPE_ATOM,
          'href': atom,
      }, {
          'rel': 'hub',
          'href': hub,
      }, {
          'rel': 'magic-public-key',
          'href': key.href(),
      }, {
          'rel': 'salmon',
          'href': '%s/%s/salmon' % (appengine_config.HOST_URL, domain),
      }]
  })
  logging.info('Returning WebFinger data: %s', json.dumps(data, indent=2))
  return data
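# How a client sees the result, assuming this handler backs bridgy-fed's
# /.well-known/webfinger endpoint and the data dict above is serialized as
# the JRD response. The domain is a placeholder.
import requests

resp = requests.get('https://fed.brid.gy/.well-known/webfinger',
                    params={'resource': 'acct:mysite.example@mysite.example'})
jrd = resp.json()
inbox = [link['href'] for link in jrd['links'] if link['rel'] == 'inbox'][0]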
def postprocess_as2(activity, target=None, key=None):
  """Prepare an AS2 object to be served or sent via ActivityPub.

  Args:
    activity: dict, AS2 object or activity
    target: dict, AS2 object, optional. The target of activity's inReplyTo or
      Like/Announce/etc object, if any.
    key: MagicKey, optional. populated into publicKey field if provided.
  """
  type = activity.get('type')

  # actor objects
  if type == 'Person':
    postprocess_as2_actor(activity)
    if not activity.get('publicKey'):
      # underspecified, inferred from this issue and Mastodon's implementation:
      # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
      # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
      activity['publicKey'] = {
          'publicKeyPem': key.public_pem(),
      }
    return activity

  for actor in (util.get_list(activity, 'attributedTo') +
                util.get_list(activity, 'actor')):
    postprocess_as2_actor(actor)

  # inReplyTo: singly valued, prefer id over url
  target_id = target.get('id') if target else None
  in_reply_to = activity.get('inReplyTo')
  if in_reply_to:
    if target_id:
      activity['inReplyTo'] = target_id
    elif isinstance(in_reply_to, list):
      if len(in_reply_to) > 1:
        logging.warning("AS2 doesn't support multiple inReplyTo URLs! "
                        'Only using the first: %s' % in_reply_to[0])
      activity['inReplyTo'] = in_reply_to[0]

  # activity objects (for Like, Announce, etc): prefer id over url
  obj = activity.get('object', {})
  if obj:
    if isinstance(obj, dict) and not obj.get('id'):
      obj['id'] = target_id or obj.get('url')
    elif target_id and obj != target_id:
      activity['object'] = target_id

  # id is required for most things. default to url if it's not set.
  if not activity.get('id'):
    activity['id'] = activity.get('url')
  assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

  # cc public and target's author(s) and recipients
  # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
  # https://w3c.github.io/activitypub/#delivery
  if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
    recips = [AS2_PUBLIC_AUDIENCE]
    if target:
      recips += itertools.chain(*(util.get_list(target, field) for field in
                                  ('actor', 'attributedTo', 'to', 'cc')))
    activity['cc'] = util.dedupe_urls(util.get_url(recip) for recip in recips)

  # wrap articles and notes in a Create activity
  if type in ('Article', 'Note'):
    activity = {
        '@context': as2.CONTEXT,
        'type': 'Create',
        'object': activity,
    }

  return util.trim_nulls(activity)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo') or []
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = context.get('inReplyTo') or []

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then
  # first audio
  stream = {}
  for candidate in [obj] + attachments['video'] + attachments['audio']:
    for stream in get_list(candidate, 'stream'):
      if stream:
        break

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      logging.warning('Ignoring duration %r; expected int, got %s',
                      duration, duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
      'type': (AS_TO_MF2_TYPE.get(obj_type) or
               [entry_class] if isinstance(entry_class, str)
               else list(entry_class)),
      'properties': {
          'uid': [obj.get('id') or ''],
          'numeric-id': [obj.get('numeric_id') or ''],
          'name': [name],
          'nickname': [obj.get('username') or ''],
          'summary': [summary],
          'url': (list(object_urls(obj) or object_urls(primary)) +
                  obj.get('upstreamDuplicates', [])),
          # photo is special cased below, to handle alt
          'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                               get_urls(primary, 'stream')),
          'audio': get_urls(attachments, 'audio', 'stream'),
          'duration': [duration],
          'size': sizes,
          'published': [obj.get('published', primary.get('published', ''))],
          'updated': [obj.get('updated', primary.get('updated', ''))],
          'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
          'author': [object_to_json(
              author, trim_nulls=False, default_object_type='person')],
          'location': [object_to_json(
              primary.get('location', {}), trim_nulls=False,
              default_object_type='place')],
          'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                      for c in obj.get('replies', {}).get('items', [])],
          'start': [primary.get('startTime')],
          'end': [primary.get('endTime')],
      },
      'children': (
          # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
          # something is being "quoted," like in a quote tweet, so i cheat and
          # use extra knowledge here that quoted tweets are converted to note
          # attachments, but URLs in the tweet text are converted to article
          # tags.
          [object_to_json(a, trim_nulls=False,
                          entry_class=['u-quotation-of', 'h-cite'])
           for a in attachments['note'] if 'startIndex' not in a] +
          [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
           for a in attachments['article'] if 'startIndex' not in a]),
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML
  # tags, otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')

  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
          object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
          # flatten contexts that are just a url
          o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
          else object_to_json(o, trim_nulls=False, entry_class='h-cite')
          for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
          object_to_json(t, trim_nulls=False, entry_class='h-cite')
          for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
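# Sketch of the duration/size handling above, with hypothetical values: ints
# from the object's stream come out as stringified mf2 properties.
episode = {
    'objectType': 'audio',
    'stream': {'url': 'http://example.com/ep1.mp3',
               'duration': 1800, 'size': 12345678},
}
mf2 = object_to_json(episode)
# mf2['properties']['duration'] == ['1800']
# mf2['properties']['size'] == ['12345678']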
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    handler: RequestHandler
    activity_wrapped: dict, AS1 activity
    proxy: boolean, whether to use the Response's proxy URL as the webmention
      source instead of the original post URL
    response_props: passed through to the newly created Responses
  """
  activity = common.redirect_unwrap(activity_wrapped)

  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(handler, '%s activities are not supported yet.' % verb)

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    if not source or verb in ('create', 'post', 'update'):
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))

  tags = util.get_list(activity_wrapped, 'tags')
  obj_wrapped = activity_wrapped.get('object')
  if isinstance(obj_wrapped, dict):
    tags.extend(util.get_list(obj_wrapped, 'tags'))
  for tag in tags:
    if tag.get('objectType') == 'mention':
      url = tag.get('url')
      if url and url.startswith(appengine_config.HOST_URL):
        targets.append(common.redirect_unwrap(url))

  if verb in ('follow', 'like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error(handler, "Couldn't find original post URL")
  if not targets:
    error(handler,
          "Couldn't find any target URLs in inReplyTo, object, or mention tags")

  # send webmentions and store Responses
  errors = []
  for target in targets:
    if util.domain_from_link(target) == util.domain_from_link(source):
      logging.info('Skipping same-domain webmention from %s to %s',
                   source, target)
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()

    wm_source = (response.proxy_url()
                 if verb in ('follow', 'like', 'share') or proxy
                 else source)
    logging.info('Sending webmention from %s to %s', wm_source, target)

    wm = send.WebmentionSend(wm_source, target)
    if wm.send(headers=HEADERS):
      logging.info('Success: %s', wm.response)
      response.status = 'complete'
    else:
      logging.warning('Failed: %s', wm.error)
      errors.append(wm.error)
      response.status = 'error'
    response.put()

  if errors:
    msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
    error(handler, msg, status=errors[0].get('http_status'))
def json_to_object(mf2, actor=None):
  """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
  author = json_to_object(prop['author']) if prop.get('author') else actor

  # maps mf2 type to ActivityStreams objectType and optional verb.
  mf2_type_to_as_type = {
      'rsvp': ('activity', rsvp_verb),
      'invite': ('activity', 'invite'),
      'repost': ('activity', 'share'),
      'like': ('activity', 'like'),
      'reply': ('comment', None),
      'person': ('person', None),
      'location': ('place', None),
      'note': ('note', None),
      'article': ('article', None),
  }

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

  def absolute_urls(prop):
    return [url for url in get_string_urls(props.get(prop, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urlparse.urlparse(url).netloc]

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
      json_to_object(quote)
      for quote in mf2.get('children', []) + props.get('quotation-of', [])
      if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  for type in 'audio', 'video':
    attachments.extend({'objectType': type, 'url': url}
                       for url in get_string_urls(props.get(type, [])))

  obj = {
      'id': prop.get('uid'),
      'objectType': as_type,
      'verb': as_verb,
      'published': prop.get('published', ''),
      'updated': prop.get('updated', ''),
      'displayName': get_text(prop.get('name')),
      'summary': get_text(prop.get('summary')),
      'content': get_html(prop.get('content')),
      'url': urls[0] if urls else None,
      'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
      'image': [{'url': url} for url in util.dedupe_urls(
          absolute_urls('photo') + absolute_urls('featured'))],
      'stream': [{'url': url} for url in absolute_urls('video')],
      'location': json_to_object(prop.get('location')),
      'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
      'tags': [{'objectType': 'hashtag', 'displayName': cat}
               if isinstance(cat, basestring) else json_to_object(cat)
               for cat in props.get('category', [])],
      'attachments': attachments,
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location
  # properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location']['latitude'] = float(lat)
          obj['location']['longitude'] = float(lng)
          # TODO fill in 'position', maybe using Source.postprocess_object?
        except ValueError:
          logging.warning(
              'Could not convert latitude/longitude (%s, %s) to decimal',
              lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
            'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
            'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
        'object': objects[0] if len(objects) == 1 else objects,
        'actor': author,
    })
  else:
    obj.update({
        'inReplyTo': [{'url': url}
                      for url in get_string_urls(props.get('in-reply-to', []))],
        'author': author,
    })

  return util.trim_nulls(obj)
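# Hedged sketch: an RSVP h-entry becomes an AS1 activity with an rsvp-* verb,
# and its in-reply-to URL becomes the activity's object. Placeholder URL.
rsvp = {
    'type': ['h-entry'],
    'properties': {
        'rsvp': ['yes'],
        'in-reply-to': ['http://example.com/event'],
    },
}
print(json_to_object(rsvp))
# roughly: {'objectType': 'activity', 'verb': 'rsvp-yes',
#           'object': {'url': 'http://example.com/event'}}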
def template_vars(self, domain=None, url=None):
  logging.debug(f'Headers: {list(request.headers.items())}')
  assert domain

  if domain.split('.')[-1] in NON_TLDS:
    error(f"{domain} doesn't look like a domain", status=404)

  # find representative h-card. try url, then url's home page, then domain
  urls = [f'http://{domain}/']
  if url:
    urls = [url, urllib.parse.urljoin(url, '/')] + urls

  for candidate in urls:
    resp = common.requests_get(candidate)
    parsed = util.parse_html(resp)
    mf2 = util.parse_mf2(parsed, url=resp.url)
    # logging.debug(f'Parsed mf2 for {resp.url}: {json_dumps(mf2, indent=2)}')
    hcard = mf2util.representative_hcard(mf2, resp.url)
    if hcard:
      logging.info(f'Representative h-card: {json_dumps(hcard, indent=2)}')
      break
  else:
    error(f"didn't find a representative h-card "
          f"(http://microformats.org/wiki/representative-hcard-parsing) "
          f"on {resp.url}")

  logging.info(f'Generating WebFinger data for {domain}')
  key = models.MagicKey.get_or_create(domain)
  props = hcard.get('properties', {})
  urls = util.dedupe_urls(props.get('url', []) + [resp.url])
  canonical_url = urls[0]

  acct = f'{domain}@{domain}'
  for url in urls:
    if url.startswith('acct:'):
      urluser, urldomain = util.parse_acct_uri(url)
      if urldomain == domain:
        acct = f'{urluser}@{domain}'
        logging.info(f'Found custom username: acct:{acct}')
        break

  # discover atom feed, if any
  atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
  if atom and atom['href']:
    atom = urllib.parse.urljoin(resp.url, atom['href'])
  else:
    atom = 'https://granary.io/url?' + urllib.parse.urlencode({
        'input': 'html',
        'output': 'atom',
        'url': resp.url,
        'hub': resp.url,
    })

  # discover PuSH, if any. default to superfeedr; use the first hub link
  # header, if any.
  hub = 'https://bridgy-fed.superfeedr.com/'
  for link in resp.headers.get('Link', '').split(','):
    match = common.LINK_HEADER_RE.match(link)
    if match and match.group(2) == 'hub':
      hub = match.group(1)
      break

  # generate webfinger content
  data = util.trim_nulls({
      'subject': 'acct:' + acct,
      'aliases': urls,
      'magic_keys': [{'value': key.href()}],
      'links': sum(([{
          'rel': 'http://webfinger.net/rel/profile-page',
          'type': 'text/html',
          'href': url,
      }] for url in urls if url.startswith('http')), []) + [{
          'rel': 'http://webfinger.net/rel/avatar',
          'href': get_text(url),
      } for url in props.get('photo', [])] + [{
          'rel': 'canonical_uri',
          'type': 'text/html',
          'href': canonical_url,
      },

      # ActivityPub
      {
          'rel': 'self',
          'type': common.CONTENT_TYPE_AS2,
          # WARNING: in python 2 sometimes request.host_url lost port,
          # http://localhost:8080 would become just http://localhost. no
          # clue how or why. pay attention here if that happens again.
          'href': f'{request.host_url}{domain}',
      }, {
          'rel': 'inbox',
          'type': common.CONTENT_TYPE_AS2,
          'href': f'{request.host_url}{domain}/inbox',
      },

      # OStatus
      {
          'rel': 'http://schemas.google.com/g/2010#updates-from',
          'type': common.CONTENT_TYPE_ATOM,
          'href': atom,
      }, {
          'rel': 'hub',
          'href': hub,
      }, {
          'rel': 'magic-public-key',
          'href': key.href(),
      }, {
          'rel': 'salmon',
          'href': f'{request.host_url}{domain}/salmon',
      }]
  })
  logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
  return data