def _get_person_tags(self, obj):
    """Extract person tags that refer to Flickr users.

    Uses https://www.flickr.com/services/api/flickr.urls.lookupUser.html to
    find the NSID for a particular URL.

    Args:
      obj: ActivityStreams object that may contain person tags

    Returns:
      a sequence of ActivityStreams person objects augmented with 'id' equal
      to the Flickr user's NSID
    """
    people = {}  # maps id to tag
    for tag in obj.get('tags', []):
        url = tag.get('url', '')
        if (util.domain_from_link(url) == 'flickr.com'
                and tag.get('objectType') == 'person'):
            resp = self.call_api_method('flickr.urls.lookupUser', {'url': url})
            id = resp.get('user', {}).get('id')
            if id:
                tag = copy.copy(tag)
                tag['id'] = id
                people[id] = tag

    return people.values()
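# Hypothetical sketch of the lookup above (profile URL and NSID invented for
# illustration): a person tag pointing at a Flickr profile comes back with
# the user's NSID filled in.
#
#   _get_person_tags({'tags': [{
#       'objectType': 'person',
#       'url': 'https://www.flickr.com/people/example/',
#   }]})
#   # => [{'objectType': 'person',
#   #      'url': 'https://www.flickr.com/people/example/',
#   #      'id': '12345678@N00'}]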
def redirect_unwrap(val):
    """Removes our redirect wrapping from a URL, if it's there.

    val may be a string, dict, or list. dicts and lists are unwrapped
    recursively. Strings that aren't wrapped URLs are left unchanged.

    Args:
      val: string, dict, or list

    Returns:
      same type as val, with wrapped URLs unwrapped
    """
    if isinstance(val, dict):
        return {k: redirect_unwrap(v) for k, v in val.items()}

    elif isinstance(val, list):
        return [redirect_unwrap(v) for v in val]

    elif isinstance(val, str):
        prefix = urllib.parse.urljoin(request.host_url, '/r/')
        if val.startswith(prefix):
            return util.follow_redirects(val[len(prefix):]).url
        elif val.startswith(request.host_url):
            domain = util.domain_from_link(
                urllib.parse.urlparse(val).path.strip('/'))
            return util.follow_redirects(domain).url

    return val
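# Sketch of the unwrapping, assuming this app is served from
# https://fed.brid.gy/ (hypothetical host) and ignoring the
# follow_redirects() resolution step. Unwrapping recurses through dicts and
# lists; non-wrapped values pass through unchanged:
#
#   redirect_unwrap({'object': 'https://fed.brid.gy/r/https://foo.com/bar',
#                    'content': 'hi'})
#   # => {'object': 'https://foo.com/bar', 'content': 'hi'}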
def __init__(self, instance, access_token, user_id=None,
             truncate_text_length=None):
    """Constructor.

    If user_id is not provided, it will be fetched via the API.

    Args:
      instance: string, base URL of Mastodon instance, eg
        https://mastodon.social/
      access_token: string, OAuth access token
      user_id: string or integer, optional, current user's id (not username!)
        on this instance
      truncate_text_length: int, optional character limit for toots, overrides
        the default of 500
    """
    assert instance
    self.instance = self.BASE_URL = instance
    assert access_token
    self.access_token = access_token
    self.TRUNCATE_TEXT_LENGTH = (
        truncate_text_length if truncate_text_length is not None
        else DEFAULT_TRUNCATE_TEXT_LENGTH)
    self.DOMAIN = util.domain_from_link(instance)

    if user_id:
        self.user_id = user_id
    else:
        creds = self._get(API_VERIFY_CREDENTIALS)
        self.user_id = creds['id']
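# Hypothetical usage sketch, assuming this is a Mastodon client class and
# that API_VERIFY_CREDENTIALS points at the instance's verify_credentials
# endpoint (token value is a placeholder):
#
#   m = Mastodon('https://mastodon.social/', access_token='XYZ',
#                truncate_text_length=5000)
#   # m.DOMAIN == 'mastodon.social'; m.user_id is fetched via the API since
#   # user_id wasn't passed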
def undo_follow(self, undo_unwrapped):
    """Handles an AP Undo Follow request by deactivating the Follower.

    Args:
      undo_unwrapped: dict, AP Undo activity with redirect URLs unwrapped
    """
    logging.info('Undoing Follow')

    follow = undo_unwrapped.get('object', {})
    follower = follow.get('actor')
    followee = follow.get('object')
    if not follower or not followee:
        self.error('Undo of Follow requires object with actor and object. '
                   'Got: %s' % follow)

    # deactivate Follower
    user_domain = util.domain_from_link(followee)
    follower_obj = Follower.get_by_id(Follower._id(user_domain, follower))
    if follower_obj:
        logging.info('Marking %s as inactive' % follower_obj.key)
        follower_obj.status = 'inactive'
        follower_obj.put()
    else:
        logging.warning('No Follower found for %s %s', user_domain, follower)
def redir(to):
    """301 redirect to the embedded fully qualified URL.

    e.g. redirects /r/https://foo.com/bar?baz to https://foo.com/bar?baz
    """
    if request.args:
        to += '?' + urllib.parse.urlencode(request.args)

    # some browsers collapse repeated /s in the path down to a single slash.
    # if that happened to this URL, expand it back to two /s.
    to = re.sub(r'^(https?:/)([^/])', r'\1/\2', to)

    if not to.startswith('http://') and not to.startswith('https://'):
        error(f'Expected fully qualified URL; got {to}')

    # check that we've seen this domain before so we're not an open redirect
    domains = set((util.domain_from_link(to),
                   urllib.parse.urlparse(to).hostname))
    for domain in domains:
        if domain and MagicKey.get_by_id(domain):
            logging.info(f'Found MagicKey for domain {domain}')
            break
    else:
        logging.info(f'No user found for any of {domains}; returning 404')
        abort(404)

    # poor man's conneg, only handle single Accept values, not multiple with
    # priorities.
    if request.headers.get('Accept') in (common.CONTENT_TYPE_AS2,
                                         common.CONTENT_TYPE_AS2_LD):
        return convert_to_as2(to)

    # redirect
    logging.info(f'redirecting to {to}')
    return redirect(to, code=301)
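# Self-contained check of the slash-restoring regex used above:
import re

assert (re.sub(r'^(https?:/)([^/])', r'\1/\2', 'https:/foo.com/bar')
        == 'https://foo.com/bar')  # browser-collapsed slash restored
assert (re.sub(r'^(https?:/)([^/])', r'\1/\2', 'https://foo.com/bar')
        == 'https://foo.com/bar')  # well-formed URLs pass through unchanged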
def user_to_actor(self, user):
    """Converts a GitHub user to an ActivityStreams actor.

    Handles both v4 GraphQL and v3 REST API user objects.

    https://developer.github.com/v4/object/user/
    https://developer.github.com/v3/users/

    Args:
      user: dict, decoded JSON GitHub user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded
    """
    actor = self._to_object(user)
    if not actor:
        return actor

    username = user.get('login')
    desc = user.get('bio') or user.get('description')
    actor.update({
        # TODO: orgs, bots
        'objectType': 'person',
        'displayName': user.get('name') or username,
        'username': username,
        'email': user.get('email'),
        'description': desc,
        'summary': desc,
        'image': {'url': user.get('avatarUrl') or user.get('avatar_url') or
                         user.get('url')},
        'location': {'displayName': user.get('location')},
    })

    # extract web site links. extract_links uniquifies and preserves order
    urls = sum((util.extract_links(user.get(field)) for field in (
        'html_url',    # REST
        'url',         # both
        'websiteUrl',  # GraphQL
        'blog',        # REST
        'bio',         # both
    )), [])
    urls = [u for u in urls if util.domain_from_link(u) != 'api.github.com']
    if urls:
        actor['url'] = urls[0]
        if len(urls) > 1:
            actor['urls'] = [{'value': u} for u in urls]

    return self.postprocess_object(actor)
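# Hypothetical input/output sketch (field values invented; ignores the
# _to_object() and postprocess_object() plumbing):
#
#   user_to_actor({'login': 'alice', 'name': 'Alice',
#                  'blog': 'https://alice.example'})
#   # => {'objectType': 'person', 'displayName': 'Alice',
#   #     'username': 'alice', 'url': 'https://alice.example', ...}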
def webmention_endpoint_cache_key(url):
    """Returns memcache key for a cached webmention endpoint for a given URL.

    Example: 'W https snarfed.org'
    """
    domain = util.domain_from_link(url)
    scheme = urlparse.urlparse(url).scheme
    return ' '.join(('W', scheme, domain))
def accept_follow(self, follow, follow_unwrapped):
    """Replies to an AP Follow request with an Accept request.

    Args:
      follow: dict, AP Follow activity
      follow_unwrapped: dict, same, except with redirect URLs unwrapped
    """
    logging.info('Replying to Follow with Accept')

    followee = follow.get('object')
    followee_unwrapped = follow_unwrapped.get('object')
    follower = follow.get('actor')
    if not followee or not followee_unwrapped or not follower:
        common.error(self, 'Follow activity requires object and actor. '
                           'Got: %s' % follow)

    inbox = follower.get('inbox')
    follower_id = follower.get('id')
    if not inbox or not follower_id:
        common.error(self, 'Follow actor requires id and inbox. '
                           'Got: %s' % follower)

    # store Follower
    user_domain = util.domain_from_link(followee_unwrapped)
    Follower.get_or_create(user_domain, follower_id,
                           last_follow=json.dumps(follow))

    # send AP Accept
    accept = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'id': util.tag_uri(appengine_config.HOST,
                           'accept/%s/%s' % (user_domain, follow.get('id'))),
        'type': 'Accept',
        'actor': followee,
        'object': {
            'type': 'Follow',
            'actor': follower_id,
            'object': followee,
        },
    }
    resp = send(accept, inbox, user_domain)
    self.response.status_int = resp.status_code
    self.response.write(resp.text)

    # send webmention
    common.send_webmentions(
        self, as2.to_as1(follow), proxy=True, protocol='activitypub',
        source_as2=json.dumps(follow_unwrapped))
def base_object(self, obj):
    """Returns the 'base' silo object that an object operates on.

    For example, if the object is a comment, this returns the post that it's
    a comment on. If it's an RSVP, this returns the event. The id in the
    returned object is silo-specific, ie not a tag URI.

    Subclasses may override this.

    Args:
      obj: ActivityStreams object

    Returns:
      dict, minimal ActivityStreams object. Usually has at least id; may also
      have url, author, etc.
    """
    # look at in-reply-tos first, then objects (for likes and reposts).
    # technically, the ActivityStreams 'object' field is always supposed to be
    # singular, but microformats2.json_to_object() sometimes returns activities
    # that have a list value, e.g. likes or reposts of multiple objects.
    candidates = []
    for field in ('inReplyTo', 'object'):
        objs = obj.get(field, [])
        if isinstance(objs, dict):
            candidates.append(objs)
        else:
            candidates += objs

    for base_obj in candidates:
        parsed_id = util.parse_tag_uri(base_obj.get('id', ''))
        if parsed_id:
            domain = parsed_id[0]
        else:
            domain = util.domain_from_link(base_obj.get('url', ''))
        if domain == self.DOMAIN:
            break
    else:
        return {}

    base_obj = copy.deepcopy(base_obj)
    id = base_obj.get('id')
    url = base_obj.get('url')

    if id:
        parsed = util.parse_tag_uri(id)
        if parsed:
            base_obj['id'] = parsed[1]
    elif url:
        base_obj['id'] = self.base_id(url)

    return base_obj
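# Hypothetical sketch, assuming self.DOMAIN == 'twitter.com' and that
# util.parse_tag_uri() handles 'tag:DOMAIN,YEAR:NAME' style URIs: a like
# whose object carries a tag URI from this silo gets its bare,
# silo-specific id back.
#
#   base_object({'verb': 'like',
#                'object': {'id': 'tag:twitter.com,2013:123'}})
#   # => {'id': '123'}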
def base_object(self, obj):
    """Returns the 'base' silo object that an object operates on.

    For example, if the object is a comment, this returns the post that it's
    a comment on. If it's an RSVP, this returns the event. The id in the
    returned object is silo-specific, ie not a tag URI.

    Subclasses may override this.

    Args:
      obj: ActivityStreams object

    Returns:
      dict, minimal ActivityStreams object. Usually has at least id; may also
      have url, author, etc.
    """
    # look at in-reply-tos first, then objects (for likes and reposts).
    # technically, the ActivityStreams 'object' field is always supposed to be
    # singular, but microformats2.json_to_object() sometimes returns activities
    # that have a list value, e.g. likes or reposts of multiple objects.
    candidates = []
    for field in ("inReplyTo", "object"):
        objs = obj.get(field, [])
        if isinstance(objs, dict):
            candidates.append(objs)
        else:
            candidates += objs

    for base_obj in candidates:
        parsed_id = util.parse_tag_uri(base_obj.get("id", ""))
        if parsed_id:
            domain = parsed_id[0]
        else:
            domain = util.domain_from_link(base_obj.get("url", ""))
        if domain == self.DOMAIN:
            break
    else:
        return {}

    base_obj = copy.deepcopy(base_obj)
    id = base_obj.get("id")
    url = base_obj.get("url")

    if id:
        parsed = util.parse_tag_uri(id)
        if parsed:
            base_obj["id"] = parsed[1]
    elif url:
        path = urlparse.urlparse(url).path
        base_obj["id"] = path.rstrip("/").rsplit("/", 1)[-1]

    return base_obj
def user_to_actor(cls, user):
    """Converts a GitHub user to an ActivityStreams actor.

    Handles both v4 GraphQL and v3 REST API user objects.

    https://developer.github.com/v4/object/user/
    https://developer.github.com/v3/users/

    Args:
      user: dict, decoded JSON GitHub user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded
    """
    actor = cls._to_object(user)
    if not actor:
        return actor

    username = user.get('login')
    desc = user.get('bio') or user.get('description')
    actor.update({
        # TODO: orgs, bots
        'objectType': 'person',
        'displayName': user.get('name') or username,
        'username': username,
        'email': user.get('email'),
        'description': desc,
        'summary': desc,
        'image': {'url': user.get('avatarUrl') or user.get('avatar_url') or
                         user.get('url')},
        'location': {'displayName': user.get('location')},
    })

    # extract web site links. extract_links uniquifies and preserves order
    urls = sum((util.extract_links(user.get(field)) for field in (
        'html_url',    # REST
        'url',         # both
        'websiteUrl',  # GraphQL
        'blog',        # REST
        'bio',         # both
    )), [])
    urls = [u for u in urls if util.domain_from_link(u) != 'api.github.com']
    if urls:
        actor['url'] = urls[0]
        if len(urls) > 1:
            actor['urls'] = [{'value': u} for u in urls]

    return cls.postprocess_object(actor)
def base_object(self, obj):
    """Returns the 'base' silo object that an object operates on.

    For example, if the object is a comment, this returns the post that it's
    a comment on. If it's an RSVP, this returns the event. The id in the
    returned object is silo-specific, ie not a tag URI.

    Subclasses may override this.

    Args:
      obj: ActivityStreams object

    Returns:
      dict, minimal ActivityStreams object. Usually has at least id; may also
      have url, author, etc.
    """
    # look at in-reply-tos first, then objects (for likes and reposts).
    # technically, the ActivityStreams 'object' field is always supposed to be
    # singular, but microformats2.json_to_object() sometimes returns activities
    # that have a list value, e.g. likes or reposts of multiple objects.
    candidates = []
    for field in ('inReplyTo', 'object', 'target'):
        candidates += util.get_list(obj, field)

    for base_obj in candidates:
        parsed_id = util.parse_tag_uri(base_obj.get('id', ''))
        if parsed_id:
            domain = parsed_id[0]
        else:
            domain = util.domain_from_link(base_obj.get('url', ''))
        if domain == self.DOMAIN:
            break
    else:
        return {}

    base_obj = copy.deepcopy(base_obj)
    id = base_obj.get('id')
    url = base_obj.get('url')

    if id:
        parsed = util.parse_tag_uri(id)
        if parsed:
            base_obj['id'] = parsed[1]
    elif url:
        base_obj['id'] = self.base_id(url)

    return base_obj
def webmention_endpoint_cache_key(url):
    """Returns cache key for a cached webmention endpoint for a given URL.

    Example: 'W https snarfed.org /'

    If the URL is the home page, ie its path is / , the key includes a / at
    the end, so that we cache webmention endpoints for home pages separately
    from other pages. https://github.com/snarfed/bridgy/issues/701
    """
    domain = util.domain_from_link(url)
    scheme = urllib.parse.urlparse(url).scheme

    parts = ['W', scheme, domain]
    if urllib.parse.urlparse(url).path in ('', '/'):
        parts.append('/')

    return ' '.join(parts)
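# A minimal, self-contained sketch of the key scheme above, approximating
# util.domain_from_link() with the URL's netloc (helper name is hypothetical):
import urllib.parse

def _demo_cache_key(url):
    parsed = urllib.parse.urlparse(url)
    parts = ['W', parsed.scheme, parsed.netloc]
    if parsed.path in ('', '/'):
        parts.append('/')  # home pages get their own cache entry
    return ' '.join(parts)

assert _demo_cache_key('https://snarfed.org/') == 'W https snarfed.org /'
assert _demo_cache_key('https://snarfed.org/2018/post') == 'W https snarfed.org'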
def webmention_endpoint_cache_key(url):
    """Returns memcache key for a cached webmention endpoint for a given URL.

    Example: 'W https snarfed.org /'

    If the URL is the home page, ie its path is / , the key includes a / at
    the end, so that we cache webmention endpoints for home pages separately
    from other pages. https://github.com/snarfed/bridgy/issues/701
    """
    domain = util.domain_from_link(url)
    scheme = urlparse.urlparse(url).scheme

    parts = ['W', scheme, domain]
    if urlparse.urlparse(url).path in ('', '/'):
        parts.append('/')

    return ' '.join(parts)
def send(activity, inbox_url, user_domain):
    """Sends an ActivityPub request to an inbox.

    Args:
      activity: dict, AS2 activity
      inbox_url: string
      user_domain: string, domain of the bridgy fed user sending the request

    Returns:
      requests.Response
    """
    logging.info(f'Sending AP request from {user_domain}: '
                 f'{json_dumps(activity, indent=2)}')

    # prepare HTTP Signature (required by Mastodon)
    # https://w3c.github.io/activitypub/#authorization
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    acct = 'acct:%s@%s' % (user_domain, user_domain)
    key = MagicKey.get_or_create(user_domain)
    auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                             algorithm='rsa-sha256', sign_header='signature',
                             headers=('Date', 'Digest', 'Host'))

    # deliver to inbox
    body = json_dumps(activity).encode()
    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        # required for HTTP Signature
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
        'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        # required by Mastodon
        # https://github.com/tootsuite/mastodon/pull/14556#issuecomment-674077648
        'Digest': 'SHA-256=' + b64encode(sha256(body).digest()).decode(),
        'Host': util.domain_from_link(inbox_url),
    }

    return common.requests_post(inbox_url, data=body, auth=auth,
                                headers=headers)
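# Self-contained sketch of the Digest header computed above, in the format
# Mastodon expects (the body value is a placeholder):
from base64 import b64encode
from hashlib import sha256

body = b'{"type": "Accept"}'
digest = 'SHA-256=' + b64encode(sha256(body).digest()).decode()
# => 'SHA-256=<base64 of the SHA-256 hash of the request body>'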
def __init__(self, instance, access_token, user_id=None):
    """Constructor.

    If user_id is not provided, it will be fetched via the API.

    Args:
      instance: string, base URL of Mastodon instance, eg
        https://mastodon.social/
      access_token: string, OAuth access token
      user_id: string or integer, optional, current user's id (not username!)
        on this instance
    """
    assert instance
    self.instance = self.BASE_URL = instance
    assert access_token
    self.access_token = access_token
    self.DOMAIN = util.domain_from_link(instance)

    if user_id:
        self.user_id = user_id
    else:
        creds = self._get(API_VERIFY_CREDENTIALS)
        self.user_id = creds['id']
def redirect_unwrap(val):
    """Removes our redirect wrapping from a URL, if it's there.

    val may be a string, dict, or list. dicts and lists are unwrapped
    recursively. Strings that aren't wrapped URLs are left unchanged.
    """
    if isinstance(val, dict):
        return {k: redirect_unwrap(v) for k, v in val.items()}

    elif isinstance(val, list):
        return [redirect_unwrap(v) for v in val]

    elif isinstance(val, basestring):
        if val.startswith(REDIRECT_PREFIX):
            return val[len(REDIRECT_PREFIX):]
        elif val.startswith(appengine_config.HOST_URL):
            return util.follow_redirects(
                util.domain_from_link(urlparse.urlparse(val).path.strip('/')),
                cache=memcache).url

    return val
def original_post_discovery(activity, domains=None, cache=None,
                            include_redirect_sources=True, **kwargs):
    """Discovers original post links.

    This is a variation on http://indiewebcamp.com/original-post-discovery .
    It differs in that it finds multiple candidate links instead of one, and
    it doesn't bother looking for MF2 (etc) markup because the silos don't
    let you input it. More background:
    https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

    Original post candidates come from the upstreamDuplicates, attachments,
    and tags fields, as well as links and permashortlinks/permashortcitations
    in the text content.

    Args:
      activity: activity dict
      domains: optional sequence of domains. If provided, only links to these
        domains will be considered original and stored in upstreamDuplicates.
        (Permashortcitations are exempt.)
      cache: optional, a cache object for storing resolved URL redirects.
        Passed to follow_redirects().
      include_redirect_sources: boolean, whether to include URLs that redirect
        as well as their final destination URLs
      kwargs: passed to requests.head() when following redirects

    Returns:
      ([string original post URLs], [string mention URLs]) tuple
    """
    obj = activity.get("object") or activity
    content = obj.get("content", "").strip()

    # find all candidate URLs
    tags = [t.get("url") for t in obj.get("attachments", []) + obj.get("tags", [])
            if t.get("objectType") in ("article", "mention", None)]
    candidates = tags + util.extract_links(content) + obj.get("upstreamDuplicates", [])

    # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
    # references to canonical copies of a given (usually syndicated) post, of
    # the form (DOMAIN PATH). We consider them an explicit original post link.
    candidates += [match.expand(r"http://\1/\2")
                   for match in Source._PERMASHORTCITATION_RE.finditer(content)]

    candidates = set(filter(None, (
        util.clean_url(url) for url in candidates
        # heuristic: ellipsized URLs are probably incomplete, so omit them.
        if url and not url.endswith("...") and not url.endswith(u"…"))))

    # check for redirects and add their final urls
    redirects = {}  # maps final URL to original URL for redirects
    for url in list(candidates):
        resolved = follow_redirects(url, cache=cache, **kwargs)
        if (resolved.url != url and
                resolved.headers.get("content-type", "").startswith("text/html")):
            redirects[resolved.url] = url
            candidates.add(resolved.url)

    # use domains to determine which URLs are original post links vs mentions
    originals = set()
    mentions = set()
    for url in util.dedupe_urls(candidates):
        if url in redirects.values():
            # this is a redirected original URL. postpone and handle it when we
            # hit its final URL so that we know the final domain.
            continue
        which = (originals if not domains or util.domain_from_link(url) in domains
                 else mentions)
        which.add(url)
        redirected_from = redirects.get(url)
        if redirected_from and include_redirect_sources:
            which.add(redirected_from)

    logging.info("Original post discovery found original posts %s, mentions %s",
                 originals, mentions)
    return originals, mentions
def host_url(handler):
    domain = util.domain_from_link(handler.request.host_url)
    return HOST_URL if domain in OTHER_DOMAINS else handler.request.host_url
def host_url(handler):
    domain = util.domain_from_link(handler.request.host_url)
    return (HOST_URL if util.domain_or_parent_in(domain, OTHER_DOMAINS)
            else handler.request.host_url)
def request_headers(url=None, source=None):
    if (url and util.domain_from_link(url) in CONNEG_DOMAINS or
            source and source.bridgy_path() in CONNEG_PATHS):
        return REQUEST_HEADERS_CONNEG

    return REQUEST_HEADERS
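# Hypothetical call site: URLs whose domain (or sources whose Bridgy path)
# opt in to content negotiation get the conneg Accept headers; everything
# else gets the defaults. CONNEG_DOMAINS and CONNEG_PATHS are allowlists
# assumed to be defined alongside this function.
#
#   requests.get(url, headers=request_headers(url=url))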
def send_webmentions(activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Args:
      activity_wrapped: dict, AS1 activity
      proxy: boolean, whether to use our proxy URL as the webmention source
      response_props: passed through to the newly created Responses
    """
    activity = redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'{verb} activities are not supported yet.')

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(request.host_url):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error("Couldn't find original post URL")
    if not targets:
        error("Couldn't find any target URLs in inReplyTo, object, or mention tags")

    # send webmentions and store Responses
    errors = []  # stores (code, body) tuples
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info(f'Skipping same-domain webmention from {source} to {target}')
            continue

        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url()
                     if verb in ('follow', 'like', 'share') or proxy
                     else source)
        logging.info(f'Sending webmention from {wm_source} to {target}')

        try:
            endpoint = webmention.discover(target, headers=HEADERS).endpoint
            if endpoint:
                webmention.send(endpoint, wm_source, target, headers=HEADERS)
                response.status = 'complete'
                logging.info('Success!')
            else:
                response.status = 'ignored'
                logging.info('Ignoring.')
        except BaseException as e:
            errors.append(util.interpret_http_exception(e))
        response.put()

    if errors:
        msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
        error(msg, status=int(errors[0][0] or 502))
def original_post_discovery(activity, domains=None, cache=None,
                            include_redirect_sources=True, **kwargs):
    """Discovers original post links.

    This is a variation on http://indiewebcamp.com/original-post-discovery .
    It differs in that it finds multiple candidate links instead of one, and
    it doesn't bother looking for MF2 (etc) markup because the silos don't
    let you input it. More background:
    https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

    Original post candidates come from the upstreamDuplicates, attachments,
    and tags fields, as well as links and permashortlinks/permashortcitations
    in the text content.

    Args:
      activity: activity dict
      domains: optional sequence of domains. If provided, only links to these
        domains will be considered original and stored in upstreamDuplicates.
        (Permashortcitations are exempt.)
      cache: optional, a cache object for storing resolved URL redirects.
        Passed to follow_redirects().
      include_redirect_sources: boolean, whether to include URLs that redirect
        as well as their final destination URLs
      kwargs: passed to requests.head() when following redirects

    Returns:
      ([string original post URLs], [string mention URLs]) tuple
    """
    obj = activity.get('object') or activity
    content = obj.get('content', '').strip()

    # find all candidate URLs
    tags = [t.get('url') for t in obj.get('attachments', []) + obj.get('tags', [])
            if t.get('objectType') in ('article', 'mention', None)]
    candidates = tags + util.extract_links(content) + obj.get(
        'upstreamDuplicates', [])

    # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
    # references to canonical copies of a given (usually syndicated) post, of
    # the form (DOMAIN PATH). We consider them an explicit original post link.
    candidates += [match.expand(r'http://\1/\2') for match in
                   Source._PERMASHORTCITATION_RE.finditer(content)]

    candidates = set(filter(None, (
        util.clean_url(url) for url in candidates
        # heuristic: ellipsized URLs are probably incomplete, so omit them.
        if url and not url.endswith('...') and not url.endswith(u'…'))))

    # check for redirects and add their final urls
    redirects = {}  # maps final URL to original URL for redirects
    for url in list(candidates):
        resolved = util.follow_redirects(url, cache=cache, **kwargs)
        if (resolved.url != url and
                resolved.headers.get('content-type', '').startswith('text/html')):
            redirects[resolved.url] = url
            candidates.add(resolved.url)

    # use domains to determine which URLs are original post links vs mentions
    originals = set()
    mentions = set()
    for url in util.dedupe_urls(candidates):
        if url in redirects.values():
            # this is a redirected original URL. postpone and handle it when we
            # hit its final URL so that we know the final domain.
            continue
        domain = util.domain_from_link(url)
        which = (originals if not domains or
                 util.domain_or_parent_in(domain, domains) else mentions)
        which.add(url)
        redirected_from = redirects.get(url)
        if redirected_from and include_redirect_sources:
            which.add(redirected_from)

    logging.info('Original post discovery found original posts %s, mentions %s',
                 originals, mentions)
    return originals, mentions
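# Hypothetical usage sketch (URLs invented; ignores redirect resolution and
# URL cleanup): links to the author's own domains are classified as
# originals, everything else as mentions.
#
#   activity = {'object': {'content':
#       'post https://alice.example/a and https://other.example/b'}}
#   originals, mentions = original_post_discovery(
#       activity, domains=['alice.example'])
#   # originals == {'https://alice.example/a'}
#   # mentions == {'https://other.example/b'}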
def request_headers(url=None, source=None):
    if (url and util.domain_from_link(url) in CONNEG_DOMAINS or
            source and source.bridgy_path() in CONNEG_PATHS):
        return REQUEST_HEADERS_CONNEG

    return {}
def host_url(path_query=None):
    domain = util.domain_from_link(request.host_url)
    base = (HOST_URL if util.domain_or_parent_in(domain, OTHER_DOMAINS)
            else request.host_url)
    return urllib.parse.urljoin(base, path_query)
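# Hypothetical example: when a request arrives on one of OTHER_DOMAINS, URLs
# are generated against the canonical HOST_URL instead of the request's host:
#
#   host_url('/foo?bar')  # => urljoin(HOST_URL, '/foo?bar')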
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Args:
      handler: RequestHandler
      activity_wrapped: dict, AS1 activity
      proxy: boolean, whether to use our proxy URL as the webmention source
      response_props: passed through to the newly created Responses
    """
    activity = common.redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(appengine_config.HOST_URL):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler,
              "Couldn't find any target URLs in inReplyTo, object, or mention tags")

    # send webmentions and store Responses
    errors = []
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info('Skipping same-domain webmention from %s to %s',
                         source, target)
            continue

        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url()
                     if verb in ('follow', 'like', 'share') or proxy
                     else source)
        logging.info('Sending webmention from %s to %s', wm_source, target)

        wm = send.WebmentionSend(wm_source, target)
        if wm.send(headers=HEADERS):
            logging.info('Success: %s', wm.response)
            response.status = 'complete'
        else:
            logging.warning('Failed: %s', wm.error)
            errors.append(wm.error)
            response.status = 'error'
        response.put()

    if errors:
        msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
        error(handler, msg, status=errors[0].get('http_status'))