def verify(self, force=False):
    """Check that this source is ready to be used.

    For blog and listen sources, fetches the front page HTML and runs
    webmention endpoint discovery on it. For publish sources, checks that
    they have a domain.

    May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.

    Args:
      force: if True, fully verifies (e.g. re-fetches the blog's HTML and
        performs webmention discovery) even if we already think this source
        is verified.
    """
    # Author URLs whose domains aren't on the webmention blocklist.
    candidates = [
        url for url, domain in zip(self.get_author_urls(), self.domains)
        if not util.in_webmention_blocklist(domain)]

    already_done = self.verified() and not force
    if (already_done or self.status == 'disabled' or not self.features
            or not candidates):
        return

    try:
        # Best effort: discovery failures just clear the endpoint.
        discovered = webmention.discover(candidates[0],
                                         timeout=util.HTTP_TIMEOUT)
        self.webmention_endpoint = discovered.endpoint
        self._fetched_html = discovered.response.text
    except BaseException as e:
        logger.info('Error discovering webmention endpoint', exc_info=e)
        self.webmention_endpoint = None

    self.put()
def send_webmentions(activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Args:
      activity_wrapped: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    activity = redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'{verb} activities are not supported yet.')

    # Work out the source URL and collect candidate target URLs.
    source = activity.get('url') or activity.get('id')
    inner = activity.get('object')
    inner_url = util.get_url(inner)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(inner, dict):
        if verb in ('create', 'post', 'update') or not source:
            source = inner_url or inner.get('id')
        targets += util.get_list(inner, 'inReplyTo')

    # Mentions of URLs we host are also webmention targets.
    tags = util.get_list(activity_wrapped, 'tags')
    wrapped_inner = activity_wrapped.get('object')
    if isinstance(wrapped_inner, dict):
        tags += util.get_list(wrapped_inner, 'tags')

    for tag in tags:
        url = tag.get('url')
        if (tag.get('objectType') == 'mention' and url
                and url.startswith(request.host_url)):
            targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(inner_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)

    if not source:
        error("Couldn't find original post URL")
    if not targets:
        error(
            "Couldn't find any target URLs in inReplyTo, object, or mention tags"
        )

    # Fire a webmention at each target and record a Response for it.
    errors = []  # (code, body) tuples
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info(
                f'Skipping same-domain webmention from {source} to {target}')
            continue

        resp = Response(source=source, target=target, direction='in',
                        **response_props)
        resp.put()

        use_proxy = proxy or verb in ('follow', 'like', 'share')
        wm_source = resp.proxy_url() if use_proxy else source
        logging.info(f'Sending webmention from {wm_source} to {target}')

        try:
            endpoint = webmention.discover(target, headers=HEADERS).endpoint
            if endpoint:
                webmention.send(endpoint, wm_source, target, headers=HEADERS)
                resp.status = 'complete'
                logging.info('Success!')
            else:
                resp.status = 'ignored'
                logging.info('Ignoring.')
        except BaseException as e:
            errors.append(util.interpret_http_exception(e))
        resp.put()

    if errors:
        msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
        error(msg, status=int(errors[0][0] or 502))
def do_send_webmentions(self):
    """Sends webmentions to this entity's unsent and previously failed targets.

    Rechecks each target URL, discovers each target's webmention endpoint
    (using a per-domain cache), sends, and sorts every target into one of the
    entity's sent/skipped/failed/error lists before completing or releasing
    the task.
    """
    # Retry everything that hasn't succeeded yet: unsent plus prior
    # error/failed targets. Those two lists are rebuilt from scratch below.
    urls = self.entity.unsent + self.entity.error + self.entity.failed
    unsent = set()
    self.entity.error = []
    self.entity.failed = []

    for orig_url in urls:
        # recheck the url here since the checks may have failed during the poll
        # or streaming add.
        url, domain, ok = util.get_webmention_target(orig_url)
        if ok:
            if len(url) <= _MAX_STRING_LENGTH:
                unsent.add(url)
            else:
                # too long to store (presumably a datastore string limit —
                # TODO confirm); record the *original* URL as failed.
                logging.info('Giving up on target URL over %s chars! %s',
                             _MAX_STRING_LENGTH, url)
                self.entity.failed.append(orig_url)
    self.entity.unsent = sorted(unsent)

    while self.entity.unsent:
        target = self.entity.unsent.pop(0)
        source_url = self.source_url(target)
        logging.info('Webmention from %s to %s', source_url, target)

        # see if we've cached webmention discovery for this domain. the cache
        # value is a string URL endpoint if discovery succeeded, NO_ENDPOINT if
        # no endpoint was found.
        cache_key = util.webmention_endpoint_cache_key(target)
        endpoint = util.webmention_endpoint_cache.get(cache_key)
        if endpoint:
            logging.info('Using cached webmention endpoint %r: %s', cache_key,
                         endpoint)

        # send! and handle response or error
        try:
            resp = None
            headers = util.request_headers(source=self.source)
            if not endpoint:
                # NOTE(review): this discover() apparently returns an
                # (endpoint, response) tuple, unlike the library call in
                # verify() — presumably a project wrapper; confirm.
                endpoint, resp = webmention.discover(target, headers=headers)
                # cache the result, positive or negative, for future targets
                # on this domain.
                with util.webmention_endpoint_cache_lock:
                    util.webmention_endpoint_cache[
                        cache_key] = endpoint or NO_ENDPOINT

            if endpoint and endpoint != NO_ENDPOINT:
                logging.info('Sending...')
                # timeout=999: deliberately long wait for slow receivers —
                # TODO confirm intent.
                resp = webmention.send(endpoint, source_url, target,
                                       timeout=999, headers=headers)
                logging.info('Sent! %s', resp)
                self.record_source_webmention(endpoint, target)
                self.entity.sent.append(target)
            else:
                logging.info('Giving up this target.')
                self.entity.skipped.append(target)
        except ValueError:
            # malformed URL: skip, don't count as an error.
            logging.info('Bad URL; giving up this target.')
            self.entity.skipped.append(target)
        except BaseException as e:
            logging.info('', exc_info=True)
            # Give up on 4XX and DNS errors; we don't expect retries to succeed.
            code, _ = util.interpret_http_exception(e)
            if (code and code.startswith('4')) or 'DNS lookup failed' in str(e):
                logging.info('Giving up this target.')
                self.entity.failed.append(target)
            else:
                # transient-looking failure: mark for retry via the error list.
                self.fail(f'Error sending to endpoint: {resp}',
                          level=logging.INFO)
                self.entity.error.append(target)

        # defensive: pop(0) above should already have removed it.
        if target in self.entity.unsent:
            self.entity.unsent.remove(target)

    if self.entity.error:
        logging.info('Propagate task failed')
        self.release('error')
    else:
        self.complete()