def post(self):
  source = self.load_source()

  # validate URL, find silo post
  url = util.get_required_param(self, 'url')
  domain = util.domain_from_link(url)
  path = urllib.parse.urlparse(url).path
  msg = 'Discovering now. Refresh in a minute to see the results!'

  if domain == source.GR_CLASS.DOMAIN:
    post_id = source.GR_CLASS.post_id(url)
    if post_id:
      type = 'event' if path.startswith('/events/') else None
      util.add_discover_task(source, post_id, type=type)
    else:
      msg = "Sorry, that doesn't look like a %s post URL." % source.GR_CLASS.NAME
  elif util.domain_or_parent_in(domain, source.domains):
    synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
    if synd_links:
      for link in synd_links:
        util.add_discover_task(source, source.GR_CLASS.post_id(link))
      source.updates = {'last_syndication_url': util.now_fn()}
      models.Source.put_updates(source)
    else:
      msg = 'Failed to fetch %s or find a %s syndication link.' % (
        util.pretty_link(url), source.GR_CLASS.NAME)
  else:
    msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME

  self.messages.add(msg)
  self.redirect(source.bridgy_url(self))
def post(self):
  # load source
  try:
    source = ndb.Key(urlsafe=util.get_required_param(self, 'source_key')).get()
    if not source:
      self.abort(400, 'Source key not found')
  except ProtocolBufferDecodeError:
    logging.exception('Bad value for source_key')
    self.abort(400, 'Bad value for source_key')

  # validate URL, find silo post
  url = util.get_required_param(self, 'url')
  domain = util.domain_from_link(url)
  msg = 'Discovering now. Refresh in a minute to see the results!'

  if domain == source.GR_CLASS.DOMAIN:
    post_id = source.GR_CLASS.post_id(url)
    util.add_discover_task(source, post_id)
  elif util.domain_or_parent_in(domain, source.domains):
    synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
    if synd_links:
      for link in synd_links:
        util.add_discover_task(source, source.GR_CLASS.post_id(link))
    else:
      msg = 'Failed to fetch %s or find a %s syndication link.' % (
        util.pretty_link(url), source.GR_CLASS.NAME)
  else:
    msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME

  self.messages.add(msg)
  self.redirect(source.bridgy_url(self))
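The handler above decodes a urlsafe datastore key from the request before doing anything else. A minimal sketch of that lookup pattern with the google.cloud.ndb client and a Flask-style abort, assuming a hypothetical Source model; Bridgy's own load_source helper may differ in details.

# Minimal sketch: look up an entity from a urlsafe key string.
# Assumes google.cloud.ndb and a hypothetical Source model; calls must run
# inside an ndb client context (with ndb.Client().context(): ...).
from flask import abort
from google.cloud import ndb

class Source(ndb.Model):
  domains = ndb.StringProperty(repeated=True)

def load_source(source_key):
  try:
    key = ndb.Key(urlsafe=source_key)
  except Exception:
    # malformed keys raise a protobuf decode error
    abort(400, 'Bad value for source_key')

  source = key.get()
  if not source:
    abort(400, 'Source key not found')
  return source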
def find_mention_item(self, items):
  """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

  May modify the items arg, e.g. may set or replace content.html or
  content.value.

  Args:
    items: sequence of mf2 item dicts

  Returns:
    mf2 item dict or None
  """
  # find target URL in source
  for item in items:
    props = item.setdefault('properties', {})

    # find first non-empty content element
    content = props.setdefault('content', [{}])[0]
    text = content.get('html') or content.get('value')

    for type in 'in-reply-to', 'like', 'like-of', 'repost', 'repost-of':
      urls = [urlparse.urldefrag(u)[0] for u in
              microformats2.get_string_urls(props.get(type, []))]
      if self.any_target_in(urls):
        break
    else:
      if text and self.any_target_in(text):
        type = 'post'
        url = first_value(props, 'url') or self.source_url
        name = first_value(props, 'name') or first_value(props, 'summary')
        text = content['html'] = ('mentioned this in %s.' %
                                  util.pretty_link(url, text=name, max_length=280))
      else:
        type = None

    if type:
      # found the target!
      rsvp = first_value(props, 'rsvp')
      if rsvp:
        self.entity.type = 'rsvp'
        if not text:
          content['value'] = 'RSVPed %s.' % rsvp
      else:
        self.entity.type = {'in-reply-to': 'comment',
                            'like-of': 'like',
                            'repost-of': 'repost',
                            }.get(type, type)
        if not text:
          content['value'] = {'comment': 'replied to this.',
                              'like': 'liked this.',
                              'repost': 'reposted this.',
                              }[self.entity.type]
      return item

    # check children in case this is eg an h-feed
    found = self.find_mention_item(item.get('children', []))
    if found:
      return found

  return None
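The for/else loop above strips URL fragments before checking candidate URLs against the webmention target. A standalone, stdlib-only sketch of that comparison; the target list and the any_target_in name are stand-ins for illustration.

# Minimal sketch of fragment-insensitive target matching (stdlib only).
from urllib.parse import urldefrag

TARGETS = ['https://example.com/post']  # hypothetical webmention target(s)

def any_target_in(urls):
  """Returns True if any URL matches a target, ignoring #fragments."""
  stripped = [urldefrag(u)[0] for u in urls]
  return any(t in stripped for t in TARGETS)

print(any_target_in(['https://example.com/post#comment-3']))  # True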
def fetch_mf2(self, url):
  """Fetches a URL and extracts its mf2 data.

  Side effects: sets self.entity.html on success, calls self.error() on
  errors.

  Args:
    url: string

  Returns:
    (requests.Response, mf2 data dict) on success, None on failure
  """
  try:
    fetched = util.requests_get(url)
    fetched.raise_for_status()
  except BaseException as e:
    util.interpret_http_exception(e)  # log exception
    return self.error('Could not fetch source URL %s' % url)

  if self.entity:
    self.entity.html = fetched.text

  # .text is decoded unicode string, .content is raw bytes. if the HTTP
  # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
  # can look for a <meta> tag with a charset and decode.
  text = (fetched.text if 'charset' in fetched.headers.get('content-type', '')
          else fetched.content)
  doc = BeautifulSoup(text)

  # special case tumblr's markup: div#content > div.post > div.copy
  # convert to mf2.
  contents = doc.find_all(id='content')
  if contents:
    post = contents[0].find_next(class_='post')
    if post:
      post['class'] = 'h-entry'
      copy = post.find_next(class_='copy')
      if copy:
        copy['class'] = 'e-content'
      photo = post.find_next(class_='photo-wrapper')
      if photo:
        img = photo.find_next('img')
        if img:
          img['class'] = 'u-photo'
      doc = unicode(post)

  # parse microformats, convert to ActivityStreams
  data = parser.Parser(doc=doc, url=fetched.url).to_dict()
  logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
  items = data.get('items', [])
  if not items or not items[0]:
    return self.error('No microformats2 data found in ' + fetched.url,
                      data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

  return fetched, data
def post(self):
  source = self.load_source()

  # validate URL, find silo post
  url = util.get_required_param(self, 'url')
  domain = util.domain_from_link(url)
  path = urlparse.urlparse(url).path
  msg = 'Discovering now. Refresh in a minute to see the results!'

  if domain == source.GR_CLASS.DOMAIN:
    post_id = source.GR_CLASS.post_id(url)
    if post_id:
      type = 'event' if path.startswith('/events/') else None
      util.add_discover_task(source, post_id, type=type)
    else:
      msg = "Sorry, that doesn't look like a %s post URL." % source.GR_CLASS.NAME
  elif util.domain_or_parent_in(domain, source.domains):
    synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
    if synd_links:
      for link in synd_links:
        util.add_discover_task(source, source.GR_CLASS.post_id(link))
      source.updates = {'last_syndication_url': util.now_fn()}
      models.Source.put_updates(source)
    else:
      msg = 'Failed to fetch %s or find a %s syndication link.' % (
        util.pretty_link(url), source.GR_CLASS.NAME)
  else:
    msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME

  self.messages.add(msg)
  self.redirect(source.bridgy_url(self))
def template_vars(self):
  entities = []

  for cls in (Response, ):  # BlogPost
    for e in cls.query().order(-cls.updated):
      if (len(entities) >= self.NUM_ENTITIES or
          e.updated < datetime.datetime.now() - datetime.timedelta(hours=1)):
        break
      elif (not e.error and not e.unsent) or e.status == 'complete':
        continue

      e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed]

      if e.key.kind() == 'Response':
        e.response = json.loads(e.response_json)
        e.activities = [json.loads(a) for a in e.activities_json]
      else:
        e.response = {'content': '[BlogPost]'}
        e.activities = [{'url': e.key.id()}]

      entities.append(e)

  entities.sort(key=lambda e: (e.source, e.activities, e.response))
  return {'responses': entities}
def find_mention_item(self, data):
  """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

  May modify the data arg, e.g. may set or replace content.html or
  content.value.

  Args:
    data: mf2 data dict

  Returns:
    mf2 item dict or None
  """
  # find target URL in source
  for item in data.get('items', []):
    props = item.setdefault('properties', {})

    # find first non-empty content element
    content = props.setdefault('content', [{}])[0]
    text = content.get('html') or content.get('value')

    for type in 'in-reply-to', 'like', 'like-of', 'repost', 'repost-of':
      urls = [urlparse.urldefrag(u)[0] for u in
              microformats2.get_string_urls(props.get(type, []))]
      if self.any_target_in(urls):
        break
    else:
      if not text or not self.any_target_in(text):
        continue
      type = 'post'
      url = first_value(props, 'url') or self.source_url
      name = first_value(props, 'name') or first_value(props, 'summary')
      text = content['html'] = ('mentioned this in %s.' %
                                util.pretty_link(url, text=name, max_length=280))

    if type:
      # found the target!
      rsvp = first_value(props, 'rsvp')
      if rsvp:
        self.entity.type = 'rsvp'
        if not text:
          content['value'] = 'RSVPed %s.' % rsvp
      else:
        self.entity.type = {'in-reply-to': 'comment',
                            'like-of': 'like',
                            'repost-of': 'repost',
                            }.get(type, type)
        if not text:
          content['value'] = {'comment': 'replied to this.',
                              'like': 'liked this.',
                              'repost': 'reposted this.',
                              }[self.entity.type]
      return item

  return None
def process_webmention_links(self, e):
  """Generates pretty HTML for the links in a :class:`Webmentions` entity.

  Args:
    e: :class:`Webmentions` subclass (:class:`Response` or :class:`BlogPost`)
  """
  link = lambda url, g: util.pretty_link(
    url, glyphicon=g, attrs={'class': 'original-post u-bridgy-target'},
    new_tab=True)
  return util.trim_nulls({
    'Failed': set(link(url, 'exclamation-sign') for url in e.error + e.failed),
    'Sending': set(link(url, 'transfer') for url in e.unsent
                   if url not in e.error),
    'Sent': set(link(url, None) for url in e.sent
                if url not in (e.error + e.unsent)),
    'No <a href="http://indiewebify.me/#send-webmentions">webmention</a> '
    'support': set(link(url, None) for url in e.skipped),
  })
def find_mention_item(self, data):
  """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

  May modify the data arg, e.g. may set or replace content.html or
  content.value.

  Args:
    data: mf2 data dict

  Returns:
    mf2 item dict or None
  """
  # find target URL in source
  for item in data.get('items', []):
    props = item.setdefault('properties', {})

    # find first non-empty content element
    content = props.setdefault('content', [{}])[0]
    text = content.get('html') or content.get('value')

    for type in 'in-reply-to', 'like', 'like-of', 'repost', 'repost-of':
      urls = [urlparse.urldefrag(u)[0] for u in
              microformats2.get_string_urls(props.get(type, []))]
      if self.any_target_in(urls):
        break
    else:
      if not text or not self.any_target_in(text):
        continue
      type = 'post'
      url = first_value(props, 'url') or self.source_url
      name = first_value(props, 'name') or first_value(props, 'summary')
      text = content['html'] = ('mentioned this in %s.' %
                                util.pretty_link(url, text=name))

    if type:
      # found the target!
      rsvp = first_value(props, 'rsvp')
      if rsvp:
        self.entity.type = 'rsvp'
        if not text:
          content['value'] = 'RSVPed %s.' % rsvp
      else:
        self.entity.type = {'in-reply-to': 'comment',
                            'like-of': 'like',
                            'repost-of': 'repost',
                            }.get(type, type)
        if not text:
          content['value'] = {'comment': 'replied to this.',
                              'like': 'liked this.',
                              'repost': 'reposted this.',
                              }[self.entity.type]
      return item

  return None
def fetch_mf2(self, url):
  """Fetches a URL and extracts its mf2 data.

  Side effects: sets self.entity.html on success, calls self.error() on
  errors.

  Args:
    url: string

  Returns:
    (requests.Response, mf2 data dict) on success, None on failure
  """
  try:
    fetched = requests.get(url, timeout=HTTP_TIMEOUT)
    fetched.raise_for_status()
  except BaseException:
    return self.error('Could not fetch source URL %s' % url)

  if self.entity:
    self.entity.html = fetched.text

  doc = BeautifulSoup(fetched.text)

  # special case tumblr's markup: div#content > div.post > div.copy
  # convert to mf2.
  contents = doc.find_all(id='content')
  if contents:
    post = contents[0].find_next(class_='post')
    if post:
      post['class'] = 'h-entry'
      copy = post.find_next(class_='copy')
      if copy:
        copy['class'] = 'e-content'
      photo = post.find_next(class_='photo-wrapper')
      if photo:
        img = photo.find_next('img')
        if img:
          img['class'] = 'u-photo'
      doc = unicode(post)

  # parse microformats, convert to ActivityStreams
  data = parser.Parser(doc=doc, url=fetched.url).to_dict()
  logging.debug('Parsed microformats2: %s', pprint.pformat(data))
  items = data.get('items', [])
  if not items or not items[0]:
    return self.error('No microformats2 data found in ' + fetched.url,
                      data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

  return fetched, data
def _find_source(self, source_cls, url, domain):
  """Returns the source that should publish a post URL, or None if not found.

  Args:
    source_cls: :class:`models.Source` subclass for this silo
    url: string
    domain: string, url's domain

  Returns:
    :class:`models.Source`
  """
  domain = domain.lower()
  sources = source_cls.query().filter(source_cls.domains == domain).fetch(100)
  if not sources:
    self.error(
      "Could not find <b>%(type)s</b> account for <b>%(domain)s</b>. Check that "
      "your %(type)s profile has %(domain)s in its <em>web site</em> or "
      "<em>link</em> field, then try signing up again." %
      {'type': source_cls.GR_CLASS.NAME, 'domain': domain})
    return

  current_url = ''
  sources_ready = []
  best_match = None
  for source in sources:
    logging.info('Source: %s , features %s, status %s, poll status %s',
                 source.bridgy_url(self), source.features, source.status,
                 source.poll_status)
    if source.status != 'disabled' and 'publish' in source.features:
      # use a source that has a domain_url matching the url provided,
      # including path. find the source with the closest match.
      sources_ready.append(source)
      schemeless_url = util.schemeless(url.lower()).strip('/')
      for domain_url in source.domain_urls:
        schemeless_domain_url = util.schemeless(domain_url.lower()).strip('/')
        if (schemeless_url.startswith(schemeless_domain_url) and
            len(domain_url) > len(current_url)):
          current_url = domain_url
          best_match = source

  if best_match:
    return best_match
  elif sources_ready:
    self.error(
      'No account found that matches %s. Check that '
      '<a href="%s/about#profile-link">the web site URL is in your silo '
      'profile</a>, then <a href="%s/">sign up again</a>.' %
      (util.pretty_link(url), self.request.host_url, self.request.host_url))
  else:
    self.error('Publish is not enabled for your account. '
               '<a href="%s/">Try signing up!</a>' % self.request.host_url)
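The publish lookup above prefers the source whose registered domain_url is the longest scheme-less prefix of the submitted URL. A stdlib-only sketch of that selection, with a simplified schemeless() stand-in for util.schemeless:

# Minimal sketch of "closest domain_url match" selection (stdlib only).
import re

def schemeless(url):
  """Stand-in for util.schemeless: drops the http(s):// prefix."""
  return re.sub(r'^https?://', '', url)

def closest_domain_url(url, domain_urls):
  """Returns the registered domain_url with the longest prefix match, or None."""
  target = schemeless(url.lower()).strip('/')
  best = ''
  for domain_url in domain_urls:
    candidate = schemeless(domain_url.lower()).strip('/')
    if target.startswith(candidate) and len(domain_url) > len(best):
      best = domain_url
  return best or None

print(closest_domain_url('https://example.com/blog/post',
                         ['https://example.com/', 'https://example.com/blog/']))
# -> 'https://example.com/blog/'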
def finish(self, auth_entity, state=None):
  if not auth_entity:
    self.maybe_add_or_delete_source(Medium, auth_entity, state)
    return

  user = json_loads(auth_entity.user_json)['data']
  username = user['username']
  if not username.startswith('@'):
    username = '@' + username

  # fetch publications this user contributes or subscribes to.
  # (sadly medium's API doesn't tell us the difference unless we fetch each
  # pub's metadata separately.)
  # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
  auth_entity.publications_json = auth_entity.get(
    oauth_medium.API_BASE + 'users/%s/publications' % user['id']).text
  auth_entity.put()
  pubs = json_loads(auth_entity.publications_json).get('data')
  if not pubs:
    self.maybe_add_or_delete_source(Medium, auth_entity, state, id=username)
    return

  # add user profile to start of pubs list
  user['id'] = username
  pubs.insert(0, user)

  vars = {
    'action': '/medium/add',
    'state': state,
    'auth_entity_key': auth_entity.key.urlsafe(),
    'blogs': [{
      'id': p['id'],
      'title': p.get('name', ''),
      'url': p.get('url', ''),
      'pretty_url': util.pretty_link(str(p.get('url', ''))),
      'image': p.get('imageUrl', ''),
    } for p in pubs if p.get('id')],
  }
  logging.info('Rendering choose_blog.html with %s', vars)
  self.response.headers['Content-Type'] = 'text/html'
  self.response.out.write(
    JINJA_ENV.get_template('choose_blog.html').render(**vars))
def post(self):
  source = self.load_source()
  redirect_url = '%s?%s' % (self.request.path, urllib.parse.urlencode({
    'source_key': source.key.urlsafe().decode(),
  }))

  add = self.request.get('add')
  delete = self.request.get('delete')
  if (add and delete) or (not add and not delete):
    self.abort(400, 'Either add or delete param (but not both) required')

  link = util.pretty_link(add or delete)

  if add:
    resolved = Source.resolve_profile_url(add)
    if resolved:
      if resolved in source.domain_urls:
        self.messages.add('%s already exists.' % link)
      else:
        source.domain_urls.append(resolved)
        domain = util.domain_from_link(resolved)
        source.domains.append(domain)
        source.put()
        self.messages.add('Added %s.' % link)
    else:
      self.messages.add("%s doesn't look like your web site. Try again?" % link)

  else:
    assert delete
    try:
      source.domain_urls.remove(delete)
    except ValueError:
      self.abort(400, "%s not found in %s's current web sites" % (
        delete, source.label()))

    domain = util.domain_from_link(delete)
    if domain not in set(util.domain_from_link(url)
                         for url in source.domain_urls):
      source.domains.remove(domain)
    source.put()
    self.messages.add('Removed %s.' % link)

  self.redirect(redirect_url)
def process_webmention_links(self, e):
  """Generates pretty HTML for the links in a BlogWebmention entity.

  Args:
    e: BlogWebmention subclass (Response or BlogPost)
  """
  link = lambda url, g: util.pretty_link(
    url, glyphicon=g, attrs={'class': 'original-post u-bridgy-target'},
    new_tab=True)
  return util.trim_nulls({
    'Failed': set(link(url, 'exclamation-sign') for url in e.error + e.failed),
    'Sending': set(link(url, 'transfer') for url in e.unsent
                   if url not in e.error),
    'Sent': set(link(url, None) for url in e.sent
                if url not in (e.error + e.unsent)),
    'No <a href="http://indiewebify.me/#send-webmentions">webmention</a> '
    'support': set(link(url, None) for url in e.skipped),
  })
def _find_source(self, source_cls, url, domain):
  """Returns the source that should publish a post URL, or None if not found.

  Args:
    source_cls: :class:`models.Source` subclass for this silo
    url: string
    domain: string, url's domain

  Returns:
    :class:`models.Source`
  """
  domain = domain.lower()
  sources = source_cls.query().filter(source_cls.domains == domain).fetch(100)
  if not sources:
    self.error(
      "Could not find <b>%(type)s</b> account for <b>%(domain)s</b>. Check that "
      "your %(type)s profile has %(domain)s in its <em>web site</em> or "
      "<em>link</em> field, then try signing up again." %
      {'type': source_cls.GR_CLASS.NAME, 'domain': domain})
    return

  current_url = ''
  sources_ready = []
  best_match = None
  for source in sources:
    logging.info('Source: %s , features %s, status %s, poll status %s',
                 source.bridgy_url(self), source.features, source.status,
                 source.poll_status)
    if source.status != 'disabled' and 'publish' in source.features:
      # use a source that has a domain_url matching the url provided,
      # including path. find the source with the closest match.
      sources_ready.append(source)
      schemeless_url = util.schemeless(url.lower()).strip('/')
      for domain_url in source.domain_urls:
        schemeless_domain_url = util.schemeless(domain_url.lower()).strip('/')
        if (schemeless_url.startswith(schemeless_domain_url) and
            len(domain_url) > len(current_url)):
          current_url = domain_url
          best_match = source

  if best_match:
    return best_match
  elif sources_ready:
    self.error(
      'No account found that matches %s. Check that '
      '<a href="/about#profile-link">the web site URL is in your silo '
      'profile</a>, then <a href="/">sign up again</a>.' % util.pretty_link(url))
  else:
    self.error('Publish is not enabled for your account. '
               '<a href="/">Try signing up!</a>')
def finish(self, auth_entity, state=None):
  if not auth_entity:
    self.maybe_add_or_delete_source(Medium, auth_entity, state)
    return

  user = json.loads(auth_entity.user_json)['data']
  username = user['username']
  if not username.startswith('@'):
    username = '@' + username

  # fetch publications this user contributes or subscribes to.
  # (sadly medium's API doesn't tell us the difference unless we fetch each
  # pub's metadata separately.)
  # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
  auth_entity.publications_json = auth_entity.get(
    oauth_medium.API_BASE + 'users/%s/publications' % user['id']).text
  auth_entity.put()
  pubs = json.loads(auth_entity.publications_json).get('data')
  if not pubs:
    self.maybe_add_or_delete_source(Medium, auth_entity, state, id=username)
    return

  # add user profile to start of pubs list
  user['id'] = username
  pubs.insert(0, user)

  vars = {
    'action': '/medium/add',
    'state': state,
    'auth_entity_key': auth_entity.key.urlsafe(),
    'blogs': [{
      'id': p['id'],
      'title': p.get('name', ''),
      'url': p.get('url', ''),
      'pretty_url': util.pretty_link(str(p.get('url', ''))),
      'image': p.get('imageUrl', ''),
    } for p in pubs if p.get('id')],
  }
  logging.info('Rendering choose_blog.html with %s', vars)
  self.response.headers['Content-Type'] = 'text/html'
  self.response.out.write(
    JINJA_ENV.get_template('choose_blog.html').render(**vars))
def edit_websites_post():
  source = util.load_source()
  redirect_url = f'{request.path}?{urllib.parse.urlencode({"source_key": source.key.urlsafe().decode()})}'

  add = request.values.get('add')
  delete = request.values.get('delete')
  if (add and delete) or (not add and not delete):
    error('Either add or delete param (but not both) required')

  link = util.pretty_link(add or delete)

  if add:
    resolved = Source.resolve_profile_url(add)
    if resolved:
      if resolved in source.domain_urls:
        flash(f'{link} already exists.')
      else:
        source.domain_urls.append(resolved)
        domain = util.domain_from_link(resolved)
        source.domains.append(domain)
        source.put()
        flash(f'Added {link}.')
    else:
      flash(f"{link} doesn't look like your web site. Try again?")

  else:
    assert delete
    try:
      source.domain_urls.remove(delete)
    except ValueError:
      error(f"{delete} not found in {source.label()}'s current web sites")

    domain = util.domain_from_link(delete)
    if domain not in {util.domain_from_link(url) for url in source.domain_urls}:
      source.domains.remove(domain)
    source.put()
    flash(f'Removed {link}.')

  return redirect(redirect_url)
def template_vars(self):
  responses = []

  # Find the most recently propagated responses with error URLs
  for r in Response.query().order(-Response.updated):
    if (len(responses) >= self.NUM_RESPONSES or
        r.updated < datetime.datetime.now() - datetime.timedelta(hours=1)):
      break
    elif not r.error or r.status == 'complete':
      continue

    # r.source = r.source.get()
    r.links = [util.pretty_link(u, new_tab=True) for u in r.error + r.failed]
    r.response = json.loads(r.response_json)
    r.activities = [json.loads(a) for a in r.activities_json]
    responses.append(r)

  responses.sort(key=lambda r: (r.source, r.activities, r.response))
  return {'responses': responses}
def post(self):
  source = self.load_source()
  redirect_url = '%s?%s' % (self.request.path, urllib.urlencode({
    'source_key': source.key.urlsafe(),
  }))

  add = self.request.get('add')
  delete = self.request.get('delete')
  if (add and delete) or (not add and not delete):
    self.abort(400, 'Either add or delete param (but not both) required')

  link = util.pretty_link(add or delete)

  if add:
    resolved = Source.resolve_profile_url(add)
    if resolved:
      if resolved in source.domain_urls:
        self.messages.add('%s already exists.' % link)
      else:
        source.domain_urls.append(resolved)
        domain = util.domain_from_link(resolved)
        source.domains.append(domain)
        source.put()
        self.messages.add('Added %s.' % link)
    else:
      self.messages.add("%s doesn't look like your web site. Try again?" % link)

  else:
    assert delete
    try:
      source.domain_urls.remove(delete)
    except ValueError:
      self.abort(400, "%s not found in %s's current web sites" % (
        delete, source.label()))

    domain = util.domain_from_link(delete)
    if domain not in set(util.domain_from_link(url)
                         for url in source.domain_urls):
      source.domains.remove(domain)
    source.put()
    self.messages.add('Removed %s.' % link)

  self.redirect(redirect_url)
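The delete branch above removes the bare domain only when no other registered URL still maps to it. A stdlib-only sketch of that bookkeeping, with a simplified domain_from_link() stand-in:

# Minimal sketch of the domain bookkeeping used when removing a web site URL.
from urllib.parse import urlparse

def domain_from_link(url):
  """Simplified stand-in for util.domain_from_link."""
  return urlparse(url).netloc.lower()

def remove_site(url, domain_urls, domains):
  """Removes url from domain_urls; drops its domain only if now unused."""
  domain_urls.remove(url)  # raises ValueError if url isn't registered
  domain = domain_from_link(url)
  if domain not in {domain_from_link(u) for u in domain_urls}:
    domains.remove(domain)

urls = ['https://example.com/blog/', 'https://example.com/']
doms = ['example.com']
remove_site('https://example.com/blog/', urls, doms)
print(urls, doms)  # ['https://example.com/'] ['example.com']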
def get_site_info(cls, handler, auth_entity):
  """Fetches the site info from the API.

  Args:
    handler: the current RequestHandler
    auth_entity: oauth_dropins.wordpress.WordPressAuth

  Returns:
    site info dict, or None if API calls are disabled for this blog
  """
  try:
    return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id)
  except urllib2.HTTPError as e:
    code, body = interpret_http_exception(e)
    if (code == '403' and
        '"API calls to this blog have been disabled."' in body):
      handler.messages.add(
        'You need to <a href="http://jetpack.me/support/json-api/">enable '
        'the Jetpack JSON API</a> in %s\'s WordPress admin console.' %
        util.pretty_link(auth_entity.blog_url))
      handler.redirect('/')
      return None
    raise
def get_site_info(cls, handler, auth_entity):
  """Fetches the site info from the API.

  Args:
    handler: the current RequestHandler
    auth_entity: oauth_dropins.wordpress.WordPressAuth

  Returns:
    site info dict, or None if API calls are disabled for this blog
  """
  try:
    return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id)
  except urllib2.HTTPError as e:
    code, body = util.interpret_http_exception(e)
    if (code == '403' and
        '"API calls to this blog have been disabled."' in body):
      handler.messages.add(
        'You need to <a href="http://jetpack.me/support/json-api/">enable '
        'the Jetpack JSON API</a> in %s\'s WordPress admin console.' %
        util.pretty_link(auth_entity.blog_url))
      handler.redirect('/')
      return None
    raise
def template_vars(self):
  entities = []

  for cls in (Response,):  # BlogPost
    for e in cls.query().order(-cls.updated):
      if (len(entities) >= self.NUM_ENTITIES or
          e.updated < datetime.datetime.now() - datetime.timedelta(hours=1)):
        break
      elif (not e.error and not e.unsent) or e.status == 'complete':
        continue

      e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed]

      if e.key.kind() == 'Response':
        e.response = json.loads(e.response_json)
        e.activities = [json.loads(a) for a in e.activities_json]
      else:
        e.response = {'content': '[BlogPost]'}
        e.activities = [{'url': e.key.id()}]

      entities.append(e)

  entities.sort(key=lambda e: (e.source, e.activities, e.response))
  return {'responses': entities}
def responses():
  """Find the most recently attempted responses and blog posts with error URLs."""
  entities = []

  for cls in (Response,):  # BlogPost
    for e in cls.query().order(-cls.updated):
      if (len(entities) >= NUM_ENTITIES or
          e.updated < util.now_fn() - datetime.timedelta(hours=1)):
        break
      elif (not e.error and not e.unsent) or e.status == 'complete':
        continue

      e.links = [util.pretty_link(u, new_tab=True) for u in e.error + e.failed]

      if e.key.kind() == 'Response':
        e.response = json_loads(e.response_json)
        e.activities = [json_loads(a) for a in e.activities_json]
      else:
        e.response = {'content': '[BlogPost]'}
        e.activities = [{'url': e.key.id()}]

      entities.append(e)

  return render_template('admin_responses.html', responses=entities, logs=logs)
def link(url, g):
  return util.pretty_link(
    url, glyphicon=g, attrs={'class': 'original-post u-bridgy-target'},
    new_tab=True)
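For context, here is a rough, hypothetical sketch of what a pretty_link-style helper produces: readable anchor text wrapped in an <a> tag. It is not Bridgy's actual util.pretty_link, which also supports glyphicon, attrs, keep_host, and the other options seen above.

# Hypothetical stand-in for a pretty_link-style helper (not Bridgy's implementation).
from urllib.parse import urlparse

def pretty_link_sketch(url, text=None, new_tab=False, max_length=30):
  parsed = urlparse(url)
  label = text or (parsed.netloc + parsed.path).rstrip('/')
  if len(label) > max_length:
    label = label[:max_length - 3] + '...'
  target = ' target="_blank"' if new_tab else ''
  return f'<a href="{url}"{target}>{label}</a>'

print(pretty_link_sketch('https://example.com/2024/01/a-long-post-title', new_tab=True))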
def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
  """Fetches a URL and extracts its mf2 data.

  Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
  on errors.

  Args:
    url: string
    require_mf2: boolean, whether to return error if no mf2 are found
    raise_errors: boolean, whether to let error exceptions propagate up or
      handle them

  Returns:
    (:class:`requests.Response`, mf2 data dict) on success, None on failure
  """
  try:
    resp = util.requests_get(url)
    resp.raise_for_status()
  except BaseException as e:
    if raise_errors:
      raise
    util.interpret_http_exception(e)  # log exception
    return self.error('Could not fetch source URL %s' % url)

  if self.entity:
    self.entity.html = resp.text

  # parse microformats
  soup = util.parse_html(resp)
  mf2 = util.parse_mf2(soup, resp.url)

  # special case tumblr's markup: div#content > div.post > div.copy
  # convert to mf2 and re-parse
  if not mf2.get('items'):
    contents = soup.find_all(id='content')
    if contents:
      post = contents[0].find_next(class_='post')
      if post:
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        # TODO: i should be able to pass post or contents[0] to mf2py instead
        # here, but it returns no items. mf2py bug?
        doc = str(post)
        mf2 = util.parse_mf2(doc, resp.url)

  logging.debug('Parsed microformats2: %s', json_dumps(mf2, indent=2))
  items = mf2.get('items', [])
  if require_mf2 and (not items or not items[0]):
    return self.error('No microformats2 data found in ' + resp.url,
                      data=mf2, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (resp.url, util.pretty_link(resp.url)))

  return resp, mf2
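The tumblr special case above works by adding mf2 class names (h-entry, e-content, u-photo) to the theme's existing markup and re-parsing. A small sketch of that idea using BeautifulSoup and mf2py directly; the HTML snippet is made up for illustration.

# Sketch: tag plain blog markup with mf2 classes, then parse it with mf2py.
# Assumes the bs4 and mf2py packages; the HTML below is a made-up example.
import mf2py
from bs4 import BeautifulSoup

html = """
<div id="content">
  <div class="post">
    <div class="copy">Hello world!</div>
    <div class="photo-wrapper"><img src="/photo.jpg"></div>
  </div>
</div>
"""

soup = BeautifulSoup(html, 'html.parser')
post = soup.find(class_='post')
post['class'] = 'h-entry'
post.find(class_='copy')['class'] = 'e-content'
post.find('img')['class'] = 'u-photo'

parsed = mf2py.parse(doc=str(post), url='https://example.com/post')
print(parsed['items'][0]['properties'])
# e.g. {'content': [...], 'photo': ['https://example.com/photo.jpg'], ...}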
def template_vars(self):
  vars = super(UserHandler, self).template_vars()
  vars.update({
    'source': self.source,
    'EPOCH': util.EPOCH,
    'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
    'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  })
  if not self.source:
    return vars

  if isinstance(self.source, instagram.Instagram):
    auth = self.source.auth_entity
    vars['indieauth_me'] = (
      auth.id if isinstance(auth, indieauth.IndieAuth)
      else self.source.domain_urls[0] if self.source.domain_urls
      else None)

  # Blog webmention promos
  if 'webmention' not in self.source.features:
    if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
      vars[self.source.SHORT_NAME + '_promo'] = True
    else:
      for domain in self.source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (domain.endswith('tumblr.com') and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (domain.endswith('wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses
  if 'listen' in self.source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == self.source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      val = self.request.get(param)
      try:
        return util.parse_iso8601(val) if val else None
      except:
        msg = "Couldn't parse %s %r as ISO8601" % (param, val)
        logging.exception(msg)
        self.abort(400, msg)

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      self.abort(400, "can't handle both responses_before and responses_after")
    elif after:
      query = query.filter(Response.updated > after).order(Response.updated)
    elif before:
      query = query.filter(Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json.loads(r.response_json)
      r.activities = [json.loads(a) for a in r.activities_json]

      if (not self.source.is_activity_public(r.response) or
          not all(self.source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        r.activities = []

      r.actor = r.response.get('author') or r.response.get('actor', {})

      for a in r.activities + [r.response]:
        if not a.get('content'):
          a['content'] = a.get('object', {}).get('content')

      if not r.response.get('content'):
        phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'rsvp-interested': 'is interested',
          'invite': 'is invited',
        }
        r.response['content'] = '%s %s.' % (
          r.actor.get('displayName') or '',
          phrases.get(r.type) or phrases.get(r.response.get('verb')))

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, self)

      # generate original post links
      r.links = self.process_webmention_links(r)
      r.original_links = [util.pretty_link(url, new_tab=True)
                          for url in r.original_posts]

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    new_after = (
      before if before
      else vars['responses'][0].updated
      if vars['responses'] and query_iter.probably_has_next() and (before or after)
      else None)
    if new_after:
      vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                      new_after.isoformat())

    new_before = (
      after if after
      else vars['responses'][-1].updated
      if vars['responses'] and query_iter.probably_has_next()
      else None)
    if new_before:
      vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                       new_before.isoformat())

    vars['next_poll'] = max(
      self.source.last_poll_attempt + self.source.poll_period(),
      # lower bound is 1 minute from now
      util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes
  if 'publish' in self.source.features:
    publishes = Publish.query().filter(Publish.source == self.source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      p.pretty_page = util.pretty_link(
        p.key.parent().id().decode('utf-8'),
        attrs={'class': 'original-post u-url u-name'}, new_tab=True)
    vars['publishes'] = publishes

  if 'webmention' in self.source.features:
    # Blog posts
    blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = self.process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(
        b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
        max_length=40, new_tab=True)

    # Blog webmentions
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == self.source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
        w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      try:
        target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                            self.source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
        w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
        keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return vars
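The responses_before/responses_after paging above uses the updated timestamp as a cursor rather than numeric offsets. A stdlib-only sketch of parsing those params and emitting the next-page link; parse_iso8601 here is a simplified stand-in for util.parse_iso8601.

# Minimal sketch of timestamp-based paging (stdlib only): parse the paging
# params, then emit the link for the next (older) page of results.
from datetime import datetime

def parse_iso8601(val):
  """Simplified stand-in for util.parse_iso8601."""
  return datetime.fromisoformat(val) if val else None

params = {'responses_before': '2023-05-01T12:00:00'}
before = parse_iso8601(params.get('responses_before'))
after = parse_iso8601(params.get('responses_after'))
assert not (before and after), "can't handle both responses_before and responses_after"

# after rendering a page, the oldest item's `updated` value becomes the cursor
oldest_updated = datetime(2023, 4, 28, 9, 30)
next_page = f'?responses_before={oldest_updated.isoformat()}#responses'
print(next_page)  # ?responses_before=2023-04-28T09:30:00#responses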
def user(site, id):
  """View for a user page."""
  cls = models.sources.get(site)
  if not cls:
    return render_template('user_not_found.html'), 404

  source = cls.lookup(id)
  if not source:
    key = cls.query(ndb.OR(*[
      ndb.GenericProperty(prop) == id
      for prop in ('domains', 'inferred_username', 'name', 'username')
    ])).get(keys_only=True)
    if key:
      return redirect(cls(key=key).bridgy_path(), code=301)

  if not source or not source.features:
    return render_template('user_not_found.html'), 404

  source.verify()
  source = util.preprocess_source(source)

  vars = {
    'source': source,
    'logs': logs,
    'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
    'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  }

  # Blog webmention promos
  if 'webmention' not in source.features:
    if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
      vars[source.SHORT_NAME + '_promo'] = True
    else:
      for domain in source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (util.domain_or_parent_in(domain, ['tumblr.com']) and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (util.domain_or_parent_in(domain, 'wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses
  if 'listen' in source.features or 'email' in source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      val = request.values.get(param)
      try:
        return util.parse_iso8601(val.replace(' ', '+')) if val else None
      except BaseException:
        error(f"Couldn't parse {param}, {val!r} as ISO8601")

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      error("can't handle both responses_before and responses_after")
    elif after:
      query = query.filter(Response.updated > after).order(Response.updated)
    elif before:
      query = query.filter(Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json_loads(r.response_json)
      r.activities = [json_loads(a) for a in r.activities_json]

      if (not source.is_activity_public(r.response) or
          not all(source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        r.activities = []

      verb = r.response.get('verb')
      r.actor = (r.response.get('object') if verb == 'invite'
                 else r.response.get('author') or r.response.get('actor')) or {}

      activity_content = ''
      for a in r.activities + [r.response]:
        if not a.get('content'):
          obj = a.get('object', {})
          a['content'] = activity_content = (
            obj.get('content') or obj.get('displayName') or
            # historical, from a Reddit bug fixed in granary@4f9df7c
            obj.get('name') or '')

      response_content = r.response.get('content')
      phrases = {
        'like': 'liked this',
        'repost': 'reposted this',
        'rsvp-yes': 'is attending',
        'rsvp-no': 'is not attending',
        'rsvp-maybe': 'might attend',
        'rsvp-interested': 'is interested',
        'invite': 'is invited',
      }
      phrase = phrases.get(r.type) or phrases.get(verb)
      if phrase and (r.type != 'repost' or
                     activity_content.startswith(response_content)):
        r.response['content'] = f'{r.actor.get("displayName") or ""} {phrase}.'

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, request)

      # generate original post links
      r.links = process_webmention_links(r)
      r.original_links = [util.pretty_link(url, new_tab=True)
                          for url in r.original_posts]

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    new_after = (
      before if before
      else vars['responses'][0].updated
      if vars['responses'] and query_iter.probably_has_next() and (before or after)
      else None)
    if new_after:
      vars['responses_after_link'] = f'?responses_after={new_after.isoformat()}#responses'

    new_before = (
      after if after
      else vars['responses'][-1].updated
      if vars['responses'] and query_iter.probably_has_next()
      else None)
    if new_before:
      vars['responses_before_link'] = f'?responses_before={new_before.isoformat()}#responses'

    vars['next_poll'] = max(
      source.last_poll_attempt + source.poll_period(),
      # lower bound is 1 minute from now
      util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes
  if 'publish' in source.features:
    publishes = Publish.query().filter(Publish.source == source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      p.pretty_page = util.pretty_link(
        p.key.parent().id(),
        attrs={'class': 'original-post u-url u-name'}, new_tab=True)
    vars['publishes'] = publishes

  if 'webmention' in source.features:
    # Blog posts
    blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(
        b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
        max_length=40, new_tab=True)

    # Blog webmentions
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
        w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      try:
        target_is_source = (urllib.parse.urlparse(w.target_url()).netloc in
                            source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
        w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
        keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return render_template(f'{source.SHORT_NAME}_user.html', **vars)
def template_vars(self):
  if not self.source:
    return {}

  vars = super(UserHandler, self).template_vars()
  vars.update({
    'source': self.source,
    'epoch': util.EPOCH,
  })

  # Blog webmention promos
  if 'webmention' not in self.source.features:
    if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
      vars[self.source.SHORT_NAME + '_promo'] = True
    else:
      for domain in self.source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (domain.endswith('tumblr.com') and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (domain.endswith('wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses
  if 'listen' in self.source.features:
    vars['responses'] = []
    for i, r in enumerate(Response.query()
                          .filter(Response.source == self.source.key)
                          .order(-Response.updated)):
      r.response = json.loads(r.response_json)
      if r.activity_json:  # handle old entities
        r.activities_json.append(r.activity_json)
      r.activities = [json.loads(a) for a in r.activities_json]

      if (not gr_source.Source.is_public(r.response) or
          not all(gr_source.Source.is_public(a) for a in r.activities)):
        continue

      r.actor = r.response.get('author') or r.response.get('actor', {})
      if not r.response.get('content'):
        phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'invite': 'is invited',
        }
        r.response['content'] = '%s %s.' % (
          r.actor.get('displayName') or '',
          phrases.get(r.type) or phrases.get(r.response.get('verb')))

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, self)

      # generate original post links
      r.links = self.process_webmention_links(r)

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

  # Publishes
  if 'publish' in self.source.features:
    publishes = Publish.query().filter(Publish.source == self.source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      p.pretty_page = util.pretty_link(
        p.key.parent().id(), a_class='original-post', new_tab=True)
    vars['publishes'] = publishes

  if 'webmention' in self.source.features:
    # Blog posts
    blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = self.process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(b.key.id(), text=text,
                                      a_class='original-post', max_length=40,
                                      new_tab=True)

    # Blog webmentions
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == self.source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(w.source_url(),
                                         a_class='original-post', new_tab=True)
      try:
        target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                            self.source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(w.target_url(),
                                         a_class='original-post', new_tab=True,
                                         keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return vars
def template_vars(self):
  vars = super(UserHandler, self).template_vars()
  vars.update({
    'source': self.source,
    'EPOCH': util.EPOCH,
    'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
    'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  })
  if not self.source:
    return vars

  if isinstance(self.source, instagram.Instagram):
    auth = self.source.auth_entity
    vars['indieauth_me'] = (
      auth.id if isinstance(auth, indieauth.IndieAuth)
      else self.source.domain_urls[0] if self.source.domain_urls
      else None)

  # Blog webmention promos
  if 'webmention' not in self.source.features:
    if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
      vars[self.source.SHORT_NAME + '_promo'] = True
    else:
      for domain in self.source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (domain.endswith('tumblr.com') and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (domain.endswith('wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses
  if 'listen' in self.source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == self.source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      val = self.request.get(param)
      try:
        return util.parse_iso8601(val) if val else None
      except:
        msg = "Couldn't parse %s %r as ISO8601" % (param, val)
        logging.exception(msg)
        self.abort(400, msg)

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      self.abort(400, "can't handle both responses_before and responses_after")
    elif after:
      query = query.filter(Response.updated > after).order(Response.updated)
    elif before:
      query = query.filter(Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json.loads(r.response_json)
      r.activities = [json.loads(a) for a in r.activities_json]

      if (not self.source.is_activity_public(r.response) or
          not all(self.source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        r.activities = []

      r.actor = r.response.get('author') or r.response.get('actor', {})

      for a in r.activities + [r.response]:
        if not a.get('content'):
          a['content'] = a.get('object', {}).get('content')

      if not r.response.get('content'):
        phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'rsvp-interested': 'is interested',
          'invite': 'is invited',
        }
        r.response['content'] = '%s %s.' % (
          r.actor.get('displayName') or '',
          phrases.get(r.type) or phrases.get(r.response.get('verb')))

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, self)

      # generate original post links
      r.links = self.process_webmention_links(r)
      r.original_links = [util.pretty_link(url, new_tab=True)
                          for url in r.original_posts]

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    new_after = (
      before if before
      else vars['responses'][0].updated
      if vars['responses'] and query_iter.probably_has_next() and (before or after)
      else None)
    if new_after:
      vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                      new_after.isoformat())

    new_before = (
      after if after
      else vars['responses'][-1].updated
      if vars['responses'] and query_iter.probably_has_next()
      else None)
    if new_before:
      vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                       new_before.isoformat())

    vars['next_poll'] = max(
      self.source.last_poll_attempt + self.source.poll_period(),
      # lower bound is 1 minute from now
      util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes
  if 'publish' in self.source.features:
    publishes = Publish.query().filter(Publish.source == self.source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      p.pretty_page = util.pretty_link(
        p.key.parent().id(),
        attrs={'class': 'original-post u-url u-name'}, new_tab=True)
    vars['publishes'] = publishes

  if 'webmention' in self.source.features:
    # Blog posts
    blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = self.process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(
        b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
        max_length=40, new_tab=True)

    # Blog webmentions
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == self.source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
        w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      try:
        target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                            self.source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
        w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
        keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return vars
def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
  """Fetches a URL and extracts its mf2 data.

  Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
  on errors.

  Args:
    url: string
    require_mf2: boolean, whether to return error if no mf2 are found
    raise_errors: boolean, whether to let error exceptions propagate up or
      handle them

  Returns:
    (:class:`requests.Response`, mf2 data dict) on success, None on failure
  """
  try:
    fetched = util.requests_get(url)
    fetched.raise_for_status()
  except BaseException as e:
    if raise_errors:
      raise
    util.interpret_http_exception(e)  # log exception
    return self.error('Could not fetch source URL %s' % url)

  if self.entity:
    self.entity.html = fetched.text

  # .text is decoded unicode string, .content is raw bytes. if the HTTP
  # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
  # can look for a <meta> tag with a charset and decode.
  text = (fetched.text if 'charset' in fetched.headers.get('content-type', '')
          else fetched.content)
  doc = util.beautifulsoup_parse(text)

  # parse microformats
  data = util.mf2py_parse(doc, fetched.url)

  # special case tumblr's markup: div#content > div.post > div.copy
  # convert to mf2 and re-parse
  if not data.get('items'):
    contents = doc.find_all(id='content')
    if contents:
      post = contents[0].find_next(class_='post')
      if post:
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        doc = unicode(post)
        data = util.mf2py_parse(doc, fetched.url)

  logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
  items = data.get('items', [])
  if require_mf2 and (not items or not items[0]):
    return self.error('No microformats2 data found in ' + fetched.url,
                      data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

  return fetched, data