def post(self):
  """Manually kicks off discovery for a single post (legacy webapp2 handler).

  Request params:
    source_key: urlsafe ndb key of the source account to discover for
    url: post URL, either on the silo or on the user's own web site

  Flashes a status message and redirects back to the source's user page.
  """
  # load source
  try:
    source = ndb.Key(urlsafe=util.get_required_param(self, 'source_key')).get()
    if not source:
      self.abort(400, 'Source key not found')
  except ProtocolBufferDecodeError:
    # the urlsafe string wasn't a valid ndb key
    logging.exception('Bad value for source_key')
    self.abort(400, 'Bad value for source_key')

  # validate URL, find silo post
  url = util.get_required_param(self, 'url')
  domain = util.domain_from_link(url)
  msg = 'Discovering now. Refresh in a minute to see the results!'

  if domain == source.GR_CLASS.DOMAIN:
    # silo post URL: queue a discover task for that post id directly
    post_id = source.GR_CLASS.post_id(url)
    util.add_discover_task(source, post_id)
  elif util.domain_or_parent_in(domain, source.domains):
    # URL on the user's own site: look for syndication links pointing at the
    # silo, then queue a discover task per link found
    synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
    if synd_links:
      for link in synd_links:
        util.add_discover_task(source, source.GR_CLASS.post_id(link))
    else:
      msg = 'Failed to fetch %s or find a %s syndication link.' % (
        util.pretty_link(url), source.GR_CLASS.NAME)
  else:
    # URL is on neither the silo nor any of the source's registered domains
    msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME

  self.messages.add(msg)
  self.redirect(source.bridgy_url(self))
def discover():
  """Manually kicks off discovery for a single post (Flask view).

  Reads the post URL from the ``url`` form field. If it's a silo post URL,
  queues a discover task for it; if it's a URL on one of the source's own
  domains, fetches it, extracts syndication links, and queues a discover
  task per link. Flashes a status message and redirects to the user page.
  """
  source = util.load_source()

  # validate URL, find silo post
  url = request.form['url']
  domain = util.domain_from_link(url)
  url_path = urllib.parse.urlparse(url).path
  message = 'Discovering now. Refresh in a minute to see the results!'
  silo = source.gr_source

  if domain == silo.DOMAIN:
    # silo post URL: extract the post id and queue a discover task
    silo_post_id = silo.post_id(url)
    if not silo_post_id:
      message = f"Sorry, that doesn't look like a {silo.NAME} post URL."
    else:
      util.add_discover_task(
        source, silo_post_id,
        type='event' if url_path.startswith('/events/') else None)

  elif util.domain_or_parent_in(domain, source.domains):
    # URL on the user's own site: look for syndication links to the silo
    links = original_post_discovery.process_entry(source, url, {}, False, [])
    if not links:
      message = f'Failed to fetch {util.pretty_link(url)} or find a {silo.NAME} syndication link.'
    else:
      for link in links:
        util.add_discover_task(source, silo.post_id(link))
      # record that we successfully found a syndication URL for this source
      source.updates = {'last_syndication_url': util.now_fn()}
      models.Source.put_updates(source)

  else:
    # URL is on neither the silo nor any of the source's registered domains
    message = f'Please enter a URL on either your web site or {silo.NAME}.'

  flash(message)
  return redirect(source.bridgy_url())
def post(self): source = self.load_source() # validate URL, find silo post url = util.get_required_param(self, 'url') domain = util.domain_from_link(url) path = urllib.parse.urlparse(url).path msg = 'Discovering now. Refresh in a minute to see the results!' if domain == source.GR_CLASS.DOMAIN: post_id = source.GR_CLASS.post_id(url) if post_id: type = 'event' if path.startswith('/events/') else None util.add_discover_task(source, post_id, type=type) else: msg = "Sorry, that doesn't look like a %s post URL." % source.GR_CLASS.NAME elif util.domain_or_parent_in(domain, source.domains): synd_links = original_post_discovery.process_entry( source, url, {}, False, []) if synd_links: for link in synd_links: util.add_discover_task(source, source.GR_CLASS.post_id(link)) source.updates = {'last_syndication_url': util.now_fn()} models.Source.put_updates(source) else: msg = 'Failed to fetch %s or find a %s syndication link.' % ( util.pretty_link(url), source.GR_CLASS.NAME) else: msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME self.messages.add(msg) self.redirect(source.bridgy_url(self))
def post(self): source = self.load_source() # validate URL, find silo post url = util.get_required_param(self, 'url') domain = util.domain_from_link(url) path = urlparse.urlparse(url).path msg = 'Discovering now. Refresh in a minute to see the results!' if domain == source.GR_CLASS.DOMAIN: post_id = source.GR_CLASS.post_id(url) if post_id: type = 'event' if path.startswith('/events/') else None util.add_discover_task(source, post_id, type=type) else: msg = "Sorry, that doesn't look like a %s post URL." % source.GR_CLASS.NAME elif util.domain_or_parent_in(domain, source.domains): synd_links = original_post_discovery.process_entry(source, url, {}, False, []) if synd_links: for link in synd_links: util.add_discover_task(source, source.GR_CLASS.post_id(link)) source.updates = {'last_syndication_url': util.now_fn()} models.Source.put_updates(source) else: msg = 'Failed to fetch %s or find a %s syndication link.' % ( util.pretty_link(url), source.GR_CLASS.NAME) else: msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME self.messages.add(msg) self.redirect(source.bridgy_url(self))
def _find_source(self, source_cls, url, domain):
  """Returns the source that should publish a post URL, or None if not found.

  Args:
    source_cls: :class:`models.Source` subclass for this silo
    url: string
    domain: string, url's domain

  Returns:
    :class:`models.Source`
  """
  domain = domain.lower()
  # refuse URLs on Bridgy's own domains
  if util.domain_or_parent_in(domain, util.DOMAINS):
    return self.error(
      f'Source URL should be on your own site, not {domain}')

  sources = source_cls.query().filter(
    source_cls.domains == domain).fetch(100)
  if not sources:
    msg = f'Could not find <b>{source_cls.GR_CLASS.NAME}</b> account for <b>{domain}</b>. Check that your {source_cls.GR_CLASS.NAME} profile has {domain} in its <em>web site</em> or <em>link</em> field, then try signing up again.'
    return self.error(msg, html=msg)

  current_url = ''       # longest domain_url prefix matched so far
  sources_ready = []     # enabled sources with the publish feature
  best_match = None      # source owning the longest matching domain_url
  for source in sources:
    logging.info(
      f'Source: {source.bridgy_url()} , features {source.features}, status {source.status}, poll status {source.poll_status}'
    )
    if source.status != 'disabled' and 'publish' in source.features:
      # use a source that has a domain_url matching the url provided,
      # including path. find the source with the closest match.
      sources_ready.append(source)
      schemeless_url = util.schemeless(url.lower()).strip('/')
      for domain_url in source.domain_urls:
        schemeless_domain_url = util.schemeless(
          domain_url.lower()).strip('/')
        # longer domain_url prefix wins (closest match including path)
        if (schemeless_url.startswith(schemeless_domain_url) and
            len(domain_url) > len(current_url)):
          current_url = domain_url
          best_match = source

  if best_match:
    return best_match

  # no match: report why — either no domain_url matched, or publish is off
  if sources_ready:
    msg = f'No account found that matches {util.pretty_link(url)}. Check that <a href="{util.host_url("/about#profile-link")}">the web site URL is in your silo profile</a>, then <a href="{request.host_url}">sign up again</a>.'
  else:
    msg = f'Publish is not enabled for your account. <a href="{request.host_url}">Try signing up!</a>'
  self.error(msg, html=msg)
def test_domain_or_parent_in(self):
  """Checks util.domain_or_parent_in for matching and non-matching inputs.

  Each case is (candidate domain, list of domains); the candidate should
  match when it equals, or is a subdomain of, one of the domains.
  """
  for expected, cases in (
      (False, [
        ('', []),
        ('', ['']),
        ('x', []),
        ('x', ['']),
        ('x.y', []),
        ('x.y', ['']),
        ('', ['x', 'y']),
        ('', ['x.y']),
        ('x', ['y']),
        ('xy', ['y', 'x']),
        ('x', ['yx']),
        ('v.w.x', ['v.w', 'x.w']),
        ('x', ['', 'y', 'xy', 'yx', 'xx', 'xxx']),
      ]),
      (True, [
        ('x', ['x']),
        ('x', ['x', 'y']),
        ('x', ['y', 'x']),
        ('w.x', ['x']),
        ('u.v.w.x', ['y', 'v.w.x']),
      ])):
    for candidate, domains in cases:
      # Python 2 backtick repr (a syntax error under Python 3) replaced with
      # repr(); deprecated assertEquals replaced with assertEqual; loop var
      # renamed so it no longer shadows the builtin input().
      self.assertEqual(expected, util.domain_or_parent_in(candidate, domains),
                       repr((candidate, domains, expected)))
def template_vars(self):
  """Returns the template variable dict for the user page (webapp2 version).

  Adds the source plus, depending on its enabled features: blog webmention
  promo flags, recent responses (with before/after paging links and next
  poll time), recent publishes, and recent blog posts and webmentions.
  """
  vars = super(UserHandler, self).template_vars()
  vars.update({
    'source': self.source,
    'sources': models.sources,
    'logs': logs,
    'EPOCH': util.EPOCH,
    'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
    'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  })
  if not self.source:
    return vars

  # Blog webmention promos
  if 'webmention' not in self.source.features:
    if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
      vars[self.source.SHORT_NAME + '_promo'] = True
    else:
      # promo a blog silo only if no existing account covers this domain
      for domain in self.source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (util.domain_or_parent_in(domain, ['tumblr.com']) and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (util.domain_or_parent_in(domain, 'wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses
  if 'listen' in self.source.features or 'email' in self.source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == self.source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      # parses a request param as an ISO8601 datetime, aborting 400 on failure
      val = self.request.get(param)
      try:
        return util.parse_iso8601(val) if val else None
      except:
        msg = "Couldn't parse %s %r as ISO8601" % (param, val)
        logging.warning(msg, stack_info=True)
        self.abort(400, msg)

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      self.abort(400, "can't handle both responses_before and responses_after")
    elif after:
      query = query.filter(Response.updated > after).order(Response.updated)
    elif before:
      query = query.filter(Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json_loads(r.response_json)
      r.activities = [json_loads(a) for a in r.activities_json]

      # skip responses whose response or any activity isn't public
      if (not self.source.is_activity_public(r.response) or
          not all(self.source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        r.activities = []

      verb = r.response.get('verb')
      # invites name the invitee in 'object'; otherwise prefer author/actor
      r.actor = (r.response.get('object') if verb == 'invite'
                 else r.response.get('author') or r.response.get('actor')
                 ) or {}

      # backfill missing content from the nested object
      activity_content = ''
      for a in r.activities + [r.response]:
        if not a.get('content'):
          obj = a.get('object', {})
          a['content'] = activity_content = (
            obj.get('content') or obj.get('displayName') or
            # historical, from a Reddit bug fixed in granary@4f9df7c
            obj.get('name') or '')

      response_content = r.response.get('content')
      # human-readable phrases for interaction types
      phrases = {
        'like': 'liked this',
        'repost': 'reposted this',
        'rsvp-yes': 'is attending',
        'rsvp-no': 'is not attending',
        'rsvp-maybe': 'might attend',
        'rsvp-interested': 'is interested',
        'invite': 'is invited',
      }
      phrase = phrases.get(r.type) or phrases.get(verb)
      # replace content with "<actor> <phrase>." except for quote-reposts
      # (reposts whose content doesn't simply repeat the activity content)
      if phrase and (r.type != 'repost' or
                     activity_content.startswith(response_content)):
        r.response['content'] = '%s %s.' % (
          r.actor.get('displayName') or '', phrase)

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, self)

      # generate original post links
      r.links = self.process_webmention_links(r)
      r.original_links = [util.pretty_link(url, new_tab=True)
                          for url in r.original_posts]

      vars['responses'].append(r)
      # stop at 10 public responses, or after scanning ~200 total
      if len(vars['responses']) >= 10 or i > 200:
        break

    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    new_after = (
      before if before else
      vars['responses'][0].updated if
        vars['responses'] and query_iter.probably_has_next() and (before or after)
      else None)
    if new_after:
      vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                      new_after.isoformat())

    new_before = (
      after if after else
      vars['responses'][-1].updated if
        vars['responses'] and query_iter.probably_has_next()
      else None)
    if new_before:
      vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                       new_before.isoformat())

    vars['next_poll'] = max(
      self.source.last_poll_attempt + self.source.poll_period(),
      # lower bound is 1 minute from now
      util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes
  if 'publish' in self.source.features:
    publishes = Publish.query().filter(Publish.source == self.source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      # the Publish key's parent id is the original page URL
      p.pretty_page = util.pretty_link(
        p.key.parent().id(),
        attrs={'class': 'original-post u-url u-name'},
        new_tab=True)

    vars['publishes'] = publishes

  if 'webmention' in self.source.features:
    # Blog posts
    blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = self.process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(
        b.key.id(), text=text,
        attrs={'class': 'original-post u-url u-name'},
        max_length=40, new_tab=True)

    # Blog webmentions
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == self.source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
        w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      # keep the host visible in the link when the target is the user's own
      # domain; swallow parse errors and fall back to hiding it
      try:
        target_is_source = (urllib.parse.urlparse(w.target_url()).netloc in
                            self.source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
        w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
        keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return vars
def user(site, id):
  """View for a user page.

  Looks up the source by silo (``site``) and id, following renames via a
  301 redirect when the id matches a stored domain/username property.
  Renders the per-silo user template with promos, responses (with paging),
  publishes, blog posts, and webmentions, depending on enabled features.
  """
  cls = models.sources.get(site)
  if not cls:
    return render_template('user_not_found.html'), 404

  source = cls.lookup(id)
  if not source:
    # fall back to matching any stored identifier property; redirect
    # permanently to the canonical path if found
    key = cls.query(
      ndb.OR(*[
        ndb.GenericProperty(prop) == id
        for prop in ('domains', 'inferred_username', 'name', 'username')
      ])).get(keys_only=True)
    if key:
      return redirect(cls(key=key).bridgy_path(), code=301)

  if not source or not source.features:
    return render_template('user_not_found.html'), 404

  source.verify()
  source = util.preprocess_source(source)

  vars = {
    'source': source,
    'logs': logs,
    'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
    'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  }

  # Blog webmention promos
  if 'webmention' not in source.features:
    if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
      vars[source.SHORT_NAME + '_promo'] = True
    else:
      # promo a blog silo only if no existing account covers this domain
      for domain in source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (util.domain_or_parent_in(domain, ['tumblr.com']) and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (util.domain_or_parent_in(domain, 'wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses
  if 'listen' in source.features or 'email' in source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      # parses a request param as ISO8601; '+' may arrive as a space, so
      # restore it before parsing. errors out on bad values.
      val = request.values.get(param)
      try:
        return util.parse_iso8601(val.replace(' ', '+')) if val else None
      except BaseException:
        error(f"Couldn't parse {param}, {val!r} as ISO8601")

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      error("can't handle both responses_before and responses_after")
    elif after:
      query = query.filter(Response.updated > after).order(
        Response.updated)
    elif before:
      query = query.filter(
        Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json_loads(r.response_json)
      r.activities = [json_loads(a) for a in r.activities_json]

      # skip responses whose response or any activity isn't public
      if (not source.is_activity_public(r.response) or
          not all(source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        r.activities = []

      verb = r.response.get('verb')
      # invites name the invitee in 'object'; otherwise prefer author/actor
      r.actor = (r.response.get('object') if verb == 'invite'
                 else r.response.get('author') or r.response.get('actor')) or {}

      # backfill missing content from the nested object
      activity_content = ''
      for a in r.activities + [r.response]:
        if not a.get('content'):
          obj = a.get('object', {})
          a['content'] = activity_content = (
            obj.get('content') or obj.get('displayName') or
            # historical, from a Reddit bug fixed in granary@4f9df7c
            obj.get('name') or '')

      response_content = r.response.get('content')
      # human-readable phrases for interaction types
      phrases = {
        'like': 'liked this',
        'repost': 'reposted this',
        'rsvp-yes': 'is attending',
        'rsvp-no': 'is not attending',
        'rsvp-maybe': 'might attend',
        'rsvp-interested': 'is interested',
        'invite': 'is invited',
      }
      phrase = phrases.get(r.type) or phrases.get(verb)
      # replace content with "<actor> <phrase>." except for quote-reposts
      # (reposts whose content doesn't simply repeat the activity content)
      if phrase and (r.type != 'repost' or
                     activity_content.startswith(response_content)):
        r.response[
          'content'] = f'{r.actor.get("displayName") or ""} {phrase}.'

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(
          image_url, request)

      # generate original post links
      r.links = process_webmention_links(r)
      r.original_links = [
        util.pretty_link(url, new_tab=True)
        for url in r.original_posts
      ]

      vars['responses'].append(r)
      # stop at 10 public responses, or after scanning ~200 total
      if len(vars['responses']) >= 10 or i > 200:
        break

    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    new_after = (before if before else
                 vars['responses'][0].updated if
                 vars['responses'] and query_iter.probably_has_next() and
                 (before or after) else None)
    if new_after:
      vars[
        'responses_after_link'] = f'?responses_after={new_after.isoformat()}#responses'

    new_before = (after if after else
                  vars['responses'][-1].updated if
                  vars['responses'] and query_iter.probably_has_next()
                  else None)
    if new_before:
      vars[
        'responses_before_link'] = f'?responses_before={new_before.isoformat()}#responses'

    vars['next_poll'] = max(
      source.last_poll_attempt + source.poll_period(),
      # lower bound is 1 minute from now
      util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes
  if 'publish' in source.features:
    publishes = Publish.query().filter(Publish.source == source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      # the Publish key's parent id is the original page URL
      p.pretty_page = util.pretty_link(
        p.key.parent().id(),
        attrs={'class': 'original-post u-url u-name'},
        new_tab=True)

    vars['publishes'] = publishes

  if 'webmention' in source.features:
    # Blog posts
    blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(
        b.key.id(), text=text,
        attrs={'class': 'original-post u-url u-name'},
        max_length=40, new_tab=True)

    # Blog webmentions
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
        w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      # keep the host visible in the link when the target is the user's own
      # domain; swallow parse errors and fall back to hiding it
      try:
        target_is_source = (urllib.parse.urlparse(
          w.target_url()).netloc in source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
        w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
        keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return render_template(f'{source.SHORT_NAME}_user.html', **vars)