def test_bad_target_url(self, mock_get, mock_post):
    mock_get.side_effect = (
        requests_response(self.reply_html.replace('http://orig/post', 'bad'),
                          content_type=CONTENT_TYPE_HTML),
        ValueError('foo bar'))

    got = self.client.post('/webmention', data={'source': 'http://a/post'})
    self.assertEqual(400, got.status_code)
    self.assertEqual(0, Response.query().count())

def test_bad_source_url(self, mock_get, mock_post):
    got = self.client.post('/webmention', data=b'')
    self.assertEqual(400, got.status_code)

    mock_get.side_effect = ValueError('foo bar')
    got = self.client.post('/webmention', data={'source': 'bad'})
    self.assertEqual(400, got.status_code)
    self.assertEqual(0, Response.query().count())

def refetch_hfeed(self, source):
    """Refetch and reprocess the author's url, looking for new or updated
    syndication urls that we may have missed the first time we looked for them.
    """
    logging.debug('refetching h-feed for source %s', source.label())
    relationships = original_post_discovery.refetch(source)
    if not relationships:
        return

    logging.debug('refetch h-feed found %d new rel=syndication relationships',
                  len(relationships))

    # grab the Responses and see if any of them have a syndication url
    # matching one of the newly discovered relationships. We'll check each
    # response until we've seen all of them or until the 60s timer runs out.
    # TODO maybe add a (canonicalized) url field to Response so we can
    # query by it instead of iterating over all of them
    for response in (Response.query(Response.source == source.key)
                     .order(-Response.created)):
        if response.activity_json:  # handle old entities
            response.activities_json.append(response.activity_json)
            response.activity_json = None

        new_orig_urls = set()
        for activity_json in response.activities_json:
            activity = json.loads(activity_json)
            activity_url = activity.get('url') or activity.get('object', {}).get('url')
            if not activity_url:
                logging.warning('activity has no url %s', activity_json)
                continue

            activity_url = source.canonicalize_syndication_url(activity_url)

            # look for activity url in the newly discovered list of relationships
            for relationship in relationships.get(activity_url, []):
                # won't re-propagate if the discovered link is already among
                # these well-known upstream duplicates
                if relationship.original in response.sent:
                    logging.info(
                        '%s found a new rel=syndication link %s -> %s, but the '
                        'relationship had already been discovered by another method',
                        response.label(), relationship.original,
                        relationship.syndication)
                else:
                    logging.info(
                        '%s found a new rel=syndication link %s -> %s, and '
                        'will be repropagated with a new target!',
                        response.label(), relationship.original,
                        relationship.syndication)
                    new_orig_urls.add(relationship.original)

        if new_orig_urls:
            # re-open a previously 'complete' propagate task
            response.status = 'new'
            response.unsent.extend(list(new_orig_urls))
            response.put()
            response.add_task()

def test_target_fetch_has_no_content_type(self, mock_get, mock_post):
    html = self.reply_html.replace(
        '</body>',
        "<link href='http://as2' rel='alternate' type='application/activity+json'></body")
    mock_get.side_effect = (
        requests_response(self.reply_html),
        # http://not/fediverse
        requests_response(self.reply_html, content_type='None'),
    )
    got = self.client.post('/webmention', data={'source': 'http://a/post'})
    self.assertEqual(502, got.status_code)
    self.assertEqual(0, Response.query().count())

def test_get_or_save_new(self):
    """new. should add a propagate task."""
    response = self.responses[0]
    self.assertEqual(0, Response.query().count())
    self.expect_task('propagate', response_key=self.responses[0])
    self.mox.ReplayAll()

    saved = response.get_or_save(self.sources[0])
    self.assertEqual(response.key, saved.key)
    self.assertEqual(response.source, saved.source)
    self.assertEqual('comment', saved.type)
    self.assertEqual([], saved.old_response_jsons)

def responses():
    """Renders recent Responses, with links to logs."""
    responses = Response.query().order(-Response.updated).fetch(20)
    for r in responses:
        r.source_link = util.pretty_link(r.source())
        r.target_link = util.pretty_link(r.target())
        r.log_url_path = '/log?' + urllib.parse.urlencode({
            'key': r.key.id(),
            'start_time': calendar.timegm(r.updated.timetuple()),
        })

    return render_template('responses.html', responses=responses)

def template_vars(self):
    responses = Response.query().order(-Response.updated).fetch(20)
    for r in responses:
        r.source_link = util.pretty_link(r.source())
        r.target_link = util.pretty_link(r.target())
        r.log_url_path = '/log?' + urllib.parse.urlencode({
            'key': r.key.id(),
            'start_time': calendar.timegm(r.updated.timetuple()),
        })

    return {
        'responses': responses,
    }

def repropagate_old_responses(self, source, relationships):
    """Find old Responses that match a new SyndicatedPost and repropagate them.

    We look through as many responses as we can until the datastore query expires.

    Args:
      source: :class:`models.Source`
      relationships: refetch result
    """
    for response in Response.query(Response.source == source.key).order(-Response.updated):
        new_orig_urls = set()
        for activity_json in response.activities_json:
            activity = json.loads(activity_json)
            activity_url = activity.get("url") or activity.get("object", {}).get("url")
            if not activity_url:
                logging.warning("activity has no url %s", activity_json)
                continue

            activity_url = source.canonicalize_url(activity_url, activity=activity)
            if not activity_url:
                continue

            # look for activity url in the newly discovered list of relationships
            for relationship in relationships.get(activity_url, []):
                # won't re-propagate if the discovered link is already among
                # these well-known upstream duplicates
                if (relationship.original in response.sent
                        or relationship.original in response.original_posts):
                    logging.info(
                        "%s found a new rel=syndication link %s -> %s, but the "
                        "relationship had already been discovered by another method",
                        response.label(),
                        relationship.original,
                        relationship.syndication,
                    )
                else:
                    logging.info(
                        "%s found a new rel=syndication link %s -> %s, and "
                        "will be repropagated with a new target!",
                        response.label(),
                        relationship.original,
                        relationship.syndication,
                    )
                    new_orig_urls.add(relationship.original)

        if new_orig_urls:
            # re-open a previously 'complete' propagate task
            response.status = "new"
            response.unsent.extend(list(new_orig_urls))
            response.put()
            response.add_task()

def repropagate_old_responses(self, source, relationships):
    """Find old Responses that match a new SyndicatedPost and repropagate them.

    We look through as many responses as we can until the datastore query expires.

    Args:
      source: :class:`models.Source`
      relationships: refetch result
    """
    for response in (Response.query(Response.source == source.key)
                     .order(-Response.updated)):
        new_orig_urls = set()
        for activity_json in response.activities_json:
            activity = json_loads(activity_json)
            activity_url = activity.get('url') or activity.get('object', {}).get('url')
            if not activity_url:
                logging.warning('activity has no url %s', activity_json)
                continue

            activity_url = source.canonicalize_url(activity_url, activity=activity)
            if not activity_url:
                continue

            # look for activity url in the newly discovered list of relationships
            for relationship in relationships.get(activity_url, []):
                # won't re-propagate if the discovered link is already among
                # these well-known upstream duplicates
                if (relationship.original in response.sent or
                    relationship.original in response.original_posts):
                    logging.info(
                        '%s found a new rel=syndication link %s -> %s, but the '
                        'relationship had already been discovered by another method',
                        response.label(), relationship.original,
                        relationship.syndication)
                else:
                    logging.info(
                        '%s found a new rel=syndication link %s -> %s, and '
                        'will be repropagated with a new target!',
                        response.label(), relationship.original,
                        relationship.syndication)
                    new_orig_urls.add(relationship.original)

        if new_orig_urls:
            # re-open a previously 'complete' propagate task
            response.status = 'new'
            response.unsent.extend(list(new_orig_urls))
            response.put()
            response.add_task()

def test_no_backlink(self, mock_get, mock_post):
    mock_get.return_value = requests_response(
        self.reply_html.replace('<a href="http://localhost/"></a>', ''),
        content_type=CONTENT_TYPE_HTML)

    got = self.client.post('/webmention', data={
        'source': 'http://a/post',
        'target': 'https://fed.brid.gy/',
    })
    self.assertEqual(400, got.status_code)
    self.assertEqual(0, Response.query().count())

    mock_get.assert_has_calls((self.req('http://a/post'),))

def test_good(self):
    self.expect_requests_get('http://foo.com/', """
<html class="h-feed">
<div class="h-entry">
<a class="u-url" href="http://foo.com/post"></a>
<a class="u-syndication" href="https://www.facebook.com/snarfed.org/posts/123"></a>
</div>
</html>""")
    self.mox.ReplayAll()

    self.handler.receive(self.mail)
    self.assert_equals(200, self.response.status_code)

    emails = list(FacebookEmail.query())
    self.assertEquals(1, len(emails))
    self.assert_equals('SMTP-123-xyz', emails[0].key.id())
    self.assert_equals(self.fea.key, emails[0].source)
    self.assert_equals([COMMENT_EMAIL_USERNAME], emails[0].htmls)
    resp_id = EMAIL_COMMENT_OBJ_USERNAME['id']
    self.assert_equals(ndb.Key('Response', resp_id), emails[0].response)

    expected = Response(
        id=resp_id,
        source=self.fea.key,
        type='comment',
        response_json=json.dumps(EMAIL_COMMENT_OBJ_USERNAME),
        activities_json=[json.dumps({
            'id': '123',
            'numeric_id': '123',
            'url': 'https://www.facebook.com/212038/posts/123',
            'author': {'id': 'snarfed.org'},
        })],
        unsent=['http://foo.com/post'])
    self.assert_entities_equal([expected], list(Response.query()),
                               ignore=('created', 'updated'))

    tasks = self.taskqueue_stub.GetTasks('propagate')
    self.assertEquals(1, len(tasks))
    self.assert_equals(expected.key.urlsafe(),
                       testutil.get_task_params(tasks[0])['response_key'])

    self.assert_equals(EMAIL_COMMENT_OBJ_USERNAME, self.fea.get_comment('123_789'))

def test_inbox_reply_drop_self_domain_target(self, mock_head, mock_get, mock_post):
    reply = copy.deepcopy(REPLY_OBJECT)
    # same domain as source; should drop
    reply['inReplyTo'] = 'http://localhost/this'
    mock_head.return_value = requests_response(url='http://this/')

    got = self.client.post('/foo.com/inbox', json=reply)
    self.assertEqual(200, got.status_code, got.get_data(as_text=True))

    mock_head.assert_called_once_with(
        'http://this', allow_redirects=True, stream=True, timeout=15)
    mock_get.assert_not_called()
    mock_post.assert_not_called()
    self.assertEqual(0, Response.query().count())

def template_vars(self):
    responses = Response.query().order(-Response.updated).fetch(20)
    for r in responses:
        r.source_link = util.pretty_link(r.source())
        r.target_link = util.pretty_link(r.target())
        # TODO: support inbound too
        if r.direction == 'out' and r.updated >= VERSION_1_DEPLOYED:
            r.log_url_path = '/log?' + urllib.urlencode({
                'key': r.key.id(),
                'start_time': calendar.timegm(r.updated.timetuple()),
            })

    return {
        'responses': responses,
    }

def repropagate_old_responses(self, source, relationships):
    """Find old Responses that match a new SyndicatedPost and repropagate them.

    We look through as many responses as we can until the datastore query expires.
    """
    for response in (Response.query(Response.source == source.key)
                     .order(-Response.updated)):
        if response.activity_json:  # handle old entities
            response.activities_json.append(response.activity_json)
            response.activity_json = None

        new_orig_urls = set()
        for activity_json in response.activities_json:
            activity = json.loads(activity_json)
            activity_url = activity.get('url') or activity.get('object', {}).get('url')
            if not activity_url:
                logging.warning('activity has no url %s', activity_json)
                continue

            activity_url = source.canonicalize_syndication_url(activity_url,
                                                               activity=activity)

            # look for activity url in the newly discovered list of relationships
            for relationship in relationships.get(activity_url, []):
                # won't re-propagate if the discovered link is already among
                # these well-known upstream duplicates
                if (relationship.original in response.sent or
                    relationship.original in response.original_posts):
                    logging.info(
                        '%s found a new rel=syndication link %s -> %s, but the '
                        'relationship had already been discovered by another method',
                        response.label(), relationship.original,
                        relationship.syndication)
                else:
                    logging.info(
                        '%s found a new rel=syndication link %s -> %s, and '
                        'will be repropagated with a new target!',
                        response.label(), relationship.original,
                        relationship.syndication)
                    new_orig_urls.add(relationship.original)

        if new_orig_urls:
            # re-open a previously 'complete' propagate task
            response.status = 'new'
            response.unsent.extend(list(new_orig_urls))
            response.put()
            response.add_task()

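# All of the repropagate/refetch variants above consume the same refetch result
# shape: a dict mapping a canonicalized syndication URL to objects carrying
# .original (the author's post) and .syndication (the silo copy). A minimal
# runnable stand-in, with names inferred from the usage above rather than taken
# from Bridgy's actual classes:
from collections import namedtuple

Relationship = namedtuple('Relationship', ['original', 'syndication'])

relationships = {
    'https://twitter.com/alice/status/123': [
        Relationship(original='https://alice.example/2023/post',
                     syndication='https://twitter.com/alice/status/123'),
    ],
}

# the inner loops above do, in essence:
activity_url = 'https://twitter.com/alice/status/123'  # already canonicalized
for rel in relationships.get(activity_url, []):
    print(rel.original, '->', rel.syndication)
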
def test_get_or_save(self):
    response = self.responses[0]
    self.assertEqual(0, Response.query().count())
    self.assert_no_propagate_task()

    # new. should add a propagate task.
    saved = response.get_or_save(self.sources[0])
    self.assertEqual(response.key, saved.key)
    self.assertEqual(response.source, saved.source)
    self.assertEqual('comment', saved.type)
    self.assertEqual([], saved.old_response_jsons)
    self.assert_propagate_task()

    # existing. no new task.
    same = saved.get_or_save(self.sources[0])
    self.assert_entities_equal(saved, same)
    self.assert_no_propagate_task()

def test_inbox_reply_drop_self_domain_target(self, mock_head, mock_get, mock_post):
    reply = copy.deepcopy(REPLY_OBJECT)
    # same domain as source; should drop
    reply['inReplyTo'] = 'http://localhost/this'
    mock_head.return_value = requests_response(url='http://this/')

    got = app.get_response('/foo.com/inbox', method='POST', body=json.dumps(reply))
    self.assertEquals(200, got.status_int, got.body)

    mock_head.assert_called_once_with('http://this', allow_redirects=True, timeout=15)
    mock_get.assert_not_called()
    mock_post.assert_not_called()
    self.assertEquals(0, Response.query().count())

def test_no_targets(self, mock_get, mock_post):
    mock_get.return_value = requests_response("""
<html>
<body class="h-entry">
<p class="e-content">no one to send to! <a href="http://localhost/"></a></p>
</body>
</html>""", content_type=CONTENT_TYPE_HTML)

    got = self.client.post('/webmention', data={
        'source': 'http://a/post',
        'target': 'https://fed.brid.gy/',
    })
    self.assertEqual(200, got.status_code)
    self.assertEqual(0, Response.query().count())

    mock_get.assert_has_calls((self.req('http://a/post'),))

def test_no_source_entry(self, mock_get, mock_post):
    mock_get.return_value = requests_response("""
<html>
<body>
<p>nothing to see here except <a href="http://localhost/">link</a></p>
</body>
</html>""", content_type=CONTENT_TYPE_HTML)

    got = self.client.post('/webmention', data={
        'source': 'http://a/post',
        'target': 'https://fed.brid.gy/',
    })
    self.assertEqual(400, got.status_code)
    self.assertEqual(0, Response.query().count())

    mock_get.assert_has_calls((self.req('http://a/post'),))

def template_vars(self):
    responses = []

    # Find the most recently propagated responses with error URLs
    for r in Response.query().order(-Response.updated):
        if (len(responses) >= self.NUM_RESPONSES or
            r.updated < datetime.datetime.now() - datetime.timedelta(hours=1)):
            break
        elif not r.error or r.status == 'complete':
            continue

        # r.source = r.source.get()
        r.links = [util.pretty_link(u, new_tab=True) for u in r.error + r.failed]
        r.response = json.loads(r.response_json)
        r.activities = [json.loads(a) for a in r.activities_json]
        responses.append(r)

    responses.sort(key=lambda r: (r.source, r.activities, r.response))
    return {'responses': responses}

def test_get_or_save(self):
    self.sources[0].put()
    response = self.responses[0]
    self.assertEqual(0, Response.query().count())
    self.assertEqual(0, len(self.taskqueue_stub.GetTasks('propagate')))

    # new. should add a propagate task.
    saved = response.get_or_save()
    self.assertEqual(response.key, saved.key)
    self.assertEqual(response.source, saved.source)
    self.assertEqual('comment', saved.type)

    tasks = self.taskqueue_stub.GetTasks('propagate')
    self.assertEqual(1, len(tasks))
    self.assertEqual(response.key.urlsafe(),
                     testutil.get_task_params(tasks[0])['response_key'])
    self.assertEqual('/_ah/queue/propagate', tasks[0]['url'])

    # existing. no new task.
    same = saved.get_or_save()
    self.assertEqual(saved.source, same.source)
    self.assertEqual(1, len(tasks))

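# The get_or_save tests above pin down a contract: the first call stores the
# entity and enqueues exactly one 'propagate' task; later calls return the
# stored entity and enqueue nothing. A minimal sketch of that shape, using a
# dict and a list as stand-ins for the datastore and the task queue (Bridgy's
# real Response is an ndb model; these names are illustrative only):
DATASTORE = {}   # key -> entity
TASKS = []       # 'propagate' queue stand-in

class SketchResponse:
    def __init__(self, key, source=None, type='comment'):
        self.key, self.source, self.type = key, source, type

    def get_or_save(self):
        existing = DATASTORE.get(self.key)
        if existing:
            return existing                      # existing entity: no new task
        DATASTORE[self.key] = self               # new entity: store it...
        TASKS.append({'queue': 'propagate',      # ...and enqueue exactly once
                      'response_key': self.key})
        return self

r = SketchResponse('tag:twitter,123_456', source='source-key')
assert r.get_or_save() is r and len(TASKS) == 1
assert r.get_or_save() is r and len(TASKS) == 1  # second call adds no task
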
def template_vars(self):
    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'EPOCH': util.EPOCH,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
    })
    if not self.source:
        return vars

    if isinstance(self.source, instagram.Instagram):
        auth = self.source.auth_entity
        vars['indieauth_me'] = (
            auth.id if isinstance(auth, indieauth.IndieAuth)
            else self.source.domain_urls[0] if self.source.domain_urls
            else None)

    # Blog webmention promos
    if 'webmention' not in self.source.features:
        if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
            vars[self.source.SHORT_NAME + '_promo'] = True
        else:
            for domain in self.source.domains:
                if ('.blogspot.' in domain and  # Blogger uses country TLDs
                    not Blogger.query(Blogger.domains == domain).get()):
                    vars['blogger_promo'] = True
                elif (domain.endswith('tumblr.com') and
                      not Tumblr.query(Tumblr.domains == domain).get()):
                    vars['tumblr_promo'] = True
                elif (domain.endswith('wordpress.com') and
                      not WordPress.query(WordPress.domains == domain).get()):
                    vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
        vars['responses'] = []
        query = Response.query().filter(Response.source == self.source.key)

        # if there's a paging param (responses_before or responses_after), update
        # query with it
        def get_paging_param(param):
            val = self.request.get(param)
            try:
                return util.parse_iso8601(val) if val else None
            except:
                msg = "Couldn't parse %s %r as ISO8601" % (param, val)
                logging.exception(msg)
                self.abort(400, msg)

        before = get_paging_param('responses_before')
        after = get_paging_param('responses_after')
        if before and after:
            self.abort(400, "can't handle both responses_before and responses_after")
        elif after:
            query = query.filter(Response.updated > after).order(Response.updated)
        elif before:
            query = query.filter(Response.updated < before).order(-Response.updated)
        else:
            query = query.order(-Response.updated)

        query_iter = query.iter()
        for i, r in enumerate(query_iter):
            r.response = json.loads(r.response_json)
            r.activities = [json.loads(a) for a in r.activities_json]

            if (not self.source.is_activity_public(r.response) or
                not all(self.source.is_activity_public(a) for a in r.activities)):
                continue
            elif r.type == 'post':
                r.activities = []

            r.actor = r.response.get('author') or r.response.get('actor', {})

            for a in r.activities + [r.response]:
                if not a.get('content'):
                    a['content'] = a.get('object', {}).get('content')

            if not r.response.get('content'):
                phrases = {
                    'like': 'liked this',
                    'repost': 'reposted this',
                    'rsvp-yes': 'is attending',
                    'rsvp-no': 'is not attending',
                    'rsvp-maybe': 'might attend',
                    'rsvp-interested': 'is interested',
                    'invite': 'is invited',
                }
                r.response['content'] = '%s %s.' % (
                    r.actor.get('displayName') or '',
                    phrases.get(r.type) or phrases.get(r.response.get('verb')))

            # convert image URL to https if we're serving over SSL
            image_url = r.actor.setdefault('image', {}).get('url')
            if image_url:
                r.actor['image']['url'] = util.update_scheme(image_url, self)

            # generate original post links
            r.links = self.process_webmention_links(r)
            r.original_links = [util.pretty_link(url, new_tab=True)
                                for url in r.original_posts]

            vars['responses'].append(r)
            if len(vars['responses']) >= 10 or i > 200:
                break

        vars['responses'].sort(key=lambda r: r.updated, reverse=True)

        # calculate new paging param(s)
        new_after = (
            before if before
            else vars['responses'][0].updated
            if vars['responses'] and query_iter.probably_has_next() and (before or after)
            else None)
        if new_after:
            vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                            new_after.isoformat())

        new_before = (
            after if after
            else vars['responses'][-1].updated
            if vars['responses'] and query_iter.probably_has_next()
            else None)
        if new_before:
            vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                             new_before.isoformat())

        vars['next_poll'] = max(
            self.source.last_poll_attempt + self.source.poll_period(),
            # lower bound is 1 minute from now
            util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in self.source.features:
        publishes = Publish.query().filter(Publish.source == self.source.key)\
                                   .order(-Publish.updated)\
                                   .fetch(10)
        for p in publishes:
            p.pretty_page = util.pretty_link(
                p.key.parent().id().decode('utf-8'),
                attrs={'class': 'original-post u-url u-name'}, new_tab=True)

        vars['publishes'] = publishes

    if 'webmention' in self.source.features:
        # Blog posts
        blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                    .order(-BlogPost.created)\
                                    .fetch(10)
        for b in blogposts:
            b.links = self.process_webmention_links(b)
            try:
                text = b.feed_item.get('title')
            except ValueError:
                text = None
            b.pretty_url = util.pretty_link(
                b.key.id(), text=text,
                attrs={'class': 'original-post u-url u-name'},
                max_length=40, new_tab=True)

        # Blog webmentions
        webmentions = BlogWebmention.query()\
            .filter(BlogWebmention.source == self.source.key)\
            .order(-BlogWebmention.updated)\
            .fetch(10)
        for w in webmentions:
            w.pretty_source = util.pretty_link(
                w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
            try:
                target_is_source = (urlparse.urlparse(w.target_url()).netloc
                                    in self.source.domains)
            except BaseException:
                target_is_source = False
            w.pretty_target = util.pretty_link(
                w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
                keep_host=target_is_source)

        vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars

def template_vars(self):
    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'EPOCH': util.EPOCH,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
    })
    if not self.source:
        return vars

    if isinstance(self.source, instagram.Instagram):
        auth = self.source.auth_entity
        vars['indieauth_me'] = (
            auth.id if isinstance(auth, indieauth.IndieAuth)
            else self.source.domain_urls[0] if self.source.domain_urls
            else None)

    # Blog webmention promos
    if 'webmention' not in self.source.features:
        if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
            vars[self.source.SHORT_NAME + '_promo'] = True
        else:
            for domain in self.source.domains:
                if ('.blogspot.' in domain and  # Blogger uses country TLDs
                    not Blogger.query(Blogger.domains == domain).get()):
                    vars['blogger_promo'] = True
                elif (domain.endswith('tumblr.com') and
                      not Tumblr.query(Tumblr.domains == domain).get()):
                    vars['tumblr_promo'] = True
                elif (domain.endswith('wordpress.com') and
                      not WordPress.query(WordPress.domains == domain).get()):
                    vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
        vars['responses'] = []
        query = Response.query().filter(Response.source == self.source.key)

        # if there's a paging param (responses_before or responses_after), update
        # query with it
        def get_paging_param(param):
            val = self.request.get(param)
            try:
                return util.parse_iso8601(val) if val else None
            except:
                msg = "Couldn't parse %s %r as ISO8601" % (param, val)
                logging.exception(msg)
                self.abort(400, msg)

        before = get_paging_param('responses_before')
        after = get_paging_param('responses_after')
        if before and after:
            self.abort(400, "can't handle both responses_before and responses_after")
        elif after:
            query = query.filter(Response.updated > after).order(Response.updated)
        elif before:
            query = query.filter(Response.updated < before).order(-Response.updated)
        else:
            query = query.order(-Response.updated)

        query_iter = query.iter()
        for i, r in enumerate(query_iter):
            r.response = json.loads(r.response_json)
            r.activities = [json.loads(a) for a in r.activities_json]

            if (not self.source.is_activity_public(r.response) or
                not all(self.source.is_activity_public(a) for a in r.activities)):
                continue
            elif r.type == 'post':
                r.activities = []

            r.actor = r.response.get('author') or r.response.get('actor', {})

            for a in r.activities + [r.response]:
                if not a.get('content'):
                    a['content'] = a.get('object', {}).get('content')

            if not r.response.get('content'):
                phrases = {
                    'like': 'liked this',
                    'repost': 'reposted this',
                    'rsvp-yes': 'is attending',
                    'rsvp-no': 'is not attending',
                    'rsvp-maybe': 'might attend',
                    'rsvp-interested': 'is interested',
                    'invite': 'is invited',
                }
                r.response['content'] = '%s %s.' % (
                    r.actor.get('displayName') or '',
                    phrases.get(r.type) or phrases.get(r.response.get('verb')))

            # convert image URL to https if we're serving over SSL
            image_url = r.actor.setdefault('image', {}).get('url')
            if image_url:
                r.actor['image']['url'] = util.update_scheme(image_url, self)

            # generate original post links
            r.links = self.process_webmention_links(r)
            r.original_links = [util.pretty_link(url, new_tab=True)
                                for url in r.original_posts]

            vars['responses'].append(r)
            if len(vars['responses']) >= 10 or i > 200:
                break

        vars['responses'].sort(key=lambda r: r.updated, reverse=True)

        # calculate new paging param(s)
        new_after = (
            before if before
            else vars['responses'][0].updated
            if vars['responses'] and query_iter.probably_has_next() and (before or after)
            else None)
        if new_after:
            vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                            new_after.isoformat())

        new_before = (
            after if after
            else vars['responses'][-1].updated
            if vars['responses'] and query_iter.probably_has_next()
            else None)
        if new_before:
            vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                             new_before.isoformat())

        vars['next_poll'] = max(
            self.source.last_poll_attempt + self.source.poll_period(),
            # lower bound is 1 minute from now
            util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in self.source.features:
        publishes = Publish.query().filter(Publish.source == self.source.key)\
                                   .order(-Publish.updated)\
                                   .fetch(10)
        for p in publishes:
            p.pretty_page = util.pretty_link(
                p.key.parent().id(),
                attrs={'class': 'original-post u-url u-name'}, new_tab=True)

        vars['publishes'] = publishes

    if 'webmention' in self.source.features:
        # Blog posts
        blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                    .order(-BlogPost.created)\
                                    .fetch(10)
        for b in blogposts:
            b.links = self.process_webmention_links(b)
            try:
                text = b.feed_item.get('title')
            except ValueError:
                text = None
            b.pretty_url = util.pretty_link(
                b.key.id(), text=text,
                attrs={'class': 'original-post u-url u-name'},
                max_length=40, new_tab=True)

        # Blog webmentions
        webmentions = BlogWebmention.query()\
            .filter(BlogWebmention.source == self.source.key)\
            .order(-BlogWebmention.updated)\
            .fetch(10)
        for w in webmentions:
            w.pretty_source = util.pretty_link(
                w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
            try:
                target_is_source = (urlparse.urlparse(w.target_url()).netloc
                                    in self.source.domains)
            except BaseException:
                target_is_source = False
            w.pretty_target = util.pretty_link(
                w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
                keep_host=target_is_source)

        vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars

def user(site, id):
    """View for a user page."""
    cls = models.sources.get(site)
    if not cls:
        return render_template('user_not_found.html'), 404

    source = cls.lookup(id)

    if not source:
        key = cls.query(ndb.OR(*[
            ndb.GenericProperty(prop) == id
            for prop in ('domains', 'inferred_username', 'name', 'username')
        ])).get(keys_only=True)
        if key:
            return redirect(cls(key=key).bridgy_path(), code=301)

    if not source or not source.features:
        return render_template('user_not_found.html'), 404

    source.verify()
    source = util.preprocess_source(source)

    vars = {
        'source': source,
        'logs': logs,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
    }

    # Blog webmention promos
    if 'webmention' not in source.features:
        if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
            vars[source.SHORT_NAME + '_promo'] = True
        else:
            for domain in source.domains:
                if ('.blogspot.' in domain and  # Blogger uses country TLDs
                    not Blogger.query(Blogger.domains == domain).get()):
                    vars['blogger_promo'] = True
                elif (util.domain_or_parent_in(domain, ['tumblr.com']) and
                      not Tumblr.query(Tumblr.domains == domain).get()):
                    vars['tumblr_promo'] = True
                elif (util.domain_or_parent_in(domain, 'wordpress.com') and
                      not WordPress.query(WordPress.domains == domain).get()):
                    vars['wordpress_promo'] = True

    # Responses
    if 'listen' in source.features or 'email' in source.features:
        vars['responses'] = []
        query = Response.query().filter(Response.source == source.key)

        # if there's a paging param (responses_before or responses_after), update
        # query with it
        def get_paging_param(param):
            val = request.values.get(param)
            try:
                return util.parse_iso8601(val.replace(' ', '+')) if val else None
            except BaseException:
                error(f"Couldn't parse {param}, {val!r} as ISO8601")

        before = get_paging_param('responses_before')
        after = get_paging_param('responses_after')
        if before and after:
            error("can't handle both responses_before and responses_after")
        elif after:
            query = query.filter(Response.updated > after).order(Response.updated)
        elif before:
            query = query.filter(Response.updated < before).order(-Response.updated)
        else:
            query = query.order(-Response.updated)

        query_iter = query.iter()
        for i, r in enumerate(query_iter):
            r.response = json_loads(r.response_json)
            r.activities = [json_loads(a) for a in r.activities_json]

            if (not source.is_activity_public(r.response) or
                not all(source.is_activity_public(a) for a in r.activities)):
                continue
            elif r.type == 'post':
                r.activities = []

            verb = r.response.get('verb')
            r.actor = (r.response.get('object') if verb == 'invite'
                       else r.response.get('author') or r.response.get('actor')) or {}

            activity_content = ''
            for a in r.activities + [r.response]:
                if not a.get('content'):
                    obj = a.get('object', {})
                    a['content'] = activity_content = (
                        obj.get('content') or obj.get('displayName') or
                        # historical, from a Reddit bug fixed in granary@4f9df7c
                        obj.get('name') or '')

            response_content = r.response.get('content')
            phrases = {
                'like': 'liked this',
                'repost': 'reposted this',
                'rsvp-yes': 'is attending',
                'rsvp-no': 'is not attending',
                'rsvp-maybe': 'might attend',
                'rsvp-interested': 'is interested',
                'invite': 'is invited',
            }
            phrase = phrases.get(r.type) or phrases.get(verb)
            if phrase and (r.type != 'repost' or
                           activity_content.startswith(response_content)):
                r.response['content'] = f'{r.actor.get("displayName") or ""} {phrase}.'

            # convert image URL to https if we're serving over SSL
            image_url = r.actor.setdefault('image', {}).get('url')
            if image_url:
                r.actor['image']['url'] = util.update_scheme(image_url, request)

            # generate original post links
            r.links = process_webmention_links(r)
            r.original_links = [util.pretty_link(url, new_tab=True)
                                for url in r.original_posts]

            vars['responses'].append(r)
            if len(vars['responses']) >= 10 or i > 200:
                break

        vars['responses'].sort(key=lambda r: r.updated, reverse=True)

        # calculate new paging param(s)
        new_after = (
            before if before
            else vars['responses'][0].updated
            if vars['responses'] and query_iter.probably_has_next() and (before or after)
            else None)
        if new_after:
            vars['responses_after_link'] = f'?responses_after={new_after.isoformat()}#responses'

        new_before = (
            after if after
            else vars['responses'][-1].updated
            if vars['responses'] and query_iter.probably_has_next()
            else None)
        if new_before:
            vars['responses_before_link'] = f'?responses_before={new_before.isoformat()}#responses'

        vars['next_poll'] = max(
            source.last_poll_attempt + source.poll_period(),
            # lower bound is 1 minute from now
            util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in source.features:
        publishes = Publish.query().filter(Publish.source == source.key)\
                                   .order(-Publish.updated)\
                                   .fetch(10)
        for p in publishes:
            p.pretty_page = util.pretty_link(
                p.key.parent().id(),
                attrs={'class': 'original-post u-url u-name'}, new_tab=True)

        vars['publishes'] = publishes

    if 'webmention' in source.features:
        # Blog posts
        blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
                                    .order(-BlogPost.created)\
                                    .fetch(10)
        for b in blogposts:
            b.links = process_webmention_links(b)
            try:
                text = b.feed_item.get('title')
            except ValueError:
                text = None
            b.pretty_url = util.pretty_link(
                b.key.id(), text=text,
                attrs={'class': 'original-post u-url u-name'},
                max_length=40, new_tab=True)

        # Blog webmentions
        webmentions = BlogWebmention.query()\
            .filter(BlogWebmention.source == source.key)\
            .order(-BlogWebmention.updated)\
            .fetch(10)
        for w in webmentions:
            w.pretty_source = util.pretty_link(
                w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
            try:
                target_is_source = (urllib.parse.urlparse(w.target_url()).netloc
                                    in source.domains)
            except BaseException:
                target_is_source = False
            w.pretty_target = util.pretty_link(
                w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
                keep_host=target_is_source)

        vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return render_template(f'{source.SHORT_NAME}_user.html', **vars)

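# The responses_before/responses_after params in the views above implement
# keyset pagination over Response.updated: 'before' pages older (descending),
# 'after' pages newer (ascending), and the page is always rendered newest-first.
# A self-contained sketch of that scheme, with a plain list standing in for the
# datastore index:
from datetime import datetime, timedelta

updated_index = [datetime(2023, 1, 1) + timedelta(days=i) for i in range(30)]

def page(before=None, after=None, size=10):
    if before and after:
        raise ValueError("can't handle both responses_before and responses_after")
    if after:     # newer than the cursor, ascending
        hits = sorted(t for t in updated_index if t > after)[:size]
    elif before:  # older than the cursor, descending
        hits = sorted((t for t in updated_index if t < before), reverse=True)[:size]
    else:
        hits = sorted(updated_index, reverse=True)[:size]
    return sorted(hits, reverse=True)  # always render newest-first

first = page()
older = page(before=first[-1])  # next page: everything older than the last shown
assert older[0] < first[-1]
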
def template_vars(self):
    if not self.source:
        return {}

    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'epoch': util.EPOCH,
    })

    # Blog webmention promos
    if 'webmention' not in self.source.features:
        if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
            vars[self.source.SHORT_NAME + '_promo'] = True
        else:
            for domain in self.source.domains:
                if ('.blogspot.' in domain and  # Blogger uses country TLDs
                    not Blogger.query(Blogger.domains == domain).get()):
                    vars['blogger_promo'] = True
                elif (domain.endswith('tumblr.com') and
                      not Tumblr.query(Tumblr.domains == domain).get()):
                    vars['tumblr_promo'] = True
                elif (domain.endswith('wordpress.com') and
                      not WordPress.query(WordPress.domains == domain).get()):
                    vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
        vars['responses'] = []
        for i, r in enumerate(Response.query()
                              .filter(Response.source == self.source.key)
                              .order(-Response.updated)):
            r.response = json.loads(r.response_json)
            if r.activity_json:  # handle old entities
                r.activities_json.append(r.activity_json)
            r.activities = [json.loads(a) for a in r.activities_json]

            if (not gr_source.Source.is_public(r.response) or
                not all(gr_source.Source.is_public(a) for a in r.activities)):
                continue

            r.actor = r.response.get('author') or r.response.get('actor', {})
            if not r.response.get('content'):
                phrases = {
                    'like': 'liked this',
                    'repost': 'reposted this',
                    'rsvp-yes': 'is attending',
                    'rsvp-no': 'is not attending',
                    'rsvp-maybe': 'might attend',
                    'invite': 'is invited',
                }
                r.response['content'] = '%s %s.' % (
                    r.actor.get('displayName') or '',
                    phrases.get(r.type) or phrases.get(r.response.get('verb')))

            # convert image URL to https if we're serving over SSL
            image_url = r.actor.setdefault('image', {}).get('url')
            if image_url:
                r.actor['image']['url'] = util.update_scheme(image_url, self)

            # generate original post links
            r.links = self.process_webmention_links(r)

            vars['responses'].append(r)
            if len(vars['responses']) >= 10 or i > 200:
                break

    # Publishes
    if 'publish' in self.source.features:
        publishes = Publish.query().filter(Publish.source == self.source.key)\
                                   .order(-Publish.updated)\
                                   .fetch(10)
        for p in publishes:
            p.pretty_page = util.pretty_link(
                p.key.parent().id(), a_class='original-post', new_tab=True)

        vars['publishes'] = publishes

    if 'webmention' in self.source.features:
        # Blog posts
        blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                    .order(-BlogPost.created)\
                                    .fetch(10)
        for b in blogposts:
            b.links = self.process_webmention_links(b)
            try:
                text = b.feed_item.get('title')
            except ValueError:
                text = None
            b.pretty_url = util.pretty_link(b.key.id(), text=text,
                                            a_class='original-post',
                                            max_length=40, new_tab=True)

        # Blog webmentions
        webmentions = BlogWebmention.query()\
            .filter(BlogWebmention.source == self.source.key)\
            .order(-BlogWebmention.updated)\
            .fetch(10)
        for w in webmentions:
            w.pretty_source = util.pretty_link(w.source_url(),
                                               a_class='original-post', new_tab=True)
            try:
                target_is_source = (urlparse.urlparse(w.target_url()).netloc
                                    in self.source.domains)
            except BaseException:
                target_is_source = False
            w.pretty_target = util.pretty_link(w.target_url(),
                                               a_class='original-post', new_tab=True,
                                               keep_host=target_is_source)

        vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars

import instagram
import tumblr
import twitter
import wordpress_rest

domains = collections.defaultdict(int)  # maps domain to # of users
for cls in models.sources.values():
    for src in cls.query(cls.domains > ''):
        for domain in src.domains:
            print domain
            domains[domain] += 1

with open('domains.txt', 'w') as f:
    f.write('domain,num_users\n')
    f.write('\n'.join(str(item) for item in reversed(sorted(
        '%s,%s' % (item[1], item[0]) for item in domains.items()))))

with open('domains_sent.txt', 'w') as f:
    url = ''
    while True:
        resp = Response.query(Response.sent > url).get(projection=['sent'])
        if not resp:
            break
        domain = None
        for sent in resp.sent:
            parsed = urlparse.urlparse(sent)
            if sent > url and (domain is None or parsed.netloc < domain):
                domain = parsed.netloc
                url = urlparse.urlunparse(parsed[:2] + ('', '', '', '')) + chr(ord('/') + 1)
        print domain

import twitter
import wordpress_rest

domains = collections.defaultdict(int)  # maps domain to # of users
for cls in models.sources.values():
    for src in cls.query(cls.domains > ''):
        for domain in src.domains:
            print domain
            domains[domain] += 1

with open('domains.txt', 'w') as f:
    f.write('domain,num_users\n')
    f.write('\n'.join(str(item) for item in reversed(sorted(
        '%s,%s' % (item[1], item[0]) for item in domains.items()))))

with open('domains_sent.txt', 'w') as f:
    url = ''
    while True:
        resp = Response.query(Response.sent > url).get(projection=['sent'])
        if not resp:
            break
        domain = None
        for sent in resp.sent:
            parsed = urlparse.urlparse(sent)
            if sent > url and (domain is None or parsed.netloc < domain):
                domain = parsed.netloc
                url = urlparse.urlunparse(parsed[:2] + ('', '', '', '')) + chr(ord('/') + 1)
        print domain

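# The chr(ord('/') + 1) trick in the two scripts above is a skip-scan: after
# reading one sent URL for a host, the query lower bound jumps to the first
# string that sorts after every URL under scheme://host/ (the trailing '/'
# replaced by the next code point, '0'), so each datastore round trip surfaces
# a new domain instead of scanning every Response. A runnable sketch of the
# same idea over a sorted list standing in for the index (and ignoring the
# scripts' handling of multiple sent URLs per Response):
import bisect
from urllib.parse import urlparse, urlunparse

sent_urls = sorted([
    'http://alice.example/post/1',
    'http://alice.example/post/2',
    'http://bob.example/a',
    'http://bob.example/b',
    'https://carol.example/x',
])

def distinct_domains(urls):
    lower = ''
    while True:
        # emulates Response.query(Response.sent > lower).get(projection=['sent'])
        i = bisect.bisect_right(urls, lower)
        if i >= len(urls):
            return
        parsed = urlparse(urls[i])
        yield parsed.netloc
        # skip everything else under this scheme://host/ prefix
        lower = urlunparse(parsed[:2] + ('', '', '', '')) + chr(ord('/') + 1)

print(list(distinct_domains(sent_urls)))
# ['alice.example', 'bob.example', 'carol.example']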