def test_domain_not_found(self):
    """Lookups that can't resolve a source should all report the same error.

    Covers: deleted source, source without the webmention feature, source
    whose domains don't match the target, and a disabled source.
    """
    # no source
    msg = 'Could not find FakeSource account for foo.com.'
    self.source.key.delete()
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())

    # source without webmention feature
    self.source.features = ['listen']
    self.source.put()
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())

    # source without domain
    self.source.features = ['webmention']
    self.source.domains = ['asdfoo.com', 'foo.comedy']
    self.source.put()
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())

    # source is disabled
    self.source.domains = ['foo.com']
    self.source.status = 'disabled'
    self.source.put()
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())
def test_domain_not_found(self):
    """Source lookup and fetch failures should leave no BlogWebmention behind."""
    self.expect_requests_get('http://foo.com/post/1', status_code=404)
    for i in range(4):
        self.expect_requests_get('http://foo.com/post/1', '')
    self.mox.ReplayAll()

    # couldn't fetch source URL
    self.source.key.delete()
    self.assert_error('Could not fetch source URL http://foo.com/post/1')
    self.assertEqual(0, BlogWebmention.query().count())

    # no source
    msg = 'Could not find FakeSource account for foo.com.'
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())

    # source without webmention feature
    self.source.features = ['listen']
    self.source.put()
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())

    # source without domain
    self.source.features = ['webmention']
    self.source.domains = ['asdfoo.com', 'foo.comedy']
    self.source.put()
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())

    # source is disabled
    self.source.domains = ['foo.com']
    self.source.status = 'disabled'
    self.source.put()
    self.assert_error(msg)
    self.assertEqual(0, BlogWebmention.query().count())
def test_source_link_not_found(self):
    """A source page without a link to the target should fail the mention."""
    html = '<article class="h-entry"></article>'
    self.expect_requests_get('http://bar.com/reply', html)
    self.mox.ReplayAll()

    self.assert_error('Could not find target URL')
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
def test_success(self):
    """A reply on the source page should be propagated as a comment."""
    html = """
<article class="h-entry">
<p class="p-author">my name</p>
<p class="e-content">
i hereby reply
<a class="u-in-reply-to" href="http://foo.com/post/1"></a>
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)

    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'my name', 'http://foo.com/',
        'i hereby reply\n<a class="u-in-reply-to" href="http://foo.com/post/1"></a>'
        ' <br /> <a href="http://bar.com/reply">via bar.com</a>'
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    self.assertEqual({'id': 'fake id'}, json.loads(resp.body))

    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual(self.source.key, bw.source)
    self.assertEqual('complete', bw.status)
    self.assertEqual('comment', bw.type)
    self.assertEqual(html, bw.html)
    self.assertEqual({'id': 'fake id'}, bw.published)
def test_target_path_blacklisted(self):
    """Blacklisted target URL paths should be rejected with a 202."""
    bad = 'http://foo.com/blacklisted/1'
    self.assert_error(
        'FakeSource webmentions are not supported for URL path: /blacklisted/1',
        target=bad, status=202)
    self.assertEqual(0, BlogWebmention.query().count())
def test_success(self):
    """A reply on the source page should be propagated as a comment."""
    html = """
<article class="h-entry">
<p class="p-author">my name</p>
<p class="e-content">
i hereby reply
<a class="u-in-reply-to" href="http://foo.com/post/1"></a>
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)

    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'my name', 'http://foo.com/',
        'i hereby reply\n<a class="u-in-reply-to" href="http://foo.com/post/1"></a>'
        ' <br /> <a href="http://bar.com/reply">via bar.com</a>'
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    self.assertEqual({'id': 'fake id'}, json.loads(resp.body))

    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual(self.source.key, bw.source)
    self.assertEqual('complete', bw.status)
    self.assertEqual('comment', bw.type)
    self.assertEqual(html, bw.html)
    self.assertEqual({'id': 'fake id'}, bw.published)
def test_u_url(self):
    """An explicit u-url in the source page should override the source URL."""
    html = """
<article class="h-entry">
<p class="p-name"></p> <!-- empty -->
<p class="p-author">my name</p>
<p class="e-content">
i hereby mention
<a href="http://foo.com/post/1"></a>
<a class="u-url" href="http://barzz.com/u/url"></a>
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)

    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'my name', 'http://foo.com/',
        """mentioned this in <a href="http://barzz.com/u/url">barzz.com/u/url</a>. <br /> <a href="http://barzz.com/u/url">via barzz.com</a>"""
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual('post', bw.type)
    self.assertEqual('http://barzz.com/u/url', bw.u_url)
    self.assertEqual('http://barzz.com/u/url', bw.source_url())
def test_rel_canonical_different_domain(self):
    """A rel-canonical link on the target should map it to a registered domain."""
    self.expect_requests_get('http://foo.zz/post/1', """
<head>
<link href='http://foo.com/post/1' rel='canonical'/>
</head>
foo bar""")

    html = """
<article class="h-entry"><p class="e-content">
<a href="http://bar.com/mention">this post</a>
i hereby <a href="http://foo.zz/post/1">mention</a>
</p></article>"""
    self.expect_requests_get('http://bar.com/mention', html)

    testutil.FakeSource.create_comment(
        'http://foo.zz/post/1', 'foo.zz', 'http://foo.zz/',
        'mentioned this in <a href="http://bar.com/mention">bar.com/mention</a>. <br /> <a href="http://bar.com/mention">via bar.com</a>')
    self.mox.ReplayAll()

    resp = self.get_response('http://bar.com/mention', 'http://foo.zz/post/1')
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/mention http://foo.zz/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual(html, bw.html)
def test_rel_canonical_different_domain(self):
    """A rel-canonical link on the target should map it to a registered domain."""
    self.expect_requests_get(
        'http://foo.zz/post/1', """
<head>
<link href='http://foo.com/post/1' rel='canonical'/>
</head>
foo bar""")

    html = """
<article class="h-entry"><p class="e-content">
<a href="http://bar.com/mention">this post</a>
i hereby <a href="http://foo.zz/post/1">mention</a>
</p></article>"""
    self.expect_requests_get('http://bar.com/mention', html)

    testutil.FakeSource.create_comment(
        'http://foo.zz/post/1', 'foo.zz', 'http://foo.zz/',
        'mentioned this in <a href="http://bar.com/mention">bar.com/mention</a>. <br /> <a href="http://bar.com/mention">via bar.com</a>'
    )
    self.mox.ReplayAll()

    resp = self.get_response('http://bar.com/mention', 'http://foo.zz/post/1')
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/mention http://foo.zz/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual(html, bw.html)
def test_source_missing_mf2(self):
    """A source page without microformats2 markup should fail the mention."""
    html = 'no microformats here, run along'
    self.expect_requests_get('http://bar.com/reply', html)
    self.mox.ReplayAll()

    self.assert_error('No microformats2 data found')
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(html, bw.html)
def test_source_link_not_found(self):
    """A source page without a link to the target should fail the mention."""
    html = '<article class="h-entry"></article>'
    self.expect_requests_get('http://bar.com/reply', html)
    self.mox.ReplayAll()

    self.assert_error('Could not find target URL')
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(html, bw.html)
def test_create_comment_404s(self):
    """A 404 from create_comment should mark the webmention failed."""
    self.expect_mention().AndRaise(exc.HTTPNotFound('gone baby gone'))
    self.mox.ReplayAll()

    self.assert_error('gone baby gone', status=404)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(self.mention_html, bw.html)
def test_source_missing_mf2(self):
    """A source page without microformats2 markup should fail the mention."""
    html = 'no microformats here, run along'
    self.expect_requests_get('http://bar.com/reply', html)
    self.mox.ReplayAll()

    self.assert_error('No microformats2 data found')
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(html, bw.html)
def test_create_comment_exception(self):
    """An HTTP error from create_comment should propagate its status code."""
    self.expect_mention().AndRaise(exc.HTTPPaymentRequired())
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(402, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(self.mention_html, bw.html)
def test_create_comment_exception(self):
    """An HTTP error from create_comment should propagate its status code."""
    self.expect_mention().AndRaise(exc.HTTPPaymentRequired())
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(402, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(self.mention_html, bw.html)
def test_create_comment_404s(self):
    """A 404 from create_comment should mark the webmention failed."""
    self.expect_mention().AndRaise(exc.HTTPNotFound('gone baby gone'))
    self.mox.ReplayAll()

    self.assert_error('gone baby gone', status=404)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(self.mention_html, bw.html)
def test_strip_utm_query_params(self):
    """utm_* query params should be stripped from target URLs."""
    self.expect_mention()
    self.mox.ReplayAll()

    resp = self.get_response(target=urllib.quote(
        'http://foo.com/post/1?utm_source=x&utm_medium=y'))
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_create_comment_exception(self):
    """An HTTP error from create_comment should propagate its status code."""
    self.expect_mention().AndRaise(exceptions.NotAcceptable())
    self.mox.ReplayAll()

    resp = self.post()
    body = resp.get_data(as_text=True)
    self.assertEqual(406, resp.status_code, body)

    entity_id = 'http://bar.com/reply http://foo.com/post/1'
    bw = BlogWebmention.get_by_id(entity_id)
    self.assertEqual('failed', bw.status)
    self.assertEqual(self.mention_html, bw.html)
def test_strip_utm_query_params(self):
    """utm_* query params should be stripped from target URLs."""
    self.expect_mention()
    self.mox.ReplayAll()

    resp = self.get_response(target=urllib.quote(
        'http://foo.com/post/1?utm_source=x&utm_medium=y'))
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_strip_utm_query_params(self):
    """utm_* query params should be stripped from target URLs."""
    self.expect_mention()
    self.mox.ReplayAll()

    target = 'http://foo.com/post/1?utm_source=x&utm_medium=y'
    resp = self.post(target=target)
    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))

    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_create_comment_401_disables_source(self):
    """A 401 from create_comment should disable the source."""
    self.expect_mention().AndRaise(exc.HTTPUnauthorized('no way'))
    self.mox.ReplayAll()

    self.assert_error('no way', status=401)
    source = self.source.key.get()
    self.assertEqual('disabled', source.status)

    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(self.mention_html, bw.html)
def test_repeated(self):
    """A failed webmention may be retried; a completed one is a noop."""
    # 1) first a failure
    self.expect_requests_get('http://bar.com/reply', '')

    # 2) should allow retrying, this one will succeed
    self.expect_requests_get(
        'http://bar.com/reply', """
<article class="h-entry">
<a class="u-url" href="http://bar.com/reply"></a>
<a class="u-repost-of" href="http://foo.com/post/1"></a>
</article>""")
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'foo.com', 'http://foo.com/',
        'reposted this. <br /> <a href="http://bar.com/reply">via bar.com</a>'
    )

    # 3) after success, another is a noop and returns 200
    # TODO: check for "updates not supported" message
    self.mox.ReplayAll()

    # now the webmention requests. 1) failure
    self.assert_error('No microformats2 data found')
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)

    # 2) success
    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual('repost', bw.type)

    # 3) noop repeated success
    # source without webmention feature
    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_create_comment_401_disables_source(self):
    """A 401 from create_comment should disable the source."""
    self.expect_mention().AndRaise(exc.HTTPUnauthorized('no way'))
    self.mox.ReplayAll()

    self.assert_error('no way', status=401)
    source = self.source.key.get()
    self.assertEqual('disabled', source.status)

    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(self.mention_html, bw.html)
def test_repeated(self):
    """A failed webmention may be retried; a completed one is a noop."""
    # 1) first a failure
    self.expect_requests_get('http://bar.com/reply', '')

    # 2) should allow retrying, this one will succeed
    self.expect_requests_get('http://bar.com/reply', """
<article class="h-entry">
<a class="u-url" href="http://bar.com/reply"></a>
<a class="u-repost-of" href="http://foo.com/post/1"></a>
</article>""")
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'foo.com', 'http://foo.com/',
        'reposted this. <br /> <a href="http://bar.com/reply">via bar.com</a>')

    # 3) after success, another is a noop and returns 200
    # TODO: check for "updates not supported" message
    self.mox.ReplayAll()

    # now the webmention requests. 1) failure
    self.assert_error('No microformats2 data found')
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)

    # 2) success
    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual('repost', bw.type)

    # 3) noop repeated success
    # source without webmention feature
    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_source_link_check_ignores_fragment(self):
    """The link-back check should succeed regardless of URL fragments."""
    html = """\
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
<a href="http://foo.com/post/1"></a>
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'foo.com', 'http://foo.com/',
        'mentioned this in <a href="http://bar.com/reply">my post</a>. <br /> <a href="http://bar.com/reply">via bar.com</a>')
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_create_comment_exception(self):
    """An HTTP error from create_comment should propagate its status code."""
    html = """\
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
http://foo.com/post/1
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'foo.com', 'http://foo.com/', mox.IgnoreArg()
    ).AndRaise(exc.HTTPPaymentRequired())
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(402, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('failed', bw.status)
    self.assertEqual(html, bw.html)
def test_target_redirects(self):
    """Target redirects should be followed and the redirect chain recorded."""
    html = """\
<article class="h-entry"><p class="e-content">
http://second/
</p></article>"""
    redirects = ['http://second/', 'http://foo.com/final']
    self.expect_requests_head('http://first/', redirected_url=redirects)
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment(
        'http://foo.com/final', 'foo.com', 'http://foo.com/', mox.IgnoreArg())
    self.mox.ReplayAll()

    resp = self.get_response(target='http://first/')
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/final')
    self.assertEqual('complete', bw.status)
    self.assertEqual(['http://first/', 'http://second/'],
                     bw.redirected_target_urls)
def test_strip_utm_query_params(self):
    """utm_* query params should be stripped from target URLs."""
    html = """\
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
http://foo.com/post/1
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'foo.com', 'http://foo.com/',
        'mentioned this in <a href="http://bar.com/reply">my post</a>. <br /> <a href="http://bar.com/reply">via bar.com</a>')
    self.mox.ReplayAll()

    resp = self.get_response(target=urllib.quote(
        'http://foo.com/post/1?utm_source=x&utm_medium=y'))
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_source_link_check_ignores_fragment(self):
    """The link-back check should succeed regardless of URL fragments."""
    html = """\
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
<a href="http://foo.com/post/1"></a>
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'foo.com', 'http://foo.com/',
        'mentioned this in <a href="http://bar.com/reply">my post</a>. <br /> <a href="http://bar.com/reply">via bar.com</a>'
    )
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_source_link_check_ignores_fragment(self):
    """The link-back check should succeed regardless of URL fragments."""
    html = """\
<article class="h-entry"><p class="e-content">
<a href="http://bar.com/reply">(permalink)</a>
<span class="p-name">my post</span>
<a href="http://foo.com/post/1"></a>
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)

    expected_comment = (
        'mentioned this in <a href="http://bar.com/reply">my post</a>. <br /> <a href="http://bar.com/reply">via bar.com</a>')
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'foo.com', 'http://foo.com/', expected_comment
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.post()
    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
def test_target_redirects(self):
    """Target redirects should be followed and the redirect chain recorded."""
    html = """\
<article class="h-entry"><p class="e-content">
http://second/
</p></article>"""
    redirects = ['http://second/', 'http://foo.com/final']
    self.expect_requests_head('http://first/', redirected_url=redirects)
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment('http://foo.com/final', 'foo.com',
                                       'http://foo.com/', mox.IgnoreArg())
    self.mox.ReplayAll()

    resp = self.get_response(target='http://first/')
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/final')
    self.assertEqual('complete', bw.status)
    self.assertEqual(['http://first/', 'http://second/'],
                     bw.redirected_target_urls)
def test_domain_translates_to_lowercase(self):
    """Mixed-case target domains should still match the registered source."""
    html = """\
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
X http://FoO.cOm/post/1
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)

    expected_comment = (
        'mentioned this in <a href="http://bar.com/reply">my post</a>. <br /> <a href="http://bar.com/reply">via bar.com</a>')
    testutil.FakeSource.create_comment(
        'http://FoO.cOm/post/1', 'foo.com', 'http://foo.com/', expected_comment
    )
    self.mox.ReplayAll()

    resp = self.get_response(target='http://FoO.cOm/post/1')
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://FoO.cOm/post/1')
    self.assertEqual('complete', bw.status)
def test_target_redirects(self):
    """Target redirects should be followed and the redirect chain recorded."""
    html = """\
<article class="h-entry"><p class="e-content">
http://second/
</p></article>"""
    redirects = ['http://second/', 'http://foo.com/final']
    self.expect_requests_head('http://first/', redirected_url=redirects)
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment(
        'http://foo.com/final', 'foo.com', 'http://foo.com/',
        mox.IgnoreArg()).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.post(target='http://first/')
    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))

    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/final')
    self.assertEqual('complete', bw.status)
    # the full redirect chain, minus the final resolved URL
    self.assertEqual(['http://first/', 'http://second/'],
                     bw.redirected_target_urls)
def _test_success(self, html):
    """Shared success-path helper: fetch source, create comment, verify entity."""
    self.expect_requests_get('http://bar.com/reply', html)

    expected_comment = (
        'i hereby reply\n<a class="u-in-reply-to" href="http://foo.com/post/1"></a>'
        ' <br /> <a href="http://bar.com/reply">via bar.com</a>')
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'my name', 'http://foo.com/', expected_comment
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.post()
    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))
    self.assertEqual({'id': 'fake id'}, resp.json)

    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual(self.source.key, bw.source)
    self.assertEqual('complete', bw.status)
    self.assertEqual('comment', bw.type)
    self.assertEqual(html, bw.html)
    self.assertEqual({'id': 'fake id'}, bw.published)
def test_unicode_in_target_and_source_urls(self):
    """Unicode chars in target and source URLs should work."""
    # note the … and ✁ chars
    target = u'http://foo.com/2014/11/23/england-german…iendly-wembley'
    source = u'http://bar.com/✁/1'

    html = u"""\
<meta charset="utf-8">
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
%s
</p></article>""" % target
    self.expect_requests_get(source, html)

    comment = u'mentioned this in <a href="%s">my post</a>. <br /> <a href="%s">via bar.com</a>' % (source, source)
    testutil.FakeSource.create_comment(target, 'foo.com', 'http://foo.com/', comment)
    self.mox.ReplayAll()

    resp = self.get_response(source=source, target=target)
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(' '.join((source, target)))
    self.assertEqual('complete', bw.status)
def test_reply_outside_e_content(self):
    """An in-reply-to link outside e-content should still be detected."""
    html = """
<article class="h-entry">
<p class="p-author">my name</p>
<p class="p-in-reply-to h-cite"><a href="http://foo.com/post/1"></a></p>
<div class="e-content">
i hereby reply
</div></article>"""
    self.expect_requests_get('http://bar.com/reply', html)
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'my name', 'http://foo.com/',
        'i hereby reply <br /> <a href="http://bar.com/reply">via bar.com</a>'
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual({'id': 'fake id'}, bw.published)
def test_u_url(self):
    """An explicit u-url in the source page should override the source URL."""
    html = """
<article class="h-entry">
<p class="p-name"></p> <!-- empty -->
<p class="p-author">my name</p>
<p class="e-content">
i hereby mention
<a href="http://foo.com/post/1"></a>
<a class="u-url" href="http://barzz.com/u/url"></a>
</p></article>"""
    self.expect_requests_get('http://bar.com/reply', html)

    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'my name', 'http://foo.com/',
        """mentioned this in <a href="http://barzz.com/u/url">barzz.com/u/url</a>. <br /> <a href="http://barzz.com/u/url">via barzz.com</a>"""
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.get_response()
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id('http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual('post', bw.type)
    self.assertEqual('http://barzz.com/u/url', bw.u_url)
    self.assertEqual('http://barzz.com/u/url', bw.source_url())
def test_reply_outside_e_content(self):
    """An in-reply-to link outside e-content should still be detected."""
    html = """
<article class="h-entry">
<p class="p-author">my name</p>
<p class="p-in-reply-to h-cite"><a href="http://foo.com/post/1"></a></p>
<div class="e-content">
i hereby reply
</div></article>"""
    self.expect_requests_get('http://bar.com/reply', html)

    expected_comment = (
        'i hereby reply <br /> <a href="http://bar.com/reply">via bar.com</a>')
    testutil.FakeSource.create_comment(
        'http://foo.com/post/1', 'my name', 'http://foo.com/', expected_comment
    ).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.post()
    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))

    bw = BlogWebmention.get_by_id(
        'http://bar.com/reply http://foo.com/post/1')
    self.assertEqual('complete', bw.status)
    self.assertEqual({'id': 'fake id'}, bw.published)
    self.assertEqual(html, bw.html)
def test_unicode_in_target_and_source_urls(self):
    """Unicode chars in target and source URLs should work."""
    # note the … and ✁ chars
    target = 'http://foo.com/2014/11/23/england-german…iendly-wembley'
    source = 'http://bar.com/✁/1'

    html = f"""
<meta charset="utf-8">
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
{target}
</p></article>"""
    self.expect_requests_get(source, html)

    comment = f'mentioned this in <a href="{source}">my post</a>. <br /> <a href="{source}">via bar.com</a>'
    testutil.FakeSource.create_comment(
        target, 'foo.com', 'http://foo.com/', comment).AndReturn({'id': 'fake id'})
    self.mox.ReplayAll()

    resp = self.post(source=source, target=target)
    self.assertEqual(200, resp.status_code, resp.get_data(as_text=True))

    bw = BlogWebmention.get_by_id(' '.join((source, target)))
    self.assertEqual('complete', bw.status)
def test_unicode_in_target_and_source_urls(self):
    """Unicode chars in target and source URLs should work."""
    # note the … and ✁ chars
    target = 'http://foo.com/2014/11/23/england-german…iendly-wembley'
    source = 'http://bar.com/✁/1'

    html = u"""\
<meta charset="utf-8">
<article class="h-entry"><p class="e-content">
<span class="p-name">my post</span>
%s
</p></article>""" % target
    self.expect_requests_get(source, html)

    comment = 'mentioned this in <a href="%s">my post</a>. <br /> <a href="%s">via bar.com</a>' % (
        source, source)
    testutil.FakeSource.create_comment(target, 'foo.com', 'http://foo.com/', comment)
    self.mox.ReplayAll()

    resp = self.get_response(source=source, target=target)
    self.assertEqual(200, resp.status_int, resp.body)
    bw = BlogWebmention.get_by_id(' '.join((source, target)))
    self.assertEqual('complete', bw.status)
def dispatch_request(self, site):
    """Handle an incoming blog webmention.

    Validates the target URL, looks up the matching source account, fetches
    and parses the source page, then creates a comment on the target via the
    source's API. Errors are reported through self.error(), which presumably
    aborts the request — TODO confirm.

    Args:
      site: string, short name key into models.sources

    Returns:
      the stored `published` response dict on success, or an
      ({'error': msg}, 202) tuple for unsupported-but-accepted targets.
    """
    logger.info(f'Params: {list(request.values.items())}')

    # strip fragments from source and target url
    self.source_url = urllib.parse.urldefrag(request.form['source'])[0]
    self.target_url = urllib.parse.urldefrag(request.form['target'])[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
        self.error(f'Could not parse target URL {self.target_url}')

    # look up source by domain
    source_cls = models.sources[site]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
        # check for a rel-canonical link. Blogger uses these when it serves a post
        # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs
        # epeus.blogspot.com.
        # https://github.com/snarfed/bridgy/issues/805
        mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
        if not mf2:
            # fetch_mf2() already wrote the error response
            return
        domains = util.dedupe_urls(
            util.domain_from_link(url)
            for url in mf2[1]['rels'].get('canonical', []))
        if domains:
            self.source = (source_cls.query()
                           .filter(source_cls.domains.IN(domains))
                           .filter(source_cls.features == 'webmention')
                           .filter(source_cls.status == 'enabled')
                           .get())

    if not self.source:
        self.error(
            f'Could not find {source_cls.GR_CLASS.NAME} account for {domain}. Is it registered with Bridgy?')

    # check that the target URL path is supported
    target_path = urllib.parse.urlparse(self.target_url).path
    if target_path in ('', '/'):
        msg = 'Home page webmentions are not currently supported.'
        logger.info(msg)
        return {'error': msg}, 202
    for pattern in self.source.PATH_BLOCKLIST:
        if pattern.match(target_path):
            msg = f'{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}'
            logger.info(msg)
            return {'error': msg}, 202

    # create BlogWebmention entity; entity id is "<source> <target>"
    # (renamed from `id`, which shadowed the builtin)
    entity_id = f'{self.source_url} {self.target_url}'
    self.entity = BlogWebmention.get_or_insert(
        entity_id, source=self.source.key,
        redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
        # TODO: response message saying update isn't supported
        return self.entity.published
    logger.debug(f'BlogWebmention entity: {self.entity.key.urlsafe().decode()}')

    # fetch source page
    fetched = self.fetch_mf2(self.source_url)
    if not fetched:
        return
    resp, mf2 = fetched

    item = self.find_mention_item(mf2.get('items', []))
    if not item:
        self.error(
            f'Could not find target URL {self.target_url} in source page {resp.url}',
            data=mf2, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = f'http://{domain}/'

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = get_first(props, 'author')
    if author:
        if isinstance(author, str):
            author_name = author
        else:
            author_props = author.get('properties', {})
            author_name = get_first(author_props, 'name')
            author_url = get_first(author_props, 'url')

    # if present, u-url overrides source url
    u_url = get_first(props, 'url')
    if u_url:
        self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += f' <br /> <a href="{source_url}">via {util.domain_from_link(source_url)}</a>'

    # write comment
    try:
        self.entity.published = self.source.create_comment(
            self.target_url, author_name, author_url, text)
    except Exception as e:
        code, body = util.interpret_http_exception(e)
        msg = f'Error: {code}: {e}; {body}'
        if code == '401':
            logger.warning(f'Disabling source due to: {e}', exc_info=True)
            self.source.status = 'disabled'
            self.source.put()
            self.error(msg, status=code, report=self.source.is_beta_user())
        elif code == '404':
            # post is gone
            self.error(msg, status=code, report=False)
        elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
            self.error(msg, status=502, report=False)
        elif code or body:
            self.error(msg, status=code, report=True)
        else:  # unexpected non-HTTP error: let it propagate
            raise

    # write results to datastore
    self.entity.status = 'complete'
    self.entity.put()
    return self.entity.published
def template_vars(self):
  """Returns the template variable dict for this user's page.

  Extends the superclass vars with the source entity and, depending on which
  features the source has enabled, blog-webmention promos, recent responses,
  publishes, blog posts, and blog webmentions.
  """
  if not self.source:
    return {}

  vars = super(UserHandler, self).template_vars()
  vars.update({
      'source': self.source,
      'epoch': util.EPOCH,
      })

  # Blog webmention promos: shown when the webmention feature is off but the
  # source's domains suggest a supported blog host without a registered account.
  if 'webmention' not in self.source.features:
    if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
      vars[self.source.SHORT_NAME + '_promo'] = True
    else:
      for domain in self.source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (domain.endswith('tumblr.com') and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (domain.endswith('wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses: most recently updated first, up to 10 public ones, scanning at
  # most ~200 entities.
  if 'listen' in self.source.features:
    vars['responses'] = []
    for i, r in enumerate(Response.query()
                            .filter(Response.source == self.source.key)\
                            .order(-Response.updated)):
      r.response = json.loads(r.response_json)
      if r.activity_json:  # handle old entities
        r.activities_json.append(r.activity_json)
      r.activities = [json.loads(a) for a in r.activities_json]

      # skip responses or activities that aren't public
      if (not gr_source.Source.is_public(r.response) or
          not all(gr_source.Source.is_public(a) for a in r.activities)):
        continue

      r.actor = r.response.get('author') or r.response.get('actor', {})
      if not r.response.get('content'):
        # synthesize display content for content-less response types
        phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'invite': 'is invited',
        }
        r.response['content'] = '%s %s.' % (
          r.actor.get('displayName') or '',
          phrases.get(r.type) or phrases.get(r.response.get('verb')))

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, self)

      # generate original post links
      r.links = self.process_webmention_links(r)

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

  # Publishes: the 10 most recently updated
  if 'publish' in self.source.features:
    publishes = Publish.query().filter(Publish.source == self.source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      p.pretty_page = util.pretty_link(
        p.key.parent().id(), a_class='original-post', new_tab=True)

    vars['publishes'] = publishes

  if 'webmention' in self.source.features:
    # Blog posts: the 10 most recently created
    blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = self.process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(b.key.id(), text=text,
                                      a_class='original-post',
                                      max_length=40, new_tab=True)

    # Blog webmentions: the 10 most recently updated
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == self.source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(w.source_url(),
                                         a_class='original-post', new_tab=True)
      # keep the host visible when the target lives on one of the source's
      # own domains; swallow parse failures and fall back to hiding it
      try:
        target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                            self.source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(w.target_url(),
                                         a_class='original-post', new_tab=True,
                                         keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return vars
def post(self, source_short_name):
  """Handles an incoming webmention targeting a blog source's post.

  Looks up the registered source by the target URL's domain, fetches the
  source page's microformats2, extracts the mention's author and content,
  and publishes it to the blog as a comment.

  Args:
    source_short_name: string, the source class's short name, e.g. 'wordpress'

  Writes the created comment (JSON) to the response on success; otherwise
  delegates to self.error().
  """
  # fix: the original used '%self', a typo'd format specifier ('%s' + 'elf')
  logging.info('Params: %s', self.request.params.items())

  # strip fragments from source and target url
  self.source_url = urlparse.urldefrag(
    util.get_required_param(self, 'source'))[0]
  self.target_url = urlparse.urldefrag(
    util.get_required_param(self, 'target'))[0]

  # follow target url through any redirects, strip utm_* query params
  resp = util.follow_redirects(self.target_url)
  redirected_target_urls = [r.url for r in resp.history]
  self.target_url = util.clean_url(resp.url)

  # parse and validate target URL
  domain = util.domain_from_link(self.target_url)
  if not domain:
    return self.error('Could not parse target URL %s' % self.target_url)

  # look up source by domain; must have webmention enabled and be active
  source_cls = models.sources[source_short_name]
  domain = domain.lower()
  self.source = (source_cls.query().filter(
      source_cls.domains == domain).filter(
      source_cls.features == 'webmention').filter(
      source_cls.status == 'enabled').get())
  if not self.source:
    return self.error(
      'Could not find %s account for %s. Is it registered with Bridgy?' %
      (source_cls.GR_CLASS.NAME, domain))

  if urlparse.urlparse(self.target_url).path in ('', '/'):
    return self.error('Home page webmentions are not currently supported.')

  # create BlogWebmention entity keyed on '<source_url> <target_url>'
  id = u'%s %s' % (self.source_url, self.target_url)
  self.entity = BlogWebmention.get_or_insert(
    id, source=self.source.key, redirected_target_urls=redirected_target_urls)
  if self.entity.status == 'complete':
    # TODO: response message saying update isn't supported
    self.response.write(self.entity.published)
    return
  logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe())

  # fetch source page
  resp = self.fetch_mf2(self.source_url)
  if not resp:
    return
  self.fetched, data = resp

  item = self.find_mention_item(data)
  if not item:
    return self.error(
      'Could not find target URL %s in source page %s' %
      (self.target_url, self.fetched.url),
      data=data, log_exception=False)

  # default author to target domain
  author_name = domain
  author_url = 'http://%s/' % domain

  # extract author name and URL from h-card, if any
  props = item['properties']
  author = first_value(props, 'author')
  if author:
    if isinstance(author, basestring):
      author_name = author
    else:
      author_props = author.get('properties', {})
      author_name = first_value(author_props, 'name')
      author_url = first_value(author_props, 'url')

  # if present, u-url overrides source url
  u_url = first_value(props, 'url')
  if u_url:
    self.entity.u_url = u_url

  # generate content: mention's HTML (or plain value) plus a "via" backlink
  content = props['content'][0]  # find_mention_item() guaranteed this is here
  text = (content.get('html') or content.get('value')).strip()
  source_url = self.entity.source_url()
  text += ' <br /> <a href="%s">via %s</a>' % (
    source_url, util.domain_from_link(source_url))

  # write comment
  try:
    self.entity.published = self.source.create_comment(
      self.target_url, author_name, author_url, text)
  except Exception as e:
    code, body = util.interpret_http_exception(e)
    msg = 'Error: %s %s; %s' % (code, e, body)
    if code == '401':
      # auth revoked: stop trying this source
      logging.warning('Disabling source due to: %s' % e, exc_info=True)
      self.source.status = 'disabled'
      self.source.put()
      return self.error(msg, status=code, mail=self.source.is_beta_user())
    elif code == '404':
      # post is gone
      return self.error(msg, status=code, mail=False)
    elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
      # transient failure upstream; signal retry
      return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE, mail=False)
    elif code or body:
      return self.error(msg, status=code, mail=True)
    else:
      raise

  # write results to datastore
  self.entity.status = 'complete'
  self.entity.put()
  self.response.write(json.dumps(self.entity.published))
def post(self, source_short_name):
  """Handles an incoming webmention targeting a blog source's post.

  Looks up the registered source by the target URL's domain (falling back to
  the target page's rel-canonical domains), fetches the source page's
  microformats2, extracts the mention's author and content, and publishes it
  to the blog as a comment.

  Args:
    source_short_name: string, the source class's short name, e.g. 'wordpress'
  """
  logging.info('Params: %s', list(self.request.params.items()))

  # strip fragments from source and target url
  self.source_url = urllib.parse.urldefrag(
    util.get_required_param(self, 'source'))[0]
  self.target_url = urllib.parse.urldefrag(
    util.get_required_param(self, 'target'))[0]

  # follow target url through any redirects, strip utm_* query params
  resp = util.follow_redirects(self.target_url)
  redirected_target_urls = [r.url for r in resp.history]
  self.target_url = util.clean_url(resp.url)

  # parse and validate target URL
  domain = util.domain_from_link(self.target_url)
  if not domain:
    return self.error('Could not parse target URL %s' % self.target_url)

  # look up source by domain; must have webmention enabled and be active
  source_cls = models.sources[source_short_name]
  domain = domain.lower()
  self.source = (source_cls.query().filter(
      source_cls.domains == domain).filter(
      source_cls.features == 'webmention').filter(
      source_cls.status == 'enabled').get())

  if not self.source:
    # check for a rel-canonical link. Blogger uses these when it serves a post
    # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs
    # epeus.blogspot.com.
    # https://github.com/snarfed/bridgy/issues/805
    mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
    if not mf2:
      # fetch_mf2() already wrote the error response
      return
    # fix: use .get('rels', {}) so a page with no rel links doesn't KeyError
    domains = util.dedupe_urls(
      util.domain_from_link(url)
      for url in mf2[1].get('rels', {}).get('canonical', []))
    if domains:
      self.source = (source_cls.query().filter(
          source_cls.domains.IN(domains)).filter(
          source_cls.features == 'webmention').filter(
          source_cls.status == 'enabled').get())

  if not self.source:
    return self.error(
      'Could not find %s account for %s. Is it registered with Bridgy?' %
      (source_cls.GR_CLASS.NAME, domain))

  # check that the target URL path is supported
  target_path = urllib.parse.urlparse(self.target_url).path
  if target_path in ('', '/'):
    return self.error(
      'Home page webmentions are not currently supported.', status=202)
  for pattern in self.source.PATH_BLOCKLIST:
    if pattern.match(target_path):
      return self.error(
        '%s webmentions are not supported for URL path: %s' %
        (self.source.GR_CLASS.NAME, target_path), status=202)

  # create BlogWebmention entity keyed on '<source_url> <target_url>'
  id = '%s %s' % (self.source_url, self.target_url)
  self.entity = BlogWebmention.get_or_insert(
    id, source=self.source.key, redirected_target_urls=redirected_target_urls)
  if self.entity.status == 'complete':
    # TODO: response message saying update isn't supported
    self.response.write(self.entity.published)
    return
  logging.debug("BlogWebmention entity: '%s'",
                self.entity.key.urlsafe().decode())

  # fetch source page
  fetched = self.fetch_mf2(self.source_url)
  if not fetched:
    return
  resp, mf2 = fetched

  item = self.find_mention_item(mf2.get('items', []))
  if not item:
    return self.error(
      'Could not find target URL %s in source page %s' %
      (self.target_url, resp.url),
      data=mf2, log_exception=False)

  # default author to target domain
  author_name = domain
  author_url = 'http://%s/' % domain

  # extract author name and URL from h-card, if any
  props = item['properties']
  author = first_value(props, 'author')
  if author:
    if isinstance(author, str):
      author_name = author
    else:
      author_props = author.get('properties', {})
      author_name = first_value(author_props, 'name')
      author_url = first_value(author_props, 'url')

  # if present, u-url overrides source url
  u_url = first_value(props, 'url')
  if u_url:
    self.entity.u_url = u_url

  # generate content: mention's HTML (or plain value) plus a "via" backlink
  content = props['content'][0]  # find_mention_item() guaranteed this is here
  text = (content.get('html') or content.get('value')).strip()
  source_url = self.entity.source_url()
  text += ' <br /> <a href="%s">via %s</a>' % (
    source_url, util.domain_from_link(source_url))

  # write comment
  try:
    self.entity.published = self.source.create_comment(
      self.target_url, author_name, author_url, text)
  except Exception as e:
    code, body = util.interpret_http_exception(e)
    msg = 'Error: %s %s; %s' % (code, e, body)
    if code == '401':
      # fix: exc_info (not stack_info) so the exception traceback is logged,
      # consistent with the other handler versions
      logging.warning('Disabling source due to: %s' % e, exc_info=True)
      self.source.status = 'disabled'
      self.source.put()
      return self.error(msg, status=code, report=self.source.is_beta_user())
    elif code == '404':
      # post is gone
      return self.error(msg, status=code, report=False)
    elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
      # transient failure upstream; signal retry
      return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE, report=False)
    elif code or body:
      return self.error(msg, status=code, report=True)
    else:
      raise

  # write results to datastore
  self.entity.status = 'complete'
  self.entity.put()
  self.response.write(json_dumps(self.entity.published))
def post(self, source_short_name):
  """Handles an incoming webmention targeting a blog source's post.

  Looks up the registered source by the target URL's domain (falling back to
  the target page's rel-canonical domains), fetches the source page's
  microformats2, extracts the mention's author and content, and publishes it
  to the blog as a comment.

  Args:
    source_short_name: string, the source class's short name, e.g. 'wordpress'
  """
  # fix: the original used '%self', a typo'd format specifier ('%s' + 'elf')
  logging.info('Params: %s', self.request.params.items())

  # strip fragments from source and target url
  self.source_url = urlparse.urldefrag(util.get_required_param(self, 'source'))[0]
  self.target_url = urlparse.urldefrag(util.get_required_param(self, 'target'))[0]

  # follow target url through any redirects, strip utm_* query params
  resp = util.follow_redirects(self.target_url)
  redirected_target_urls = [r.url for r in resp.history]
  self.target_url = util.clean_url(resp.url)

  # parse and validate target URL
  domain = util.domain_from_link(self.target_url)
  if not domain:
    return self.error('Could not parse target URL %s' % self.target_url)

  # look up source by domain; must have webmention enabled and be active
  source_cls = models.sources[source_short_name]
  domain = domain.lower()
  self.source = (source_cls.query()
                 .filter(source_cls.domains == domain)
                 .filter(source_cls.features == 'webmention')
                 .filter(source_cls.status == 'enabled')
                 .get())

  if not self.source:
    # check for a rel-canonical link. Blogger uses these when it serves a post
    # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs
    # epeus.blogspot.com.
    # https://github.com/snarfed/bridgy/issues/805
    mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
    if not mf2:
      # fetch_mf2() already wrote the error response
      return
    domains = util.dedupe_urls(
      util.domain_from_link(url)
      for url in mf2[1].get('rels', {}).get('canonical', []))
    if domains:
      self.source = (source_cls.query()
                     .filter(source_cls.domains.IN(domains))
                     .filter(source_cls.features == 'webmention')
                     .filter(source_cls.status == 'enabled')
                     .get())

  if not self.source:
    return self.error(
      'Could not find %s account for %s. Is it registered with Bridgy?' %
      (source_cls.GR_CLASS.NAME, domain))

  # check that the target URL path is supported
  target_path = urlparse.urlparse(self.target_url).path
  if target_path in ('', '/'):
    return self.error('Home page webmentions are not currently supported.',
                      status=202)
  for pattern in self.source.PATH_BLACKLIST:
    if pattern.match(target_path):
      return self.error('%s webmentions are not supported for URL path: %s' %
                        (self.source.GR_CLASS.NAME, target_path), status=202)

  # create BlogWebmention entity keyed on '<source_url> <target_url>'
  id = '%s %s' % (self.source_url, self.target_url)
  self.entity = BlogWebmention.get_or_insert(
    id, source=self.source.key, redirected_target_urls=redirected_target_urls)
  if self.entity.status == 'complete':
    # TODO: response message saying update isn't supported
    self.response.write(self.entity.published)
    return
  logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe())

  # fetch source page
  resp = self.fetch_mf2(self.source_url)
  if not resp:
    return
  self.fetched, data = resp

  item = self.find_mention_item(data.get('items', []))
  if not item:
    return self.error('Could not find target URL %s in source page %s' %
                      (self.target_url, self.fetched.url),
                      data=data, log_exception=False)

  # default author to target domain
  author_name = domain
  author_url = 'http://%s/' % domain

  # extract author name and URL from h-card, if any
  props = item['properties']
  author = first_value(props, 'author')
  if author:
    if isinstance(author, basestring):
      author_name = author
    else:
      author_props = author.get('properties', {})
      author_name = first_value(author_props, 'name')
      author_url = first_value(author_props, 'url')

  # if present, u-url overrides source url
  u_url = first_value(props, 'url')
  if u_url:
    self.entity.u_url = u_url

  # generate content: mention's HTML (or plain value) plus a "via" backlink
  content = props['content'][0]  # find_mention_item() guaranteed this is here
  text = (content.get('html') or content.get('value')).strip()
  source_url = self.entity.source_url()
  text += ' <br /> <a href="%s">via %s</a>' % (
    source_url, util.domain_from_link(source_url))

  # write comment
  try:
    self.entity.published = self.source.create_comment(
      self.target_url, author_name, author_url, text)
  except Exception as e:
    code, body = util.interpret_http_exception(e)
    msg = 'Error: %s %s; %s' % (code, e, body)
    if code == '401':
      # auth revoked: stop trying this source
      logging.warning('Disabling source due to: %s' % e, exc_info=True)
      self.source.status = 'disabled'
      self.source.put()
      return self.error(msg, status=code, mail=self.source.is_beta_user())
    elif code == '404':
      # post is gone
      return self.error(msg, status=code, mail=False)
    elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
      # transient failure upstream; signal retry
      return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE, mail=False)
    elif code or body:
      return self.error(msg, status=code, mail=True)
    else:
      raise

  # write results to datastore
  self.entity.status = 'complete'
  self.entity.put()
  self.response.write(json.dumps(self.entity.published))
def user(site, id):
  """View for a user page.

  Args:
    site: string, a source short name, e.g. 'twitter'
    id: string, the user's id, or another identifying value (domain,
      username, etc.) that gets redirected to the canonical path

  Returns:
    rendered template response, or a 404 page when the source isn't found
  """
  cls = models.sources.get(site)
  if not cls:
    return render_template('user_not_found.html'), 404

  source = cls.lookup(id)

  if not source:
    # fall back to matching other identifying properties, then redirect to
    # the canonical user path
    key = cls.query(
        ndb.OR(*[
            ndb.GenericProperty(prop) == id
            for prop in ('domains', 'inferred_username', 'name', 'username')
        ])).get(keys_only=True)
    if key:
      return redirect(cls(key=key).bridgy_path(), code=301)

  if not source or not source.features:
    return render_template('user_not_found.html'), 404

  source.verify()
  source = util.preprocess_source(source)

  vars = {
      'source': source,
      'logs': logs,
      'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
      'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  }

  # Blog webmention promos: shown when the webmention feature is off but the
  # source's domains suggest a supported blog host without a registered account
  if 'webmention' not in source.features:
    if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
      vars[source.SHORT_NAME + '_promo'] = True
    else:
      for domain in source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (util.domain_or_parent_in(domain, ['tumblr.com']) and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        # NOTE(review): the tumblr branch passes a list but this passes a bare
        # string — confirm util.domain_or_parent_in accepts both forms
        elif (util.domain_or_parent_in(domain, 'wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses: up to 10 public ones, scanning at most ~200 entities
  if 'listen' in source.features or 'email' in source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      # parse an ISO8601 timestamp query param; returns None when absent
      val = request.values.get(param)
      try:
        return util.parse_iso8601(val.replace(' ', '+')) if val else None
      except BaseException:
        error(f"Couldn't parse {param}, {val!r} as ISO8601")

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      error("can't handle both responses_before and responses_after")
    elif after:
      query = query.filter(Response.updated > after).order(Response.updated)
    elif before:
      query = query.filter(Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json_loads(r.response_json)
      r.activities = [json_loads(a) for a in r.activities_json]

      # skip responses/activities that aren't public
      if (not source.is_activity_public(r.response) or
          not all(source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        r.activities = []

      verb = r.response.get('verb')
      # for invites, the "actor" shown is the invited object, not the inviter
      r.actor = (r.response.get('object') if verb == 'invite'
                 else r.response.get('author') or r.response.get('actor')) or {}

      activity_content = ''
      for a in r.activities + [r.response]:
        if not a.get('content'):
          obj = a.get('object', {})
          a['content'] = activity_content = (
              obj.get('content') or obj.get('displayName') or
              # historical, from a Reddit bug fixed in granary@4f9df7c
              obj.get('name') or '')

      response_content = r.response.get('content')
      phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'rsvp-interested': 'is interested',
          'invite': 'is invited',
      }
      phrase = phrases.get(r.type) or phrases.get(verb)
      # synthesize a display phrase unless a repost carries its own commentary
      if phrase and (r.type != 'repost' or
                     activity_content.startswith(response_content)):
        r.response['content'] = f'{r.actor.get("displayName") or ""} {phrase}.'

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, request)

      # generate original post links
      r.links = process_webmention_links(r)
      r.original_links = [util.pretty_link(url, new_tab=True)
                          for url in r.original_posts]

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    new_after = (
        before if before
        else vars['responses'][0].updated
        if vars['responses'] and query_iter.probably_has_next() and (before or after)
        else None)
    if new_after:
      vars['responses_after_link'] = f'?responses_after={new_after.isoformat()}#responses'

    new_before = (
        after if after
        else vars['responses'][-1].updated
        if vars['responses'] and query_iter.probably_has_next()
        else None)
    if new_before:
      vars['responses_before_link'] = f'?responses_before={new_before.isoformat()}#responses'

    vars['next_poll'] = max(
        source.last_poll_attempt + source.poll_period(),
        # lower bound is 1 minute from now
        util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes: the 10 most recently updated
  if 'publish' in source.features:
    publishes = Publish.query().filter(Publish.source == source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      p.pretty_page = util.pretty_link(
          p.key.parent().id(),
          attrs={'class': 'original-post u-url u-name'},
          new_tab=True)

    vars['publishes'] = publishes

  if 'webmention' in source.features:
    # Blog posts: the 10 most recently created
    blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(
          b.key.id(), text=text,
          attrs={'class': 'original-post u-url u-name'},
          max_length=40, new_tab=True)

    # Blog webmentions: the 10 most recently updated
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
          w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      # keep the host visible when the target lives on one of the source's
      # own domains; swallow parse failures and fall back to hiding it
      try:
        target_is_source = (urllib.parse.urlparse(
            w.target_url()).netloc in source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
          w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
          keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return render_template(f'{source.SHORT_NAME}_user.html', **vars)
def test_target_is_home_page(self):
  """Webmentions targeting a site's home page should be rejected."""
  self.assert_error('Home page webmentions are not currently supported.',
                    target='http://foo.com/')
  # assertEqual: assertEquals is a deprecated alias, removed in Python 3.12
  self.assertEqual(0, BlogWebmention.query().count())
def post(self, source_short_name):
  """Handles an incoming webmention targeting a blog source's post.

  Looks up the registered source by the target URL's domain, fetches the
  source page's microformats2, extracts the mention's author and content,
  and publishes it to the blog as a comment.

  Args:
    source_short_name: string, the source class's short name, e.g. 'wordpress'
  """
  # fix: the original used '%self', a typo'd format specifier ('%s' + 'elf')
  logging.info('Params: %s', self.request.params.items())

  # strip fragments from source and target url
  self.source_url = urlparse.urldefrag(util.get_required_param(self, 'source'))[0]
  self.target_url = urlparse.urldefrag(util.get_required_param(self, 'target'))[0]

  # follow target url through any redirects, strip utm_* query params
  resp = util.follow_redirects(self.target_url)
  redirected_target_urls = [r.url for r in resp.history]
  self.target_url = util.clean_url(resp.url)

  # parse and validate target URL
  domain = util.domain_from_link(self.target_url)
  if not domain:
    return self.error('Could not parse target URL %s' % self.target_url)

  # look up source by domain; must have webmention enabled and be active
  source_cls = models.sources[source_short_name]
  domain = domain.lower()
  self.source = (source_cls.query()
                 .filter(source_cls.domains == domain)
                 .filter(source_cls.features == 'webmention')
                 .filter(source_cls.status == 'enabled')
                 .get())
  if not self.source:
    return self.error(
      'Could not find %s account for %s. Is it registered with Bridgy?' %
      (source_cls.GR_CLASS.NAME, domain))

  if urlparse.urlparse(self.target_url).path in ('', '/'):
    return self.error('Home page webmentions are not currently supported.')

  # create BlogWebmention entity keyed on '<source_url> <target_url>'
  id = u'%s %s' % (self.source_url, self.target_url)
  self.entity = BlogWebmention.get_or_insert(
    id, source=self.source.key, redirected_target_urls=redirected_target_urls)
  if self.entity.status == 'complete':
    # TODO: response message saying update isn't supported
    self.response.write(self.entity.published)
    return
  logging.debug('BlogWebmention entity: %s', self.entity.key.urlsafe())

  # fetch source page
  resp = self.fetch_mf2(self.source_url)
  if not resp:
    return
  self.fetched, data = resp

  item = self.find_mention_item(data)
  if not item:
    return self.error('Could not find target URL %s in source page %s' %
                      (self.target_url, self.fetched.url),
                      data=data, log_exception=False)

  # default author to target domain
  author_name = domain
  author_url = 'http://%s/' % domain

  # extract author name and URL from h-card, if any
  props = item['properties']
  author = first_value(props, 'author')
  if author:
    if isinstance(author, basestring):
      author_name = author
    else:
      author_props = author.get('properties', {})
      author_name = first_value(author_props, 'name')
      author_url = first_value(author_props, 'url')

  # if present, u-url overrides source url
  u_url = first_value(props, 'url')
  if u_url:
    self.entity.u_url = u_url

  # generate content: mention's HTML (or plain value) plus a "via" backlink
  content = props['content'][0]  # find_mention_item() guaranteed this is here
  text = (content.get('html') or content.get('value')).strip()
  source_url = self.entity.source_url()
  text += ' <br /> <a href="%s">via %s</a>' % (
    source_url, util.domain_from_link(source_url))

  # write comment
  try:
    self.entity.published = self.source.create_comment(
      self.target_url, author_name, author_url, text)
  except Exception as e:  # 'as' form: valid on both Python 2.6+ and 3
    code, body = util.interpret_http_exception(e)
    msg = 'Error: %s %s; %s' % (code, e, body)
    if code == '401':
      # auth revoked: stop trying this source
      logging.warning('Disabling source!')
      self.source.status = 'disabled'
      self.source.put()
      return self.error(msg, status=code, mail=False)
    elif code == '404':
      # post is gone
      return self.error(msg, status=code, mail=False)
    elif code or body:
      return self.error(msg, status=code, mail=True)
    else:
      raise
def post(self, source_short_name):
  """Handles an incoming webmention targeting a blog source's post.

  Looks up the registered source by the target URL's domain, fetches the
  source page's microformats2, extracts the mention's author and content,
  and publishes it to the blog as a comment.

  Args:
    source_short_name: string, the source class's short name, e.g. 'wordpress'
  """
  # fix: the original used '%self', a typo'd format specifier ('%s' + 'elf')
  logging.info('Params: %s', self.request.params.items())

  # strip fragments from source and target url
  self.source_url = urlparse.urldefrag(util.get_required_param(self, 'source'))[0]
  self.target_url = urlparse.urldefrag(util.get_required_param(self, 'target'))[0]

  # clean target url (strip utm_* query params)
  self.target_url = util.clean_webmention_url(self.target_url)

  # parse and validate target URL
  domain = util.domain_from_link(self.target_url)
  if not domain:
    # fix: the original passed an undefined name 'msg' as the first argument,
    # which raised NameError on this path
    return self.error('Could not parse target URL %s' % self.target_url)

  # look up source by domain; must have webmention enabled and be active
  source_cls = SOURCES[source_short_name]
  domain = domain.lower()
  self.source = (source_cls.query()
                 .filter(source_cls.domains == domain)
                 .filter(source_cls.features == 'webmention')
                 .filter(source_cls.status == 'enabled')
                 .get())
  if not self.source:
    return self.error(
      'Could not find %s account for %s. Is it registered with Bridgy?' %
      (source_cls.AS_CLASS.NAME, domain),
      mail=False)

  # create BlogWebmention entity keyed on '<source_url> <target_url>'
  id = '%s %s' % (self.source_url, self.target_url)
  self.entity = BlogWebmention.get_or_insert(id, source=self.source.key)
  if self.entity.status == 'complete':
    # TODO: response message saying update isn't supported
    self.response.write(self.entity.published)
    return
  logging.debug('BlogWebmention entity: %s', self.entity.key.urlsafe())

  # fetch source page
  resp = self.fetch_mf2(self.source_url)
  if not resp:
    return
  self.fetched, data = resp

  item = self.find_mention_item(data)
  if not item:
    return self.error('Could not find target URL %s in source page %s' %
                      (self.target_url, self.fetched.url),
                      data=data, log_exception=False)

  # default author to target domain
  author_name = domain
  author_url = 'http://%s/' % domain

  # extract author name and URL from h-card, if any
  props = item['properties']
  author = first_value(props, 'author')
  if author:
    if isinstance(author, basestring):
      author_name = author
    else:
      author_props = author.get('properties', {})
      author_name = first_value(author_props, 'name')
      author_url = first_value(author_props, 'url')

  # if present, u-url overrides source url
  u_url = first_value(props, 'url')
  if u_url:
    self.entity.u_url = u_url

  # generate content: mention's HTML (or plain value) plus a "via" backlink
  content = props['content'][0]  # find_mention_item() guaranteed this is here
  text = (content.get('html') or content.get('value')).strip()
  text += ' <br /> <a href="%s">via %s</a>' % (
    self.entity.source_url(), util.domain_from_link(self.entity.source_url()))

  # write comment
  try:
    self.entity.published = self.source.create_comment(
      self.target_url, author_name, author_url, text)
  except urllib2.HTTPError as e:  # 'as' form: valid on both Python 2.6+ and 3
    body = e.read()
    logging.error('Error response body: %r', body)
    return self.error('Error: %s; %s' % (e, body), status=e.code)
def template_vars(self):
  """Returns the template variable dict for this user's page.

  Extends the superclass vars with the source entity, poll constants, and,
  depending on which features the source has enabled, blog-webmention promos,
  paged responses, publishes, blog posts, and blog webmentions.
  """
  vars = super(UserHandler, self).template_vars()
  vars.update({
      'source': self.source,
      'EPOCH': util.EPOCH,
      'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
      'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  })
  if not self.source:
    return vars

  if isinstance(self.source, instagram.Instagram):
    auth = self.source.auth_entity
    # IndieAuth id when available, else the source's first web site URL
    vars['indieauth_me'] = (
      auth.id if isinstance(auth, indieauth.IndieAuth)
      else self.source.domain_urls[0] if self.source.domain_urls
      else None)

  # Blog webmention promos: shown when the webmention feature is off but the
  # source's domains suggest a supported blog host without a registered account
  if 'webmention' not in self.source.features:
    if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
      vars[self.source.SHORT_NAME + '_promo'] = True
    else:
      for domain in self.source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (domain.endswith('tumblr.com') and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (domain.endswith('wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses: up to 10 public ones, scanning at most ~200 entities
  if 'listen' in self.source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == self.source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      # parse an ISO8601 timestamp query param; returns None when absent
      val = self.request.get(param)
      try:
        return util.parse_iso8601(val) if val else None
      except:
        msg = "Couldn't parse %s %r as ISO8601" % (param, val)
        logging.exception(msg)
        self.abort(400, msg)

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      self.abort(400, "can't handle both responses_before and responses_after")
    elif after:
      query = query.filter(Response.updated > after).order(Response.updated)
    elif before:
      query = query.filter(Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json.loads(r.response_json)
      r.activities = [json.loads(a) for a in r.activities_json]

      # skip responses/activities that aren't public
      if (not self.source.is_activity_public(r.response) or
          not all(self.source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        r.activities = []

      r.actor = r.response.get('author') or r.response.get('actor', {})

      # fall back to the wrapped object's content when missing
      for a in r.activities + [r.response]:
        if not a.get('content'):
          a['content'] = a.get('object', {}).get('content')

      if not r.response.get('content'):
        # synthesize display content for content-less response types
        phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'rsvp-interested': 'is interested',
          'invite': 'is invited',
        }
        r.response['content'] = '%s %s.' % (
          r.actor.get('displayName') or '',
          phrases.get(r.type) or phrases.get(r.response.get('verb')))

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, self)

      # generate original post links
      r.links = self.process_webmention_links(r)
      r.original_links = [util.pretty_link(url, new_tab=True)
                          for url in r.original_posts]

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    new_after = (
      before if before
      else vars['responses'][0].updated
      if vars['responses'] and query_iter.probably_has_next() and (before or after)
      else None)
    if new_after:
      vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                      new_after.isoformat())

    new_before = (
      after if after
      else vars['responses'][-1].updated
      if vars['responses'] and query_iter.probably_has_next()
      else None)
    if new_before:
      vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                       new_before.isoformat())

    vars['next_poll'] = max(
      self.source.last_poll_attempt + self.source.poll_period(),
      # lower bound is 1 minute from now
      util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes: the 10 most recently updated
  if 'publish' in self.source.features:
    publishes = Publish.query().filter(Publish.source == self.source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      p.pretty_page = util.pretty_link(
        p.key.parent().id(),
        attrs={'class': 'original-post u-url u-name'},
        new_tab=True)

    vars['publishes'] = publishes

  if 'webmention' in self.source.features:
    # Blog posts: the 10 most recently created
    blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = self.process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        text = None
      b.pretty_url = util.pretty_link(
        b.key.id(), text=text,
        attrs={'class': 'original-post u-url u-name'},
        max_length=40, new_tab=True)

    # Blog webmentions: the 10 most recently updated
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == self.source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
        w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      # keep the host visible when the target lives on one of the source's
      # own domains; swallow parse failures and fall back to hiding it
      try:
        target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                            self.source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
        w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
        keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return vars
def template_vars(self):
  """Returns the template variable dict for rendering this user's page.

  Starts from the base handler's vars, then adds the source entity plus
  per-feature data: recent responses (with before/after paging links) when
  the source has the 'listen' feature, recent publishes for 'publish', and
  recent blog posts and blog webmentions for 'webmention'. Promo flags for
  blog webmention signup are added when 'webmention' is not yet enabled.
  """
  vars = super(UserHandler, self).template_vars()
  vars.update({
      'source': self.source,
      'EPOCH': util.EPOCH,
      'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
      'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
  })
  # no source loaded: render with just the defaults above
  if not self.source:
    return vars

  if isinstance(self.source, instagram.Instagram):
    # pick the IndieAuth 'me' value: the auth entity's id if the user signed
    # in via IndieAuth, otherwise their first web site URL (if any)
    auth = self.source.auth_entity
    vars['indieauth_me'] = (
      auth.id if isinstance(auth, indieauth.IndieAuth)
      else self.source.domain_urls[0] if self.source.domain_urls
      else None)

  # Blog webmention promos
  if 'webmention' not in self.source.features:
    if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
      vars[self.source.SHORT_NAME + '_promo'] = True
    else:
      # only promo a blog platform if this domain isn't already registered
      # as a source of that type
      for domain in self.source.domains:
        if ('.blogspot.' in domain and  # Blogger uses country TLDs
            not Blogger.query(Blogger.domains == domain).get()):
          vars['blogger_promo'] = True
        elif (domain.endswith('tumblr.com') and
              not Tumblr.query(Tumblr.domains == domain).get()):
          vars['tumblr_promo'] = True
        elif (domain.endswith('wordpress.com') and
              not WordPress.query(WordPress.domains == domain).get()):
          vars['wordpress_promo'] = True

  # Responses
  if 'listen' in self.source.features:
    vars['responses'] = []
    query = Response.query().filter(Response.source == self.source.key)

    # if there's a paging param (responses_before or responses_after), update
    # query with it
    def get_paging_param(param):
      # parses the query param as an ISO8601 timestamp, or None if absent.
      # bare except is deliberate: any parse failure becomes an HTTP 400.
      val = self.request.get(param)
      try:
        return util.parse_iso8601(val) if val else None
      except:
        msg = "Couldn't parse %s %r as ISO8601" % (param, val)
        logging.exception(msg)
        self.abort(400, msg)

    before = get_paging_param('responses_before')
    after = get_paging_param('responses_after')
    if before and after:
      self.abort(400, "can't handle both responses_before and responses_after")
    elif after:
      # paging forward: oldest-first so the window starts just after 'after'
      query = query.filter(Response.updated > after).order(Response.updated)
    elif before:
      # paging backward: newest-first, everything older than 'before'
      query = query.filter(Response.updated < before).order(-Response.updated)
    else:
      query = query.order(-Response.updated)

    # collect up to 10 public responses, scanning at most ~200 rows
    query_iter = query.iter()
    for i, r in enumerate(query_iter):
      r.response = json.loads(r.response_json)
      r.activities = [json.loads(a) for a in r.activities_json]

      # skip anything that isn't fully public
      if (not self.source.is_activity_public(r.response) or
          not all(self.source.is_activity_public(a) for a in r.activities)):
        continue
      elif r.type == 'post':
        # NOTE(review): for 'post' type the response is the activity itself,
        # so the activities list looks redundant here — confirm
        r.activities = []

      r.actor = r.response.get('author') or r.response.get('actor', {})

      # fall back to the wrapped object's content when the top level has none
      for a in r.activities + [r.response]:
        if not a.get('content'):
          a['content'] = a.get('object', {}).get('content')

      # content-less responses (likes, reposts, RSVPs, invites) get a
      # human-readable phrase built from the actor's name and the type/verb
      if not r.response.get('content'):
        phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'rsvp-interested': 'is interested',
          'invite': 'is invited',
        }
        r.response['content'] = '%s %s.' % (
          r.actor.get('displayName') or '',
          phrases.get(r.type) or phrases.get(r.response.get('verb')))

      # convert image URL to https if we're serving over SSL
      image_url = r.actor.setdefault('image', {}).get('url')
      if image_url:
        r.actor['image']['url'] = util.update_scheme(image_url, self)

      # generate original post links
      r.links = self.process_webmention_links(r)
      r.original_links = [util.pretty_link(url, new_tab=True)
                          for url in r.original_posts]

      vars['responses'].append(r)
      if len(vars['responses']) >= 10 or i > 200:
        break

    # display newest first regardless of the query's scan direction
    vars['responses'].sort(key=lambda r: r.updated, reverse=True)

    # calculate new paging param(s)
    # 'newer' link: when already paging, reuse 'before' or anchor at the
    # newest response shown; 'older' link: reuse 'after' or anchor at the
    # oldest response shown, but only if more results probably exist
    new_after = (
      before if before else
      vars['responses'][0].updated
        if vars['responses'] and query_iter.probably_has_next() and (before or after)
      else None)
    if new_after:
      vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                       new_after.isoformat())

    new_before = (
      after if after else
      vars['responses'][-1].updated
        if vars['responses'] and query_iter.probably_has_next()
      else None)
    if new_before:
      vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                       new_before.isoformat())

    vars['next_poll'] = max(
      self.source.last_poll_attempt + self.source.poll_period(),
      # lower bound is 1 minute from now
      # NOTE(review): comment says 1 minute but the delta is 90s — confirm
      util.now_fn() + datetime.timedelta(seconds=90))

  # Publishes
  if 'publish' in self.source.features:
    publishes = Publish.query().filter(Publish.source == self.source.key)\
                               .order(-Publish.updated)\
                               .fetch(10)
    for p in publishes:
      # the parent key id is the published page's URL
      # NOTE(review): assumes the key id is a UTF-8 byte string — confirm
      p.pretty_page = util.pretty_link(
        p.key.parent().id().decode('utf-8'),
        attrs={'class': 'original-post u-url u-name'},
        new_tab=True)

    vars['publishes'] = publishes

  if 'webmention' in self.source.features:
    # Blog posts
    blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                .order(-BlogPost.created)\
                                .fetch(10)
    for b in blogposts:
      b.links = self.process_webmention_links(b)
      try:
        text = b.feed_item.get('title')
      except ValueError:
        # NOTE(review): presumably feed_item parses stored JSON and raises
        # ValueError when it's unparseable — confirm
        text = None
      b.pretty_url = util.pretty_link(
        b.key.id(), text=text,
        attrs={'class': 'original-post u-url u-name'},
        max_length=40, new_tab=True)

    # Blog webmentions
    webmentions = BlogWebmention.query()\
        .filter(BlogWebmention.source == self.source.key)\
        .order(-BlogWebmention.updated)\
        .fetch(10)
    for w in webmentions:
      w.pretty_source = util.pretty_link(
        w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
      # show the full host on the target link only when the target is on one
      # of this user's own domains; swallow any URL-parsing failure
      try:
        target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                            self.source.domains)
      except BaseException:
        target_is_source = False
      w.pretty_target = util.pretty_link(
        w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
        keep_host=target_is_source)

    vars.update({'blogposts': blogposts, 'webmentions': webmentions})

  return vars