def test_combined_reply_and_tag_of_error(self):
    """An h-entry with both tag-of and in-reply-to raises NotImplementedError.

    https://github.com/snarfed/bridgy/issues/832
    """
    properties = {
        'tag-of': [{'value': 'https://a/post'}],
        'in-reply-to': [{'value': 'https://another/post'}],
    }
    entry = {'type': ['h-entry'], 'properties': properties}

    with self.assertRaises(NotImplementedError):
        microformats2.json_to_object(entry)
def test_combined_reply_and_tag_of_error(self):
    """Converting an entry that is both a reply and a tag-of must fail.

    https://github.com/snarfed/bridgy/issues/832
    """
    tag_of = [{'value': 'https://a/post'}]
    in_reply_to = [{'value': 'https://another/post'}]
    mf2 = {
        'type': ['h-entry'],
        'properties': {
            'tag-of': tag_of,
            'in-reply-to': in_reply_to,
        },
    }

    with self.assertRaises(NotImplementedError):
        microformats2.json_to_object(mf2)
def get(self):
    """Fetches a URL and writes its contents out as ActivityStreams.

    Required query params:
      input: format of the fetched document; one of 'activitystreams',
        'html', or 'json-mf2'
      url: the document to fetch

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input_format = util.get_required_param(self, 'input')
    if input_format not in expected_inputs:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input_format, expected_inputs))

    # fetch url
    url = util.get_required_param(self, 'url')
    logging.info('Fetching %s', url)
    resp = urllib2.urlopen(url, timeout=appengine_config.HTTP_TIMEOUT)
    final_url = resp.geturl()
    if url != final_url:
        logging.info('Redirected to %s', final_url)
    body = resp.read()

    # decode data
    if input_format == 'html':
        activities = microformats2.html_to_activities(body, final_url)
    elif input_format == 'json-mf2':
        items = json.loads(body).get('items', [])
        activities = [microformats2.json_to_object(item) for item in items]
    else:
        # only 'activitystreams' is left after the validation above
        activities = json.loads(body)

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response)
def get(self):
    """Handles a URL conversion request.

    Reads the required 'input' and 'url' query params, fetches the URL,
    decodes the body according to 'input', and writes the resulting
    activities via write_response().

    Raises:
      exc.HTTPBadRequest: if 'input' is not a supported format
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    kind = util.get_required_param(self, 'input')
    if kind not in expected_inputs:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (kind, expected_inputs))

    # fetch url
    url = util.get_required_param(self, 'url')
    logging.info('Fetching %s', url)
    resp = urllib2.urlopen(url, timeout=appengine_config.HTTP_TIMEOUT)
    if url != resp.geturl():
        logging.info('Redirected to %s', resp.geturl())
    body = resp.read()

    # decode data. each decoder is lazy so only the selected one runs.
    decoders = {
        'activitystreams': lambda: json.loads(body),
        'html': lambda: microformats2.html_to_activities(body, resp.geturl()),
        'json-mf2': lambda: [
            microformats2.json_to_object(item)
            for item in json.loads(body).get('items', [])
        ],
    }
    activities = decoders[kind]()

    self.write_response(
        source.Source.make_activities_base_response(activities))
def post(self):
    """Handles an incoming webmention: fetches the source and federates it.

    Fetches the 'source' param, parses its microformats2, checks that it
    links back to this host, converts the first h-entry to an AS1 object
    stored in self.source_obj, then tries ActivityPub and falls back to
    Salmon.
    """
    logging.info('(Params: %s )', self.request.params.items())

    # fetch source page
    source = util.get_required_param(self, 'source')
    source_resp = common.requests_get(source)
    self.source_url = source_resp.url or source
    netloc = urlparse.urlparse(self.source_url).netloc
    self.source_domain = netloc.split(':')[0]
    self.source_mf2 = mf2py.parse(source_resp.text, url=self.source_url,
                                  img_with_alt=True)
    # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(self.source_mf2, indent=2))

    # check for backlink to bridgy fed (for webmention spec and to confirm
    # source's intent to federate to mastodon)
    host_url = self.request.host_url
    page = source_resp.text
    has_backlink = (host_url in page or
                    urllib.quote(host_url, safe='') in page)
    if not has_backlink:
        common.error(self, "Couldn't find link to %s" % host_url)

    # convert source page to ActivityStreams
    entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
    if not entry:
        common.error(self, 'No microformats2 found on %s' % self.source_url)

    logging.info('First entry: %s', json.dumps(entry, indent=2))

    # make sure it has url, since we use that for AS2 id, which is required
    # for ActivityPub.
    props = entry.setdefault('properties', {})
    if not props.get('url'):
        props['url'] = [self.source_url]

    self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
    logging.info('Converted to AS1: %s', json.dumps(self.source_obj, indent=2))

    # Salmon is only attempted when ActivityPub returns a falsy result
    if not self.try_activitypub():
        self.try_salmon()
def render():
    """Fetches a stored Response and renders it as HTML.

    Reads the required 'source' and 'target' request params, looks up the
    Response stored under '<source> <target>', converts whichever of its
    mf2, AS2, or Atom payloads is present to AS1, and renders that as HTML
    with a meta refresh pointing at the source URL.

    Returns:
      str, the rendered HTML
    """
    # function-scope import so this block doesn't depend on file-level imports
    from html import escape as html_escape

    source = flask_util.get_required_param('source')
    target = flask_util.get_required_param('target')

    id = f'{source} {target}'
    resp = Response.get_by_id(id)
    if not resp:
        error(f'No stored response for {id}', status=404)

    if resp.source_mf2:
        as1 = microformats2.json_to_object(json_loads(resp.source_mf2))
    elif resp.source_as2:
        as1 = as2.to_as1(json_loads(resp.source_as2))
    elif resp.source_atom:
        as1 = atom.atom_to_activity(resp.source_atom)
    else:
        error(f'Stored response for {id} has no data', status=404)

    # add HTML meta redirect to source page. should trigger for end users in
    # browsers but not for webmention receivers (hopefully).
    rendered = microformats2.activities_to_html([as1])
    utf8 = '<meta charset="utf-8">'
    # source comes straight from the request, so escape it before putting it
    # inside an HTML attribute; otherwise a quote in it could break out of
    # the content="..." value.
    safe_source = html_escape(source, quote=True)
    refresh = f'<meta http-equiv="refresh" content="0;url={safe_source}">'
    return rendered.replace(utf8, utf8 + '\n' + refresh)
def get(self, domain):
    """Serves an AS2 actor for a domain, built from its representative h-card.

    Args:
      domain: str, the domain whose home page is fetched over http
    """
    if domain.split('.')[-1] in common.TLD_BLOCKLIST:
        self.error('', status=404)

    mf2 = util.fetch_mf2('http://%s/' % domain, gateway=True,
                         headers=common.HEADERS)
    # logging.info('Parsed mf2 for %s: %s', resp.url, json_dumps(mf2, indent=2))

    hcard = mf2util.representative_hcard(mf2, mf2['url'])
    logging.info('Representative h-card: %s', json_dumps(hcard, indent=2))
    if not hcard:
        self.error(
            "Couldn't find a representative h-card "
            "(http://microformats.org/wiki/representative-hcard-parsing) "
            "on %s" % mf2['url'])

    key = MagicKey.get_or_create(domain)
    as1_actor = microformats2.json_to_object(hcard)
    obj = self.postprocess_as2(as2.from_as1(as1_actor), key=key)

    # collection endpoints all live under <host_url>/<domain>/
    host_url = self.request.host_url
    obj.update({
        name: '%s/%s/%s' % (host_url, domain, name)
        for name in ('inbox', 'outbox', 'following', 'followers')
    })
    logging.info('Returning: %s', json_dumps(obj, indent=2))

    self.response.headers.update({
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    })
    self.response.write(json_dumps(obj, indent=2))
def test_post_type_discovery(self):
    """like-of and repost-of entries become like and share activities."""
    for prop, verb in ('like-of', 'like'), ('repost-of', 'share'):
        obj = microformats2.json_to_object({
            'type': ['h-entry'],
            'properties': {prop: ['http://foo/bar']},
        })
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12)
        self.assertEqual('activity', obj['objectType'])
        self.assertEqual(verb, obj['verb'])
def get(self, domain):
    """Fetches a domain's home page and serves its h-card as an AS2 actor.

    Args:
      domain: str, the domain to fetch over http
    """
    resp = common.requests_get('http://%s/' % domain)
    mf2 = mf2py.parse(resp.text, url=resp.url, img_with_alt=True)
    # logging.info('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))

    hcard = mf2util.representative_hcard(mf2, resp.url)
    logging.info('Representative h-card: %s', json.dumps(hcard, indent=2))
    if not hcard:
        common.error(
            self,
            "Couldn't find a representative h-card "
            "(http://microformats.org/wiki/representative-hcard-parsing) "
            "on %s" % resp.url)

    key = MagicKey.get_or_create(domain)
    as1_actor = microformats2.json_to_object(hcard)
    obj = common.postprocess_as2(as2.from_as1(as1_actor), key=key)

    # collection endpoints all live under <HOST_URL>/<domain>/
    host_url = appengine_config.HOST_URL
    obj.update({
        name: '%s/%s/%s' % (host_url, domain, name)
        for name in ('inbox', 'outbox', 'following', 'followers')
    })
    body = json.dumps(obj, indent=2)
    logging.info('Returning: %s', body)

    self.response.headers.update({
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    })
    self.response.write(body)
def test_html_content_and_summary(self):
    """content prefers the html field; summary only uses the plain value."""
    # (expected content, expected summary, mf2 property value)
    cases = [
        ('my html', 'my val', {'value': 'my val', 'html': 'my html'}),
        ('my html', None, {'html': 'my html'}),
        ('my val', 'my val', {'value': 'my val'}),
        ('my str', 'my str', 'my str'),
        (None, None, {}),
    ]
    for expected_content, expected_summary, value in cases:
        mf2 = {'properties': {'content': [value], 'summary': [value]}}
        obj = microformats2.json_to_object(mf2)
        self.assertEqual(expected_content, obj.get('content'))
        self.assertEqual(expected_summary, obj.get('summary'))
def test_json_to_object_with_location_hcard(self):
    """An h-card location converts to an AS1 place with coordinates."""
    venue_url = 'https://kylewm.com/venues/timeless-coffee-roasters-oakland-california'
    hcard = {
        'type': ['h-card'],
        'properties': {
            'name': ['Timeless Coffee Roasters'],
            'locality': ['Oakland'],
            'region': ['California'],
            'latitude': ['50.820641'],
            'longitude': ['-0.149522'],
            'url': [venue_url],
        },
        'value': 'Timeless Coffee Roasters',
    }
    obj = microformats2.json_to_object({
        'type': ['h-entry'],
        'properties': {'location': [hcard]},
    })

    expected = {
        'objectType': 'place',
        'latitude': 50.820641,
        'longitude': -0.149522,
        'position': '+50.820641-000.149522/',
        'displayName': 'Timeless Coffee Roasters',
        'url': venue_url,
    }
    self.assertEqual(expected, obj['location'])
def test_json_to_object_authorship_fetch_mf2_func(self):
    """With fetch_mf2=True, a URL-only author is filled in from its h-card."""
    author_page = u"""
<div class="h-card">
<a class="p-name u-url" rel="me" href="/">Ms. ☕ Baz</a>
<img class="u-photo" src="/my/pic" />
</div>
"""
    self.expect_requests_get('http://example.com', author_page)
    self.mox.ReplayAll()

    expected = {
        'objectType': 'note',
        'content': 'foo',
        'author': {
            'url': 'http://example.com/',
            'objectType': 'person',
            'displayName': u'Ms. ☕ Baz',
            'image': [{'url': 'http://example.com/my/pic'}],
        },
    }
    entry = {
        'type': ['h-entry'],
        'properties': {
            'content': ['foo'],
            'author': ['http://example.com'],
        },
    }
    self.assert_equals(expected,
                       microformats2.json_to_object(entry, fetch_mf2=True))
def test_json_to_object_with_location_hcard(self):
    """A nested h-card in the location property becomes a place object."""
    location_props = {
        'name': ['Timeless Coffee Roasters'],
        'locality': ['Oakland'],
        'region': ['California'],
        'latitude': ['50.820641'],
        'longitude': ['-0.149522'],
        'url': ['https://kylewm.com/venues/timeless-coffee-roasters-oakland-california'],
    }
    entry = {
        'type': ['h-entry'],
        'properties': {
            'location': [{
                'type': ['h-card'],
                'properties': location_props,
                'value': 'Timeless Coffee Roasters',
            }],
        },
    }

    location = microformats2.json_to_object(entry)['location']

    self.assertEqual({
        'objectType': 'place',
        'latitude': 50.820641,
        'longitude': -0.149522,
        'position': '+50.820641-000.149522/',
        'displayName': 'Timeless Coffee Roasters',
        'url': 'https://kylewm.com/venues/timeless-coffee-roasters-oakland-california',
    }, location)
def test_json_to_object_authorship_fetch_mf2_func(self):
    """With fetch_mf2=True, the author URL is fetched and its h-card used.

    The mocked author page has an h-card with a name, a relative URL, and a
    relative photo; the expected AS1 author has them resolved against
    http://example.com.
    """
    # the <a> is closed with </a>; the fixture previously closed it with a
    # mismatched </span>
    self.expect_requests_get(
        'http://example.com', u"""
<div class="h-card">
<a class="p-name u-url" rel="me" href="/">Ms. ☕ Baz</a>
<img class="u-photo" src="/my/pic" />
</div>
""")
    self.mox.ReplayAll()

    self.assert_equals(
        {
            'objectType': 'note',
            'content': 'foo',
            'author': {
                'url': 'http://example.com/',
                'objectType': 'person',
                'displayName': u'Ms. ☕ Baz',
                'image': [{'url': 'http://example.com/my/pic'}],
            },
        },
        microformats2.json_to_object(
            {
                'type': ['h-entry'],
                'properties': {
                    'content': ['foo'],
                    'author': ['http://example.com'],
                },
            },
            fetch_mf2=True))
def get(self):
    """Renders a stored Response as HTML with a meta redirect to its source.

    Reads the required 'source' and 'target' params, loads the Response
    stored under '<source> <target>', converts its mf2, AS2, or Atom payload
    to AS1, and writes the HTML rendering. Aborts with 404 if the Response
    doesn't exist or has none of those payloads.
    """
    # function-scope import; xml.sax.saxutils is stdlib on Python 2 and 3
    from xml.sax.saxutils import escape as xml_escape

    source = util.get_required_param(self, 'source')
    target = util.get_required_param(self, 'target')

    id = '%s %s' % (source, target)
    resp = Response.get_by_id(id)
    if not resp:
        self.abort(404, 'No stored response for %s' % id)

    if resp.source_mf2:
        as1 = microformats2.json_to_object(json.loads(resp.source_mf2))
    elif resp.source_as2:
        as1 = as2.to_as1(json.loads(resp.source_as2))
    elif resp.source_atom:
        as1 = atom.atom_to_activity(resp.source_atom)
    else:
        self.abort(404, 'Stored response for %s has no data' % id)

    # add HTML meta redirect to source page. should trigger for end users in
    # browsers but not for webmention receivers (hopefully).
    html = microformats2.activities_to_html([as1])
    utf8 = '<meta charset="utf-8">'
    # source comes straight from the request, so escape &, <, > and " before
    # putting it inside the content="..." attribute.
    safe_source = xml_escape(source, {'"': '&quot;'})
    refresh = '<meta http-equiv="refresh" content="0;url=%s">' % safe_source
    html = html.replace(utf8, utf8 + '\n' + refresh)

    self.response.write(html)
def actor(domain):
    """Serves /[DOMAIN], fetches its mf2, converts to AS Actor, and serves it.

    Args:
      domain: str

    Returns:
      (dict AS2 actor, dict response headers) tuple
    """
    if domain.rsplit('.', 1)[-1] in common.TLD_BLOCKLIST:
        error('', status=404)

    mf2 = util.fetch_mf2(f'http://{domain}/', gateway=True,
                         headers=common.HEADERS)

    hcard = mf2util.representative_hcard(mf2, mf2['url'])
    logging.info(f'Representative h-card: {json_dumps(hcard, indent=2)}')
    if not hcard:
        error(
            "Couldn't find a representative h-card "
            "(http://microformats.org/wiki/representative-hcard-parsing) "
            f"on {mf2['url']}")

    key = MagicKey.get_or_create(domain)
    as1_actor = microformats2.json_to_object(hcard)
    obj = common.postprocess_as2(as2.from_as1(as1_actor), key=key)

    # host_url and domain are joined with no separator, as in the original
    base = f'{request.host_url}{domain}'
    obj.update({
        'preferredUsername': domain,
        'inbox': f'{base}/inbox',
        'outbox': f'{base}/outbox',
        'following': f'{base}/following',
        'followers': f'{base}/followers',
    })
    logging.info(f'Returning: {json_dumps(obj, indent=2)}')

    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    }
    return (obj, headers)
def get(self):
    """Fetches a URL (optionally via memcache), converts it, and writes it.

    Query params:
      input: required; one of 'activitystreams', 'html', 'json-mf2'
      url: required; the document to fetch
      cache: optional; the cache is bypassed when this is 'false'
        (case insensitive)

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, expected_inputs))
    url = util.get_required_param(self, 'url')

    # check if request is cached
    use_cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    cached = None
    if use_cache:
        cached = memcache.get(cache_key)

    if cached:
        logging.info('Serving cached response %r', cache_key)
        url, body = cached['url'], cached['body']
    else:
        # fetch url
        try:
            resp = util.urlopen(url)
        except (ValueError, httplib.InvalidURL) as e:
            self.abort(400, str(e))
        # other exceptions are handled by webutil.handlers.handle_exception(),
        # which uses interpret_http_exception(), etc.

        if url != resp.geturl():
            url = resp.geturl()
            logging.info('Redirected to %s', url)
        body = resp.read()

        if use_cache:
            logging.info('Caching response in %r', cache_key)
            memcache.set(cache_key, {'url': url, 'body': body}, URL_CACHE_TIME)

    # decode data
    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
        mf2 = json.loads(body)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = title = None
    if mf2:
        actor = microformats2.find_author(
            mf2, fetch_mf2_func=lambda url: mf2py.parse(url=url))
        title = mf2util.interpret_feed(mf2, url).get('name')

    if input == 'activitystreams':
        activities = json.loads(body)
    elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
    elif input == 'json-mf2':
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in mf2.get('items', [])]

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response, url=url, actor=actor, title=title)
def test_json_to_object_with_categories(self):
    """h-card categories become person tags; string categories, hashtags."""
    obj = microformats2.json_to_object({
        'type': ['h-entry'],
        'properties': {
            'category': [
                {
                    'type': ['h-card'],
                    'properties': {
                        'name': ['Kyle Mahan'],
                        'url': ['https://kylewm.com'],
                    },
                },
                'cats',
                'indieweb',
            ],
        },
    })

    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12)
    self.assertEqual([
        {
            'objectType': 'person',
            'displayName': 'Kyle Mahan',
            'url': 'https://kylewm.com',
        },
        {
            'objectType': 'hashtag',
            'displayName': 'cats',
        },
        {
            'objectType': 'hashtag',
            'displayName': 'indieweb',
        },
    ], obj.get('tags'))
def test_video_stream(self):
    """A video property URL is exposed as the AS1 object's stream."""
    mf2 = {'properties': {'video': ['http://example.com/video.mp4']}}
    obj = microformats2.json_to_object(mf2)
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12)
    self.assertEqual([{'url': 'http://example.com/video.mp4'}], obj['stream'])
def test_photo_property_is_not_url(self):
    """handle the case where someone (incorrectly) marks up the caption
    with p-photo

    Only the photo value that is a URL should end up in the AS1 image list.
    """
    mf2 = {
        'properties': {
            'photo': ['the caption', 'http://example.com/image.jpg'],
        },
    }
    obj = microformats2.json_to_object(mf2)
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12)
    self.assertEqual([{'url': 'http://example.com/image.jpg'}], obj['image'])
def test_json_to_object_converts_text_newlines_to_brs(self):
    """A newline in a plain-text content value is kept in the AS1 content.

    Despite the test name, the expected AS1 object here still contains the
    literal newline, not a <br>.
    """
    mf2 = {'properties': {'content': [{'value': 'asdf\nqwer'}]}}
    expected = {
        'objectType': 'note',
        'content': 'asdf\nqwer',
    }
    self.assert_equals(expected, microformats2.json_to_object(mf2))
def test_json_to_object_drops_html_newlines(self):
    """A newline inside HTML content is replaced by a single space."""
    content = {'html': 'asdf\nqwer', 'value': ''}
    actual = microformats2.json_to_object({'properties': {'content': [content]}})
    self.assert_equals({
        'objectType': 'note',
        'content': 'asdf qwer',
    }, actual)
def test_verb_require_of_suffix(self):
    """Bare 'like' and 'repost' properties, without -of, don't set a verb."""
    for prop in ('like', 'repost'):
        entry = {
            'type': ['h-entry'],
            'properties': {prop: ['http://foo/bar']},
        }
        converted = microformats2.json_to_object(entry)
        self.assertNotIn('verb', converted)
def _test_json_to_object_with_location(self, props):
    """Checks that an h-entry with the given properties yields a fixed place.

    Args:
      props: dict, mf2 properties for the h-entry; expected to describe a
        location at latitude 50.820641, longitude -0.149522
    """
    obj = microformats2.json_to_object({
        'type': ['h-entry'],
        'properties': props,
    })
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12)
    self.assertEqual({
        'latitude': 50.820641,
        'longitude': -0.149522,
        'objectType': 'place',
    }, obj.get('location'))
def test_html_content_and_summary(self):
    """Checks how content and summary are extracted from each value shape.

    Each case is (expected content, expected summary, mf2 value); the same
    value is used for both the content and summary properties.
    """
    for expected_content, expected_summary, value in (
            ('my html', 'my val', {'value': 'my val', 'html': 'my html'}),
            ('my html', None, {'html': 'my html'}),
            ('my val', 'my val', {'value': 'my val'}),
            ('my str', 'my str', 'my str'),
            (None, None, {})):
        obj = microformats2.json_to_object({
            'properties': {'content': [value], 'summary': [value]},
        })
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12)
        self.assertEqual(expected_content, obj.get('content'))
        self.assertEqual(expected_summary, obj.get('summary'))
def test_nested_compound_url_object(self):
    """A url property that is itself a compound object is unwrapped."""
    mf2 = {'properties': {
        'repost-of': [{
            'type': ['h-outer'],
            'properties': {
                'url': [{
                    'type': ['h-inner'],
                    'properties': {'url': ['http://nested']},
                }],
            },
        }],
    }}
    obj = microformats2.json_to_object(mf2)
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12)
    self.assertEqual('http://nested', obj['object']['url'])
def test_json_to_object_simple_url_author(self):
    """An author given as a bare URL string becomes a person with that url."""
    mf2 = {
        'properties': {
            'content': ['foo'],
            'author': ['http://example.com'],
        },
    }
    expected = {
        'objectType': 'note',
        'content': 'foo',
        'author': {
            'url': 'http://example.com',
            'objectType': 'person',
        },
    }
    self.assert_equals(expected, microformats2.json_to_object(mf2))
def test_json_to_object_simple_name_author(self):
    """An author given as a bare name string becomes a person's displayName."""
    mf2 = {
        'properties': {
            'content': ['foo'],
            'author': ['My Name'],
        },
    }
    expected = {
        'objectType': 'note',
        'content': 'foo',
        'author': {
            'displayName': 'My Name',
            'objectType': 'person',
        },
    }
    self.assert_equals(expected, microformats2.json_to_object(mf2))
def test_json_to_object_simple_name_author(self):
    """A name-only author string is converted to a person with displayName."""
    author = {'displayName': 'My Name', 'objectType': 'person'}
    properties = {'content': ['foo'], 'author': ['My Name']}

    actual = microformats2.json_to_object({'properties': properties})

    self.assert_equals(
        {'objectType': 'note', 'content': 'foo', 'author': author},
        actual)
def test_json_to_object_simple_url_author(self):
    """A URL-only author string is converted to a person with that url."""
    author = {'url': 'http://example.com', 'objectType': 'person'}
    properties = {'content': ['foo'], 'author': ['http://example.com']}

    actual = microformats2.json_to_object({'properties': properties})

    self.assert_equals(
        {'objectType': 'note', 'content': 'foo', 'author': author},
        actual)
def get(self):
    """Fetches a URL, converts it to AS1 activities, and writes them out.

    Query params (both required):
      input: one of 'activitystreams', 'html', 'json-mf2', 'jsonfeed'
      url: the document to fetch

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2', 'jsonfeed')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, expected_inputs))

    url, body = self._urlopen(util.get_required_param(self, 'url'))

    # decode data
    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
        mf2 = json.loads(body)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = title = None
    if mf2:
        def fetch_mf2_func(url):
            # fetch through self._urlopen, then parse the returned document
            _, doc = self._urlopen(url)
            return mf2py.parse(doc=doc, url=url)

        actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func)
        title = mf2util.interpret_feed(mf2, url).get('name')

    if input == 'activitystreams':
        activities = json.loads(body)
    elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
    elif input == 'json-mf2':
        items = mf2.get('items', [])
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in items]
    elif input == 'jsonfeed':
        # JSON Feed input supplies its own actor
        activities, actor = jsonfeed.jsonfeed_to_activities(json.loads(body))

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response, url=url, actor=actor, title=title)
def convert_to_as2(url):
    """Fetch a URL as HTML, convert it to AS2, and return it.

    Currently mainly for Pixelfed.
    https://github.com/snarfed/bridgy-fed/issues/39

    Args:
      url: str

    Returns:
      (dict AS2 object, dict response headers) tuple
    """
    mf2 = util.fetch_mf2(url)
    entry = mf2util.find_first_entry(mf2, ['h-entry'])
    logging.info(f"Parsed mf2 for {mf2['url']}: {json_dumps(entry, indent=2)}")

    as1 = microformats2.json_to_object(entry)
    obj = common.postprocess_as2(as2.from_as1(as1))
    logging.info(f'Returning: {json_dumps(obj, indent=2)}')

    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    }
    return obj, headers
def get(self):
    """Renders the stored Response for a source/target pair as HTML.

    Aborts with 404 if no Response is stored under '<source> <target>' or
    if it has no mf2, AS2, or Atom payload.
    """
    source = util.get_required_param(self, 'source')
    target = util.get_required_param(self, 'target')

    key = '%s %s' % (source, target)
    resp = Response.get_by_id(key)
    if not resp:
        self.abort(404, 'No stored response for %s' % key)

    # payloads are checked in order of preference: mf2, then AS2, then Atom
    if resp.source_mf2:
        as1 = microformats2.json_to_object(json.loads(resp.source_mf2))
    elif resp.source_as2:
        as1 = as2.to_as1(json.loads(resp.source_as2))
    elif resp.source_atom:
        as1 = atom.atom_to_activity(resp.source_atom)
    else:
        self.abort(404, 'Stored response for %s has no data' % key)

    rendered = microformats2.activities_to_html([as1])
    self.response.write(rendered)
def convert_to_as2(self, url):
    """Fetch a URL as HTML, convert it to AS2, and write it to the response.

    Currently mainly for Pixelfed.
    https://github.com/snarfed/bridgy-fed/issues/39

    Args:
      url: str
    """
    mf2 = util.fetch_mf2(url)
    entry = mf2util.find_first_entry(mf2, ['h-entry'])
    logging.info('Parsed mf2 for %s: %s', mf2['url'],
                 json_dumps(entry, indent=2))

    as1 = microformats2.json_to_object(entry)
    obj = self.postprocess_as2(as2.from_as1(as1))
    body = json_dumps(obj, indent=2)
    logging.info('Returning: %s', body)

    self.response.headers.update({
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    })
    self.response.write(body)
def dispatch_request(self):
    """Handles an incoming webmention: fetches the source and federates it.

    Fetches the 'source' param, parses its microformats2, checks that the
    page links back to this host, converts the first h-entry to an AS1
    object stored in self.source_obj, then tries ActivityPub followed by
    Salmon.

    Returns:
      the first truthy result from try_activitypub() or try_salmon(), or ''
      if both return falsy values
    """
    logging.info(f'Params: {list(request.form.items())}')

    # fetch source page
    source = flask_util.get_required_param('source')
    source_resp = common.requests_get(source)
    self.source_url = source_resp.url or source
    self.source_domain = urllib.parse.urlparse(
        self.source_url).netloc.split(':')[0]
    self.source_mf2 = util.parse_mf2(source_resp)

    # logging.debug(f'Parsed mf2 for {source_resp.url} : {json_dumps(self.source_mf2, indent=2)}')

    # check for backlink to bridgy fed (for webmention spec and to confirm
    # source's intent to federate to mastodon)
    if (request.host_url not in source_resp.text and
            urllib.parse.quote(request.host_url, safe='')
            not in source_resp.text):
        # f-string: without the prefix the message contained the literal
        # text "{request.host_url}" instead of the host URL
        error(f"Couldn't find link to {request.host_url}")

    # convert source page to ActivityStreams
    entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
    if not entry:
        error(f'No microformats2 found on {self.source_url}')

    logging.info(f'First entry: {json_dumps(entry, indent=2)}')

    # make sure it has url, since we use that for AS2 id, which is required
    # for ActivityPub.
    props = entry.setdefault('properties', {})
    if not props.get('url'):
        props['url'] = [self.source_url]

    self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
    logging.info(
        f'Converted to AS1: {json_dumps(self.source_obj, indent=2)}')

    # first protocol that produces a response wins
    for method in self.try_activitypub, self.try_salmon:
        ret = method()
        if ret:
            return ret

    return ''
def get(self):
    """Fetches a URL, converts it to AS1 activities, and writes them out.

    Query params (both required):
      input: one of 'activitystreams', 'html', 'json-mf2'
      url: the document to fetch

    For mf2-based inputs, the author and feed title are also extracted and
    passed along to write_response().

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, expected_inputs))

    # fetch url
    url = util.get_required_param(self, 'url')
    resp = util.urlopen(url)
    if url != resp.geturl():
        url = resp.geturl()
        logging.info('Redirected to %s', url)
    body = resp.read()

    # decode data
    mf2 = None
    if input == 'activitystreams':
        activities = json.loads(body)
    elif input == 'html':
        activities = microformats2.html_to_activities(body, url)
        mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
        mf2 = json.loads(body)
        # mf2util expects rels. only add an empty dict when the input has
        # none, so rels supplied in the document aren't thrown away.
        mf2.setdefault('rels', {})
        activities = [microformats2.json_to_object(item)
                      for item in mf2.get('items', [])]

    author = None
    title = None
    if mf2:
        author = microformats2.find_author(mf2)
        title = mf2util.interpret_feed(mf2, url).get('name')

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=author, title=title)
def write_response(self, response, actor=None, url=None, title=None,
                   hfeed=None):
    """Converts ActivityStreams activities and writes them out.

    The output format comes from the 'format' query param, falling back to
    'output' and then 'json'. The Content-Type header is set from FORMATS
    unless the 'plaintext' param is present, in which case it is
    text/plain. HEAD requests get headers only.

    Args:
      response: response dict with values based on OpenSocial ActivityStreams
        REST API, as returned by Source.get_activities_response()
      actor: optional ActivityStreams actor dict for current user. Only used
        for Atom and JSON Feed output.
      url: the input URL
      title: string, used in feed output (Atom, JSON Feed, RSS)
      hfeed: dict, parsed mf2 h-feed, if available

    Raises:
      exc.HTTPBadRequest: if the format is unknown, or if JSON Feed
        conversion raises TypeError
    """
    format = (self.request.get('format') or self.request.get('output')
              or 'json')
    if format not in FORMATS:
        raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' %
                                 (format, FORMATS))

    if 'plaintext' in self.request.params:
        # override content type
        self.response.headers['Content-Type'] = 'text/plain'
    else:
        content_type = FORMATS.get(format)
        if content_type:
            self.response.headers['Content-Type'] = content_type

    if self.request.method == 'HEAD':
        return

    activities = response['items']
    try:
        if format in ('as1', 'json', 'activitystreams'):
            self.response.out.write(json_dumps(response, indent=2))
        elif format == 'as2':
            # rename the AS1 collection fields to their AS2 equivalents
            response.update({
                'items': [as2.from_as1(a) for a in activities],
                'totalItems': response.pop('totalResults', None),
                'updated': response.pop('updatedSince', None),
                'filtered': None,
                'sorted': None,
            })
            self.response.out.write(
                json_dumps(util.trim_nulls(response), indent=2))
        elif format == 'atom':
            hub = self.request.get('hub')
            reader = self.request.get('reader', 'true').lower()
            if reader not in ('true', 'false'):
                self.abort(400, 'reader param must be either true or false')
            if not actor and hfeed:
                # fall back to the h-feed's own properties as the actor
                actor = microformats2.json_to_object({
                    'properties': hfeed.get('properties', {}),
                })
            self.response.out.write(
                atom.activities_to_atom(
                    activities, actor,
                    host_url=url or self.request.host_url + '/',
                    request_url=self.request.url,
                    xml_base=util.base_url(url),
                    title=title,
                    rels={'hub': hub} if hub else None,
                    reader=(reader == 'true')))
            self.response.headers.add(
                'Link', str('<%s>; rel="self"' % self.request.url))
            if hub:
                self.response.headers.add('Link', str('<%s>; rel="hub"' % hub))
        elif format == 'rss':
            if not title:
                title = 'Feed for %s' % url
            self.response.out.write(
                rss.from_activities(activities, actor, title=title,
                                    feed_url=self.request.url, hfeed=hfeed,
                                    home_page_url=util.base_url(url)))
        elif format in ('as1-xml', 'xml'):
            self.response.out.write(XML_TEMPLATE % util.to_xml(response))
        elif format == 'html':
            self.response.out.write(
                microformats2.activities_to_html(activities))
        elif format in ('mf2-json', 'json-mf2'):
            items = [microformats2.activity_to_json(a) for a in activities]
            self.response.out.write(json_dumps({'items': items}, indent=2))
        elif format == 'jsonfeed':
            try:
                jf = jsonfeed.activities_to_jsonfeed(
                    activities, actor=actor, title=title,
                    feed_url=self.request.url)
            except TypeError as e:
                raise exc.HTTPBadRequest('Unsupported input data: %s' % e)
            self.response.out.write(json_dumps(jf, indent=2))
    except ValueError as e:
        # exc_info records the traceback of the exception being handled;
        # stack_info would only record the stack of this logging call.
        logging.warning('converting to output format failed', exc_info=True)
        self.abort(400, 'Could not convert to %s: %s' % (format, str(e)))
def attempt_single_item(self, item):
    """Attempts to preview or publish a single mf2 item.

    Converts the item to an AS1 object, picks the URL to publish with,
    checks that posts and comments have content, and then either renders a
    preview (when self.PREVIEW is set) or creates the post and fills in the
    response.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
    self.maybe_inject_silo_content(item)
    obj = microformats2.json_to_object(item)

    ignore_formatting = self.ignore_formatting(item)
    if ignore_formatting:
        first = microformats2.first_props(item.get('properties', {}))
        text = microformats2.get_text(first.get('content'))
        if text:
            obj['content'] = text.strip()

    # which original post URL to include? in order of preference:
    # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
    # 2. original user-provided URL if it redirected
    # 3. u-url if available
    # 4. actual final fetched URL
    if self.shortlink:
        obj['url'] = self.shortlink
    elif self.source_url() != self.fetched.url:
        obj['url'] = self.source_url()
    elif 'url' not in obj:
        obj['url'] = self.fetched.url
    logging.debug('Converted to ActivityStreams object: %s',
                  json.dumps(obj, indent=2))

    # posts and comments need content
    obj_type = obj.get('objectType')
    if obj_type in ('note', 'article', 'comment'):
        has_text = (obj.get('content') or obj.get('summary') or
                    obj.get('displayName'))
        if not has_text:
            return gr_source.creation_result(
                abort=False,
                error_plain='Could not find content in %s' % self.fetched.url,
                error_html='Could not find <a href="http://microformats.org/">content</a> in %s' % self.fetched.url)

    self.preprocess(obj)

    include_link = self.include_link(item)

    if not self.authorize():
        return gr_source.creation_result(abort=True)

    # RIP Facebook comments/likes. https://github.com/snarfed/bridgy/issues/350
    if (isinstance(self.source, FacebookPage) and
            (obj_type == 'comment' or obj.get('verb') == 'like')):
        return gr_source.creation_result(
            abort=True,
            error_plain='Facebook comments and likes are no longer supported. :(',
            error_html='<a href="https://github.com/snarfed/bridgy/issues/350">'
                       'Facebook comments and likes are no longer supported.</a> :(')

    if self.PREVIEW:
        result = self.source.gr_source.preview_create(
            obj, include_link=include_link,
            ignore_formatting=ignore_formatting)
        self.entity.published = result.content or result.description
        if not self.entity.published:
            return result  # there was an error

        state = {
            'source_key': self.source.key.urlsafe(),
            'source_url': self.source_url(),
            'target_url': self.target_url(),
            'include_link': include_link,
        }
        template_vars = {
            'source': self.preprocess_source(self.source),
            'preview': result.content,
            'description': result.description,
            'webmention_endpoint': self.request.host_url + '/publish/webmention',
            'state': self.encode_state_parameter(state),
        }
        template_vars.update(state)
        logging.info('Rendering preview with template vars %s',
                     pprint.pformat(template_vars))
        return gr_source.creation_result(
            template.render('templates/preview.html', template_vars))

    # not a preview: actually publish
    result = self.source.gr_source.create(
        obj, include_link=include_link, ignore_formatting=ignore_formatting)
    self.entity.published = result.content
    if not result.content:
        return result  # there was an error

    published = self.entity.published
    if 'url' not in published:
        published['url'] = obj.get('url')
    self.entity.type = published.get('type') or models.get_type(obj)
    self.entity.type_label = self.source.TYPE_LABELS.get(self.entity.type)

    self.response.headers['Content-Type'] = 'application/json'
    logging.info('Returning %s', json.dumps(published, indent=2))
    self.response.headers['Location'] = published['url'].encode('utf-8')
    self.response.status = 201
    return gr_source.creation_result(json.dumps(published, indent=2))
def get(self):
    """Fetches a URL and serves its contents converted to ActivityStreams.

    Reads the required `input` and `url` query parameters, fetches the URL,
    decodes the body according to `input`, converts it to a list of
    activities, and writes the response along with the actor, title, and
    h-feed when they could be determined from microformats2.

    Raises:
      exc.HTTPBadRequest: if `input` is not in INPUTS, or the fetched body
        can't be decoded or parsed as the requested format. Some conversion
        failures are reported through self.abort(400, ...) instead.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                 'jsonfeed'):
        try:
            body_json = json.loads(body)
        except (TypeError, ValueError):
            raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

        if isinstance(body_json, list):
            body_items = body_json
        elif hasattr(body_json, 'get'):
            body_items = body_json.get('items') or [body_json]
        else:
            # valid JSON, but a scalar or null. calling .get() on it would
            # raise an uncaught AttributeError, so reject it explicitly.
            raise exc.HTTPBadRequest(
                'Expected %s JSON input to be dict or list, got %s' %
                (input, body_json.__class__.__name__))

    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
    elif input in ('mf2-json', 'json-mf2'):
        mf2 = body_json
        if not hasattr(mf2, 'get'):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
        def fetch_mf2_func(url):
            # don't fetch silo URLs; return a minimal h-card for them instead
            if util.domain_or_parent_in(urlparse.urlparse(url).netloc,
                                        SILO_DOMAINS):
                return {'items': [{'type': ['h-card'],
                                   'properties': {'url': [url]}}]}
            _, doc = self._fetch(url)
            return mf2py.parse(doc=doc, url=url, img_with_alt=True)

        try:
            actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func)
            title = microformats2.get_title(mf2)
            hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
        except (KeyError, ValueError) as e:
            raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                     (url, input, e))

    try:
        if input in ('as1', 'activitystreams'):
            activities = body_items
        elif input == 'as2':
            activities = [as2.to_as1(obj) for obj in body_items]
        elif input == 'atom':
            try:
                activities = atom.atom_to_activities(body)
            except ElementTree.ParseError as e:
                raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                         (url, e))
            except ValueError as e:
                raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                         (url, e))
        elif input == 'html':
            activities = microformats2.html_to_activities(body, url, actor)
        elif input in ('mf2-json', 'json-mf2'):
            activities = [microformats2.json_to_object(item, actor=actor)
                          for item in mf2.get('items', [])]
        elif input == 'jsonfeed':
            activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
        logging.warning('parsing input failed', exc_info=True)
        self.abort(400, 'Could not parse %s as %s: %s' % (url, input, str(e)))

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title, hfeed=hfeed)
def attempt_single_item(self, item):
    """Attempts to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      a CreationResult object, where content is the string HTTP response or
      None if the source cannot publish this item type.
    """
    props = item.get('properties', {})
    ignore_formatting = self.ignore_formatting()
    if ignore_formatting is None:
        ignore_formatting = 'bridgy-ignore-formatting' in props

    obj = microformats2.json_to_object(item)
    if ignore_formatting:
        # the first content value may be missing, a plain string, or a dict
        # with a 'value' key. only override obj's content when there is
        # actual text, instead of crashing on None.strip().
        content = microformats2.first_props(props).get('content')
        text = content.get('value') if isinstance(content, dict) else content
        if text:
            obj['content'] = text.strip()

    # which original post URL to include? if the source URL redirected, use the
    # (pre-redirect) source URL, since it might be a short URL. otherwise, use
    # u-url if it's set. finally, fall back to the actual fetched URL
    if self.source_url() != self.fetched.url:
        obj['url'] = self.source_url()
    elif 'url' not in obj:
        obj['url'] = self.fetched.url
    logging.debug('Converted to ActivityStreams object: %s',
                  json.dumps(obj, indent=2))

    # posts and comments need content
    obj_type = obj.get('objectType')
    if obj_type in ('note', 'article', 'comment'):
        if (not obj.get('content') and not obj.get('summary') and
                not obj.get('displayName')):
            return gr_source.creation_result(
                abort=False,
                error_plain='Could not find content in %s' % self.fetched.url,
                error_html='Could not find <a href="http://microformats.org/">content</a> in %s' % self.fetched.url)

    self.preprocess_activity(obj, ignore_formatting=ignore_formatting)

    omit_link = self.omit_link()
    if omit_link is None:
        omit_link = 'bridgy-omit-link' in props

    if not self.authorize():
        return gr_source.creation_result(abort=True)

    # RIP Facebook comments/likes. https://github.com/snarfed/bridgy/issues/350
    if (isinstance(self.source, FacebookPage) and
            (obj_type == 'comment' or obj.get('verb') == 'like')):
        return gr_source.creation_result(
            abort=True,
            error_plain='Facebook comments and likes are no longer supported. :(',
            error_html='<a href="https://github.com/snarfed/bridgy/issues/350">'
                       'Facebook comments and likes are no longer supported.</a> :(')

    if self.PREVIEW:
        result = self.source.gr_source.preview_create(
            obj, include_link=not omit_link)
        self.entity.published = result.content or result.description
        if not self.entity.published:
            return result  # there was an error

        state = {
            'source_key': self.source.key.urlsafe(),
            'source_url': self.source_url(),
            'target_url': self.target_url(),
            'bridgy_omit_link': omit_link,
        }
        vars = {
            'source': self.preprocess_source(self.source),
            'preview': result.content,
            'description': result.description,
            'webmention_endpoint': self.request.host_url + '/publish/webmention',
            'state': self.encode_state_parameter(state),
        }
        vars.update(state)
        logging.info('Rendering preview with template vars %s',
                     pprint.pformat(vars))
        return gr_source.creation_result(
            template.render('templates/preview.html', vars))

    else:
        result = self.source.gr_source.create(obj, include_link=not omit_link)
        self.entity.published = result.content
        if not result.content:
            return result  # there was an error

        if 'url' not in self.entity.published:
            self.entity.published['url'] = obj.get('url')
        self.entity.type = (self.entity.published.get('type') or
                            models.get_type(obj))
        self.entity.type_label = self.source.TYPE_LABELS.get(self.entity.type)
        self.response.headers['Content-Type'] = 'application/json'
        logging.info('Returning %s',
                     json.dumps(self.entity.published, indent=2))
        return gr_source.creation_result(
            json.dumps(self.entity.published, indent=2))
def get(self):
    """Fetches a URL and serves its contents converted to ActivityStreams.

    Reads the required `input` and `url` query parameters. The fetched
    final URL and body are cached in memcache under a key derived from the
    requested URL unless the `cache` query parameter is 'false'
    (case-insensitive).

    Raises:
      exc.HTTPBadRequest: if `input` is not one of the expected values, the
        body isn't valid JSON for a JSON input, or json-mf2 input isn't a
        JSON object. Fetch failures are reported through self.abort() with
        400 (bad URL) or 504 (connection failure).
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, expected_inputs))
    url = util.get_required_param(self, 'url')

    # check if request is cached
    cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    cached = memcache.get(cache_key) if cache else None

    if cached:
        logging.info('Serving cached response %r', cache_key)
        url = cached['url']
        body = cached['body']
    else:
        # fetch url
        try:
            resp = util.urlopen(url)
        except (ValueError, httplib.InvalidURL) as e:
            self.abort(400, str(e))
        except Exception as e:
            if util.is_connection_failure(e):
                # HTTP 504 Gateway Timeout
                self.abort(504, str(e))
            raise

        if url != resp.geturl():
            url = resp.geturl()
            logging.info('Redirected to %s', url)
        body = resp.read()

        if cache:
            logging.info('Caching response in %r', cache_key)
            memcache.set(cache_key, {'url': url, 'body': body}, URL_CACHE_TIME)

    # decode data. JSON inputs are parsed once here so that a malformed body
    # becomes a 400 instead of an uncaught ValueError.
    body_json = None
    if input in ('activitystreams', 'json-mf2'):
        try:
            body_json = json.loads(body)
        except (TypeError, ValueError):
            raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
        mf2 = body_json
        if not isinstance(mf2, dict):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    if mf2:
        actor = microformats2.find_author(
            mf2, fetch_mf2_func=lambda url: mf2py.parse(url=url))
        title = mf2util.interpret_feed(mf2, url).get('name')

    if input == 'activitystreams':
        activities = body_json
    elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
    elif input == 'json-mf2':
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in mf2.get('items', [])]

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title)
def test_photo_property_has_no_url(self):
    """Photo values that are only text, not URLs, should yield no image."""
    item = {'properties': {'photo': ['the caption', 'alternate text']}}
    converted = microformats2.json_to_object(item)
    self.assertFalse(converted.get('image'))
def attempt_single_item(self, item):
    """Tries to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
    self.maybe_inject_silo_content(item)
    obj = microformats2.json_to_object(item)

    ignore_formatting = self.ignore_formatting(item)
    if ignore_formatting:
        # use the plain text of the first content property, if it has any
        first_values = microformats2.first_props(item.get('properties', {}))
        plain = microformats2.get_text(first_values.get('content'))
        if plain:
            obj['content'] = plain.strip()

    # which original post URL to include? in order of preference:
    # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
    # 2. original user-provided URL if it redirected
    # 3. u-url if available
    # 4. actual final fetched URL
    if self.shortlink:
        obj['url'] = self.shortlink
    elif self.source_url() != self.fetched.url:
        obj['url'] = self.source_url()
    elif 'url' not in obj:
        obj['url'] = self.fetched.url
    logging.debug('Converted to ActivityStreams object: %s',
                  json.dumps(obj, indent=2))

    # posts and comments need content
    obj_type = obj.get('objectType')
    if obj_type in ('note', 'article', 'comment') and not (
            obj.get('content') or obj.get('summary') or obj.get('displayName')):
        return gr_source.creation_result(
            abort=False,
            error_plain='Could not find content in %s' % self.fetched.url,
            error_html='Could not find <a href="http://microformats.org/">content</a> in %s' % self.fetched.url)

    self.preprocess(obj)

    omit_link = self.omit_link(item)

    if not self.authorize():
        return gr_source.creation_result(abort=True)

    # RIP Facebook comments/likes. https://github.com/snarfed/bridgy/issues/350
    if isinstance(self.source, FacebookPage):
        if obj_type == 'comment' or obj.get('verb') == 'like':
            return gr_source.creation_result(
                abort=True,
                error_plain='Facebook comments and likes are no longer supported. :(',
                error_html='<a href="https://github.com/snarfed/bridgy/issues/350">'
                           'Facebook comments and likes are no longer supported.</a> :(')

    if self.PREVIEW:
        result = self.source.gr_source.preview_create(
            obj, include_link=not omit_link, ignore_formatting=ignore_formatting)
        self.entity.published = result.content or result.description
        if not self.entity.published:
            return result  # there was an error

        state = {
            'source_key': self.source.key.urlsafe(),
            'source_url': self.source_url(),
            'target_url': self.target_url(),
            'bridgy_omit_link': omit_link,
        }
        template_vars = {
            'source': self.preprocess_source(self.source),
            'preview': result.content,
            'description': result.description,
            'webmention_endpoint': self.request.host_url + '/publish/webmention',
            'state': self.encode_state_parameter(state),
        }
        template_vars.update(state)
        logging.info('Rendering preview with template vars %s',
                     pprint.pformat(template_vars))
        return gr_source.creation_result(
            template.render('templates/preview.html', template_vars))

    # not a preview: actually publish
    result = self.source.gr_source.create(
        obj, include_link=not omit_link, ignore_formatting=ignore_formatting)
    self.entity.published = result.content
    if not result.content:
        return result  # there was an error

    published = self.entity.published
    if 'url' not in published:
        published['url'] = obj.get('url')
    self.entity.type = published.get('type') or models.get_type(obj)
    self.entity.type_label = self.source.TYPE_LABELS.get(self.entity.type)

    self.response.headers['Content-Type'] = 'application/json'
    logging.info('Returning %s', json.dumps(self.entity.published, indent=2))
    self.response.headers['Location'] = self.entity.published['url'].encode('utf-8')
    self.response.status = 201
    return gr_source.creation_result(
        json.dumps(self.entity.published, indent=2))
def test_h_as_article(self):
    """An h-entry that also has the h-as-article type converts to an article."""
    obj = microformats2.json_to_object({'type': ['h-entry', 'h-as-article']})
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual('article', obj['objectType'])
def attempt_single_item(self, item):
    """Attempts to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
    self.maybe_inject_silo_content(item)
    obj = microformats2.json_to_object(item)

    ignore_formatting = self.ignore_formatting(item)
    if ignore_formatting:
        # use the plain text of the first content property, if it has any
        prop = microformats2.first_props(item.get('properties', {}))
        content = microformats2.get_text(prop.get('content'))
        if content:
            obj['content'] = content.strip()

    # which original post URL to include? in order of preference:
    # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
    # 2. original user-provided URL if it redirected
    # 3. u-url if available
    # 4. actual final fetched URL
    if self.shortlink:
        obj['url'] = self.shortlink
    elif self.source_url() != self.fetched.url:
        obj['url'] = self.source_url()
    elif 'url' not in obj:
        obj['url'] = self.fetched.url
    logging.debug('Converted to ActivityStreams object: %s',
                  json.dumps(obj, indent=2))

    # posts and comments need content
    obj_type = obj.get('objectType')
    if obj_type in ('note', 'article', 'comment'):
        if (not obj.get('content') and not obj.get('summary') and
                not obj.get('displayName')):
            return gr_source.creation_result(
                abort=False,
                error_plain='Could not find content in %s' % self.fetched.url,
                error_html='Could not find <a href="http://microformats.org/">content</a> in %s' % self.fetched.url)

    self.preprocess(obj)

    include_link = self.include_link(item)

    if not self.authorize():
        return gr_source.creation_result(abort=True)

    # RIP Facebook.
    # https://github.com/snarfed/bridgy/issues/817
    # https://github.com/snarfed/bridgy/issues/350
    if isinstance(self.source, FacebookPage):
        return gr_source.creation_result(
            abort=True,
            error_plain='Facebook is no longer supported. So long, and thanks for all the fish!',
            error_html='<a href="https://brid.gy/about#rip-facebook">Facebook is no longer supported. So long, and thanks for all the fish!</a>')

    if self.PREVIEW:
        result = self.source.gr_source.preview_create(
            obj, include_link=include_link, ignore_formatting=ignore_formatting)
        self.entity.published = result.content or result.description
        if not self.entity.published:
            return result  # there was an error
        return self._render_preview(result, include_link=include_link)

    else:
        result = self.source.gr_source.create(
            obj, include_link=include_link, ignore_formatting=ignore_formatting)
        self.entity.published = result.content
        if not result.content:
            return result  # there was an error

        if 'url' not in self.entity.published:
            self.entity.published['url'] = obj.get('url')
        self.entity.type = (self.entity.published.get('type') or
                            models.get_type(obj))
        self.response.headers['Content-Type'] = 'application/json'
        logging.info('Returning %s',
                     json.dumps(self.entity.published, indent=2))
        self.response.headers['Location'] = self.entity.published['url'].encode('utf-8')
        self.response.status = 201
        return gr_source.creation_result(
            json.dumps(self.entity.published, indent=2))
def test_verb_require_of_suffix(self):
    """Bare 'like' and 'repost' properties (no -of suffix) set no verb."""
    for name in ('like', 'repost'):
        item = {
            'type': ['h-entry'],
            'properties': {name: ['http://foo/bar']},
        }
        converted = microformats2.json_to_object(item)
        self.assertNotIn('verb', converted)
def test_ignore_h_as(self):
    """A plain h-entry converts to a note.

    https://github.com/snarfed/bridgy/issues/635
    """
    obj = microformats2.json_to_object({'type': ['h-entry']})
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual('note', obj['objectType'])