def test_escape_urls(self):
    """Ampersands in an activity URL are XML-escaped in the rendered <id>."""
    raw_url = 'http://foo/bar?baz&baj'
    activity = {'url': raw_url, 'object': {}}

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title='my title')

    # The escaped form must be present and the raw URL absent; with an
    # unescaped expectation the two assertions would contradict each other.
    self.assertIn('<id>http://foo/bar?baz&amp;baj</id>', out)
    self.assertNotIn(raw_url, out)
def test_to_people(self):
    """Person recipients in ``to`` are rendered as an "In reply to" line."""
    recipients = [
        {"objectType": "group", "alias": "@public"},
        {"objectType": "person", "url": "https://twitter.com/A",
         "displayName": "aye"},
        {"objectType": "person", "id": "B", "url": "https://twitter.com/B",
         "displayName": "bee"},
    ]
    activity = {
        "object": {
            "objectType": "note",
            "content": "an extended tweet reply",
            "to": recipients,
        }
    }

    got = atom.activities_to_atom([activity], None)

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in(
        """
<p>In reply to
<a class="h-card p-name u-url" href="https://twitter.com/A">aye</a>,
<a class="h-card p-name u-url" href="https://twitter.com/B">bee</a>:</p>
""",
        got,
    )
def test_render_share_of_obj_with_attachments(self):
    """This is e.g. a retweet of a quote tweet."""
    quoted_note = {
        'objectType': 'note',
        'content': 'quoted text',
    }
    video = {
        'objectType': 'video',
        'stream': [{'url': 'http://a/vidjo/1.mov'}],
    }
    activity = {
        'verb': 'share',
        'object': {
            'content': 'RT @quoter: comment',
            'attachments': [quoted_note, video],
        },
    }

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title='my title')

    self.assertIn('RT @quoter: comment', out)
    # NOTE(review): line breaks inside the literals below are reconstructed
    # from a whitespace-collapsed source -- confirm against the template.
    self.assert_multiline_in("""\
<blockquote>
quoted text
</blockquote>
""", out)
    self.assert_multiline_in("""
<p><video class="u-video" src="http://a/vidjo/1.mov" controls="controls" poster="">Your browser does not support the video tag. <a href="http://a/vidjo/1.mov">Click here to view directly. </a></video>
</p>""", out)
def test_image_duplicated_in_attachment(self):
    """If an image is also in an attachment, don't render a duplicate.

    https://github.com/snarfed/twitter-atom/issues/8
    """
    first_img = '<img class="u-photo" src="http://pics/1.jpg" alt="" />'
    second_img = '<img class="u-photo" src="http://pics/2.jpg" alt="" />'
    activity = {
        'object': {
            'content': 'foo bar',
            'image': [
                {'url': 'http://pics/1.jpg'},
                {'url': 'http://pics/2.jpg'},
            ],
            'attachments': [{
                'objectType': 'note',
                'image': {'url': 'http://pics/2.jpg'},
            }, {
                'objectType': 'image',
                'image': {'url': 'http://pics/1.jpg'},
            }],
        },
    }

    got = atom.activities_to_atom([activity], {})

    # The first picture is rendered exactly once...
    self.assertEqual(1, got.count(first_img), got)
    # NOTE(review): line breaks in this literal are reconstructed -- confirm.
    self.assert_multiline_in("""
<link rel="enclosure" href="http://pics/1.jpg" type="image/jpeg" />
""", got)
    # ...and the second one is not rendered as an <img> at all.
    self.assertNotIn(second_img, got, got)
def get(self):
    """Writes an Atom feed for a Twitter timeline, or for a list if given.

    Reads the required ``access_token_key`` and ``access_token_secret``
    request parameters and the optional ``list`` parameter
    (``username/list``). Aborts with 400 if ``list`` is malformed and
    raises a 503 if rendering hits DeadlineExceededError.
    """
    self.response.headers['Content-Type'] = 'application/atom+xml'

    token_key = util.get_required_param(self, 'access_token_key')
    token_secret = util.get_required_param(self, 'access_token_secret')
    tw = twitter.Twitter(token_key, token_secret)

    list_str = self.request.get('list')
    if not list_str:
        actor = tw.get_actor()
        activities = tw.get_activities(count=50)
    else:
        # This one list is explicitly refused with a rate limiting error.
        if list_str == 'tonysss13/financial':
            raise exc.HTTPTooManyRequests(
                "Please reduce your feed reader's polling rate.")

        # this pattern is duplicated in index.html.
        # also note that list names allow more characters than usernames,
        # but the allowed characters aren't explicitly documented. :/ details:
        # https://groups.google.com/d/topic/twitter-development-talk/lULdIVR3B9s/discussion
        match = re.match(r'@?([A-Za-z0-9_]+)/([A-Za-z0-9_-]+)', list_str)
        if not match:
            self.abort(400,
                       'List must be of the form username/list (got %r)' % list_str)
        user_id, group_id = match.groups()
        actor = tw.get_actor(user_id)
        activities = tw.get_activities(user_id=user_id, group_id=group_id,
                                       count=50)

    title = 'twitter-atom feed for %s' % (list_str or actor.get('username', ''))
    try:
        feed = atom.activities_to_atom(
            activities, actor, title=title,
            host_url=self.request.host_url + '/',
            request_url=self.request.path_url,
            xml_base='https://twitter.com/')
        self.response.out.write(feed)
    except DeadlineExceededError:
        logging.warning('Hit 60s overall request deadline, returning 503.',
                        exc_info=True)
        raise exc.HTTPServiceUnavailable()
def test_render_encodes_ampersands(self):
    """A bare ``&`` in content is escaped; existing entities are left alone."""
    # only the one unencoded & in a&b should be encoded
    activity = {"object": {"content": "X <y> http://z?w a&b c&amp;d e&gt;f"}}

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title="my title")

    # The expectation must hold the escaped form; otherwise it would contain
    # the very "a&b" that the next assertion forbids.
    self.assert_multiline_in("X <y> http://z?w a&amp;b c&amp;d e&gt;f", out)
    self.assertNotIn("a&b", out)
def test_reader_param_and_location(self):
    """The location link is rendered only when ``reader=True``."""
    activity = {
        'object': {
            'content': 'foo',
            'location': {
                'displayName': 'My place',
                'url': 'http://my/place',
            },
        },
    }
    location = '<a class="p-name u-url" href="http://my/place">My place</a>'

    with_reader = atom.activities_to_atom([activity], {}, reader=True)
    self.assert_multiline_in(location, with_reader)

    without_reader = atom.activities_to_atom([activity], {}, reader=False)
    self.assertNotIn(location, without_reader)
def test_attachments(self):
    """Note and article attachments are each rendered as a blockquote."""
    attachments = [
        {"objectType": "note", "url": "http://p", "content": "note content"},
        {"objectType": "x", "url": "http://x"},
        {"objectType": "article", "url": "http://a",
         "content": "article content"},
    ]

    got = atom.activities_to_atom(
        [{"object": {"attachments": attachments}}], None)

    # NOTE(review): line breaks inside these literals are reconstructed from
    # a whitespace-collapsed source -- confirm against the template output.
    self.assert_multiline_in(
        """
<blockquote>
note content
</blockquote>
""",
        got,
    )
    self.assert_multiline_in(
        """
<blockquote>
article content
</blockquote>
""",
        got,
    )
def test_render_encodes_ampersands(self):
    """A bare ``&`` in content is escaped; existing entities are left alone."""
    # only the one unencoded & in a&b should be encoded
    activity = {'object': {'content': 'X <y> http://z?w a&b c&amp;d e&gt;f'}}

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title='my title')

    # The expectation must hold the escaped form; otherwise it would contain
    # the very 'a&b' that the next assertion forbids.
    self.assertIn('X <y> http://z?w a&amp;b c&amp;d e&gt;f', out)
    self.assertNotIn('a&b', out)
def test_escape_urls(self):
    """Ampersands in an activity URL are XML-escaped in the rendered <id>."""
    raw_url = "http://foo/bar?baz&baj"
    activity = {"url": raw_url, "object": {}}

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title="my title")

    # The escaped form must be present and the raw URL absent; with an
    # unescaped expectation the two assertions would contradict each other.
    self.assert_multiline_in("<id>http://foo/bar?baz&amp;baj</id>", out)
    self.assertNotIn(raw_url, out)
def test_to_people(self):
    """Person recipients in ``to`` are rendered as an "In reply to" line."""
    public_group = {
        'objectType': 'group',
        'alias': '@public',
    }
    person_a = {
        'objectType': 'person',
        'url': 'https://twitter.com/A',
        'displayName': 'aye',
    }
    person_b = {
        'objectType': 'person',
        'id': 'B',
        'url': 'https://twitter.com/B',
        'displayName': 'bee',
    }
    activity = {
        'object': {
            'objectType': 'note',
            'content': 'an extended tweet reply',
            'to': [public_group, person_a, person_b],
        },
    }

    got = atom.activities_to_atom([activity], None)

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in("""
<p>In reply to
<a class="h-card p-name u-url" href="https://twitter.com/A">aye</a>,
<a class="h-card p-name u-url" href="https://twitter.com/B">bee</a>:</p>
""", got)
def test_media_tags_and_enclosures(self):
    """Audio/video attachments yield media tags and enclosure links."""
    audio = {
        'objectType': 'audio',
        'stream': {'url': 'http://a/podcast.mp3'},
        'url': 'unused',
    }
    video = {
        'objectType': 'video',
        'stream': [
            {'url': 'http://a/vidjo/1.mov'},
            # only the first is rendered
            {'url': 'http://a/vidjo/2.mov'},
        ],
        'image': {'url': 'http://thumb'},
        'url': 'also unused',
    }

    got = atom.activities_to_atom([{
        'object': {
            'content': 'foo bar',
            'attachments': [audio, video],
        },
    }], {})

    # NOTE(review): line breaks inside the literals below are reconstructed
    # from a whitespace-collapsed source -- confirm against the template.
    self.assert_multiline_in("""\
<p><audio class="u-audio" src="http://a/podcast.mp3" controls="controls">Your browser does not support the audio tag. <a href="http://a/podcast.mp3">Click here to listen directly.</a></audio>
</p>
<p><video class="u-video" src="http://a/vidjo/1.mov" controls="controls" poster="http://thumb">Your browser does not support the video tag. <a href="http://a/vidjo/1.mov">Click here to view directly. <img src="http://thumb" /></a></video>
</p>
""", got)
    self.assert_multiline_in("""\
<link rel="enclosure" href="http://a/podcast.mp3" type="audio/mpeg" />
<link rel="enclosure" href="http://a/vidjo/1.mov" type="video/quicktime" />
""", got, ignore_blanks=True)
    # Neither attachment-level 'url' value may leak into the output.
    self.assertNotIn('unused', got)
def get(self):
    """Fetches the Instagram friends feed with a session cookie; writes Atom.

    Reads the required ``sessionid`` request parameter. If the fetch fails
    with 401/403, a one-entry feed asking the user to regenerate their feed
    is written instead. Other failures set an error status and text body.
    """
    session_id = util.get_required_param(self, 'sessionid').encode('utf-8')
    cookie = 'sessionid=%s' % urllib.quote(session_id)
    logging.info('Fetching with Cookie: %s', cookie)

    host_url = self.request.host_url + '/'
    ig = instagram.Instagram()
    try:
        resp = ig.get_activities_response(group_id=source.FRIENDS,
                                          scrape=True, cookie=cookie)
    except Exception as e:
        status, text = util.interpret_http_exception(e)
        if status in ('401', '403'):
            self.response.headers['Content-Type'] = 'application/atom+xml'
            broken_cookie_entry = {
                'object': {
                    'url': self.request.url,
                    'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
                },
            }
            self.response.out.write(atom.activities_to_atom(
                [broken_cookie_entry], {}, title='instagram-atom',
                host_url=host_url, request_url=self.request.path_url))
            return
        elif status:
            # Upstream 5xx is reported as 502; anything else passes through.
            self.response.status = 502 if int(status) // 100 == 5 else status
        elif util.is_connection_failure(e):
            self.response.status = 504  # HTTP 504 Gateway Timeout
        else:
            logging.exception('oops!')
            self.response.status = 500

        if isinstance(text, str):
            text = text.decode('utf-8')
        self.response.text = text or u'Unknown error.'
        return

    actor = resp.get('actor')
    if actor:
        logging.info('Logged in as %s (%s)',
                     actor.get('username'), actor.get('displayName'))
    else:
        logging.warning("Couldn't determine Instagram user!")

    title = 'instagram-atom feed for %s' % ig.actor_name(actor)
    self.response.headers['Content-Type'] = 'application/atom+xml'
    self.response.out.write(atom.activities_to_atom(
        resp.get('items', []), actor, title=title, host_url=host_url,
        request_url=self.request.path_url,
        xml_base='https://www.instagram.com/'))
def test_render_with_image(self):
    """Attached images are rendered inline as HTML."""
    activity = copy.deepcopy(test_instagram.ACTIVITY)
    rendered = atom.activities_to_atom([activity], test_instagram.ACTOR,
                                       title='my title')
    self.assertIn('<img class="thumbnail" src="http://attach/image/big"',
                  rendered)
def test_render_with_images(self):
    """Attached images are rendered inline as HTML."""
    activity = copy.deepcopy(test_instagram.ACTIVITY)
    extra_image = {"objectType": "image", "image": {"url": "http://image/2"}}
    activity["object"]["attachments"].append(extra_image)

    got = atom.activities_to_atom([activity], test_instagram.ACTOR, title="")

    for expected in (
        '<img class="thumbnail" src="http://attach/image/big"',
        '<img class="thumbnail" src="http://image/2"',
    ):
        self.assert_multiline_in(expected, got)
def test_updated_defaults_to_published(self):
    """The feed's <updated> is the second activity's ``published`` time."""
    published_times = ("2013-12-27T17:25:55+00:00", "2014-12-27T17:25:55+00:00")
    activities = [{"object": {"published": when}} for when in published_times]

    out = atom.activities_to_atom(activities, test_twitter.ACTOR,
                                  title="my title")

    self.assert_multiline_in("<updated>2014-12-27T17:25:55+00:00</updated>", out)
def test_updated_defaults_to_published(self):
    """The feed's <updated> is the second activity's ``published`` time."""
    older = {'object': {'published': '2013-12-27T17:25:55+00:00'}}
    newer = {'object': {'published': '2014-12-27T17:25:55+00:00'}}

    out = atom.activities_to_atom([older, newer], test_twitter.ACTOR,
                                  title='my title')

    self.assertIn('<updated>2014-12-27T17:25:55+00:00</updated>', out)
def test_render_untitled_image(self):
    """Images should be included even if there is no other content."""
    activity = copy.deepcopy(test_instagram.ACTIVITY)
    del activity['object']['content']

    rendered = atom.activities_to_atom([activity], test_instagram.ACTOR,
                                       title='my title')

    self.assertIn('<img class="thumbnail" src="http://attach/image/big"',
                  rendered)
def write_response(self, response, actor=None, url=None, title=None):
    """Converts ActivityStreams activities and writes them out.

    The output format comes from the ``format`` (or ``output``) request
    parameter and defaults to ``json``. If ``plaintext`` is present in the
    request parameters, the Content-Type is forced to ``text/plain``.

    Args:
      response: response dict with values based on OpenSocial ActivityStreams
        REST API, as returned by Source.get_activities_response()
      actor: optional ActivityStreams actor dict for current user. Only used
        for Atom output.
      url: the input URL
      title: string, used in Atom output

    Raises:
      exc.HTTPBadRequest: if the requested format is not supported.
    """
    expected_formats = ('activitystreams', 'json', 'atom', 'xml', 'html',
                        'json-mf2')
    fmt = self.request.get('format') or self.request.get('output') or 'json'
    if fmt not in expected_formats:
        raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' %
                                 (fmt, expected_formats))

    activities = response['items']
    headers = self.response.headers
    write = self.response.out.write

    headers.update({
        'Access-Control-Allow-Origin': '*',
        'Strict-Transport-Security':
            'max-age=16070400; includeSubDomains; preload',  # 6 months
    })

    if fmt in ('json', 'activitystreams'):
        headers['Content-Type'] = 'application/json'
        write(json.dumps(response, indent=2))
    elif fmt == 'atom':
        headers['Content-Type'] = 'text/xml'
        hub = self.request.get('hub')
        write(atom.activities_to_atom(
            activities, actor,
            host_url=url or self.request.host_url + '/',
            request_url=self.request.url,
            xml_base=util.base_url(url),
            title=title,
            rels={'hub': hub} if hub else None))
        # Advertise the feed itself, and the hub if one was given, via Link.
        headers.add('Link', str('<%s>; rel="self"' % self.request.url))
        if hub:
            headers.add('Link', str('<%s>; rel="hub"' % hub))
    elif fmt == 'xml':
        headers['Content-Type'] = 'text/xml'
        write(XML_TEMPLATE % util.to_xml(response))
    elif fmt == 'html':
        headers['Content-Type'] = 'text/html'
        write(microformats2.activities_to_html(activities))
    elif fmt == 'json-mf2':
        headers['Content-Type'] = 'application/json'
        items = [microformats2.object_to_json(a) for a in activities]
        write(json.dumps({'items': items}, indent=2))

    if 'plaintext' in self.request.params:
        # override response content type
        headers['Content-Type'] = 'text/plain'
def test_render_encodes_ampersands_in_quote_tweets(self):
    """A bare ``&`` inside a quoted (attached) note is escaped as well."""
    activity = {
        "object": {
            "content": "outer",
            "attachments": [
                # only the one unencoded & in a&b should be encoded
                {"objectType": "note",
                 "content": "X <y> http://z?w a&b c&amp;d e&gt;f"},
            ],
        }
    }

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title="my title")

    # The expectation must hold the escaped form; otherwise it would contain
    # the very "a&b" that the next assertion forbids.
    self.assert_multiline_in("X <y> http://z?w a&amp;b c&amp;d e&gt;f", out)
    self.assertNotIn("a&b", out)
def test_render_with_images(self):
    """Attached images are rendered inline as HTML."""
    activity = copy.deepcopy(test_instagram.ACTIVITY)
    second_image = {'objectType': 'image', 'image': {'url': 'http://image/2'}}
    activity['object']['attachments'].append(second_image)

    got = atom.activities_to_atom([activity], test_instagram.ACTOR, title='')

    for expected in (
        '<img class="u-photo" src="http://attach/image/big"',
        '<img class="u-photo" src="http://image/2"',
    ):
        self.assert_multiline_in(expected, got)
def test_xml_base(self):
    """The ``xml_base`` argument is emitted as ``xml:base`` on <feed>."""
    got = atom.activities_to_atom([], {}, xml_base='http://my.xml/base')

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in("""
<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US"
      xmlns="http://www.w3.org/2005/Atom"
      xmlns:activity="http://activitystrea.ms/spec/1.0/"
      xmlns:georss="http://www.georss.org/georss"
      xmlns:ostatus="http://ostatus.org/schema/1.0"
      xmlns:thr="http://purl.org/syndication/thread/1.0"
      xml:base="http://my.xml/base">
""", got)
def test_render_encodes_ampersands_in_quote_tweets(self):
    """A bare ``&`` inside a quoted (attached) note is escaped as well."""
    activity = {'object': {
        'content': 'outer',
        'attachments': [{
            'objectType': 'note',
            # only the one unencoded & in a&b should be encoded
            'content': 'X <y> http://z?w a&b c&amp;d e&gt;f',
        }],
    }}

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title='my title')

    # The expectation must hold the escaped form; otherwise it would contain
    # the very 'a&b' that the next assertion forbids.
    self.assert_multiline_in('X <y> http://z?w a&amp;b c&amp;d e&gt;f', out)
    self.assertNotIn('a&b', out)
def test_activities_to_atom(self):
    """Each silo's sample activity renders to that module's ATOM fixture."""
    template_vars = {
        'request_url': 'http://request/url',
        'host_url': 'http://host/url',
    }
    for test_module in (test_facebook, test_instagram, test_twitter):
        expected = test_module.ATOM % template_vars
        # The request URL passed in carries an access_token query parameter
        # that the expected fixture's request_url does not.
        actual = atom.activities_to_atom(
            [copy.deepcopy(test_module.ACTIVITY)],
            test_module.ACTOR,
            request_url='http://request/url?access_token=foo',
            host_url='http://host/url',
        )
        self.assert_multiline_equals(expected, actual)
def test_object_in_reply_to(self):
    """inReplyTo should be translated to thr:in-reply-to."""
    original = {
        'id': 'the:orig',
        'url': 'http://orig',
    }
    activity = {'object': {
        'id': 'my:reply',
        'inReplyTo': [original],
    }}

    got = atom.activities_to_atom([activity], {})

    self.assert_multiline_in(
        '<thr:in-reply-to ref="the:orig" href="http://orig" type="text/html" />',
        got)
def get(self):
    """Writes an Atom feed for a Twitter timeline, or for a list if given.

    Requests that carry neither ``consumer_key`` nor ``consumer_secret``
    get a static "welcome back" feed instead. Otherwise reads the required
    ``access_token_key`` / ``access_token_secret`` parameters and the
    optional ``list`` parameter (``username/list``), aborting with 400 if
    ``list`` is malformed.
    """
    self.response.headers['Content-Type'] = 'application/atom+xml'

    # New style feed with user-provided app (consumer) key and secret
    has_consumer_creds = (self.request.get('consumer_key') or
                          self.request.get('consumer_secret'))
    if not has_consumer_creds:
        # Welcome back message for old feeds
        # NOTE(review): line breaks inside this literal are reconstructed
        # from a whitespace-collapsed source -- confirm against the original.
        self.response.out.write("""\
<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US"
      xmlns="http://www.w3.org/2005/Atom">
<generator uri="https://twitter-atom.appspot.com/" version="0.1">twitter-atom</generator>
<id>https://twitter-atom.appspot.com/</id>
<title>Twitter Atom feeds is back!</title>
<updated>2013-07-08T20:00:00</updated>
<entry>
<id>tag:twitter-atom.appspot.com,2013:2</id>
<title>Twitter Atom feeds is back!</title>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<p style="color: red; font-style: italic;"><b>Twitter Atom feeds is back! I'm
experimenting with a new design that Twitter will (hopefully) be ok with. You
can try it out by <a href="http://twitter-atom.appspot.com/">generating a new
feed here</a>. Feel free to <a href="http://twitter.com/snarfed_org">ping me</a>
if you have any questions. Welcome back!</b></p>
</div>
</content>
<published>2013-07-08T20:00:00</published>
</entry>
</feed>
""")
        return

    token_key = util.get_required_param(self, 'access_token_key')
    token_secret = util.get_required_param(self, 'access_token_secret')
    tw = twitter.Twitter(token_key, token_secret)

    list_str = self.request.get('list')
    if not list_str:
        actor = tw.get_actor()
        activities = tw.get_activities(count=50)
    else:
        # this pattern is duplicated in index.html.
        # also note that list names allow more characters than usernames,
        # but the allowed characters aren't explicitly documented. :/ details:
        # https://groups.google.com/d/topic/twitter-development-talk/lULdIVR3B9s/discussion
        match = re.match(r'@?([A-Za-z0-9_]+)/([A-Za-z0-9_-]+)', list_str)
        if not match:
            self.abort(400,
                       'List must be of the form username/list (got %r)' % list_str)
        user_id, group_id = match.groups()
        actor = tw.get_actor(user_id)
        activities = tw.get_activities(user_id=user_id, group_id=group_id,
                                       count=50)

    title = 'twitter-atom feed for %s' % (list_str or actor.get('username', ''))
    feed = atom.activities_to_atom(
        activities, actor, title=title,
        host_url=self.request.host_url + '/',
        request_url=self.request.path_url,
        xml_base='https://twitter.com/')
    self.response.out.write(feed)
def test_render_share_no_content(self):
    """A share without its own content renders "Shared a post by <author>"."""
    shared_object = {
        "content": "original object",
        "author": {"displayName": "Mr. Foo"},
    }
    activity = {"verb": "share", "object": shared_object}

    out = atom.activities_to_atom([activity], {})

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in(
        """
Shared <a href="#">a post</a> by <span class="h-card">
<span class="p-name">Mr. Foo</span>
</span>
original object
""",
        out,
    )
def test_activities_to_atom(self):
    """Each silo's sample activity renders to that module's ATOM fixture."""
    urls = {
        "request_url": "http://request/url?access_token=foo",
        "host_url": "http://host/url",
        "base_url": "http://base/url",
    }
    for test_module in (test_facebook, test_instagram, test_twitter):
        expected = test_module.ATOM % urls
        actual = atom.activities_to_atom(
            [copy.deepcopy(test_module.ACTIVITY)],
            test_module.ACTOR,
            request_url=urls["request_url"],
            host_url=urls["host_url"],
            xml_base=urls["base_url"],
        )
        self.assert_multiline_equals(expected, actual)
def handle_exception(self, e, debug):
    """Turns upstream auth failures into a one-entry "regenerate" Atom feed.

    If ``util.interpret_http_exception`` reports a 401 or 403, writes a feed
    whose only entry tells the user their login isn't working and links to
    the host URL. Every other exception is delegated to
    ``handlers.handle_exception``.

    Args:
      e: the exception being handled
      debug: passed through unchanged to ``handlers.handle_exception``
    """
    code, _ = util.interpret_http_exception(e)
    if code not in ('401', '403'):
        return handlers.handle_exception(self, e, debug)

    self.response.headers['Content-Type'] = 'application/atom+xml'
    host_url = self.request.host_url + '/'
    broken_login_entry = {
        'object': {
            'url': self.request.url,
            'content': 'Your twitter-atom login isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
        },
    }
    # The title previously read 'facebook-atom', contradicting the
    # twitter-atom message in the entry itself.
    self.response.out.write(atom.activities_to_atom(
        [broken_login_entry], {}, title='twitter-atom',
        host_url=host_url, request_url=self.request.path_url))
def test_render_encodes_ampersands(self):
    """Bare ``&`` in content and in author fields is escaped in the output."""
    activity = {
        'object': {
            # only the one unencoded & in a&b should be encoded
            'content': 'X <y> http://z?w a&b c&amp;d e&gt;f',
            'author': {
                'displayName': 'Alice & Bob',
                'url': 'http://alice/?and&bob',
            },
        },
    }

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title='my title')

    # The expectation must hold the escaped form; otherwise it would contain
    # the very 'a&b' that the next assertion forbids.
    self.assert_multiline_in('X <y> http://z?w a&amp;b c&amp;d e&gt;f', out)
    self.assertNotIn('a&b', out)
    self.assertNotIn('and&bob', out)
def test_attachments(self):
    """Note/article attachments become blockquotes, with the author linked."""
    note = {
        'objectType': 'note',
        'url': 'http://p',
        'content': 'note content',
    }
    unknown = {
        'objectType': 'x',
        'url': 'http://x',
    }
    article = {
        'objectType': 'article',
        'url': 'http://a',
        'content': 'article content',
        'author': {
            'displayName': 'Mr. Foo',
            'url': 'http://x/',
            # image shouldn't be included
            'image': {'url': 'http://x/avatar.jpg'},
        },
    }

    got = atom.activities_to_atom(
        [{'object': {'attachments': [note, unknown, article]}}], None)

    # NOTE(review): line breaks inside these literals are reconstructed from
    # a whitespace-collapsed source -- confirm against the template output.
    self.assert_multiline_in("""
<blockquote>
note content
</blockquote>
""", got)
    self.assert_multiline_in("""
<blockquote>
<a class="p-name u-url" href="http://x/">Mr. Foo</a>: article content
</blockquote>
""", got)
def test_updated_defaults_to_published(self):
    """The feed's <updated> is the second activity's ``published`` time."""
    activities = [
        {'object': {'published': stamp}}
        for stamp in ('2013-12-27T17:25:55+00:00', '2014-12-27T17:25:55+00:00')
    ]

    out = atom.activities_to_atom(activities, test_twitter.ACTOR,
                                  title='my title')

    self.assert_multiline_in('<updated>2014-12-27T17:25:55+00:00</updated>',
                             out)
def test_image_duplicated_in_attachment(self):
    """If an image is also in an attachment, don't render a duplicate.

    https://github.com/snarfed/twitter-atom/issues/8
    """
    pic_1 = {'url': 'http://pics/1.jpg'}
    pic_2 = {'url': 'http://pics/2.jpg'}
    activity = {
        'object': {
            'content': 'foo bar',
            'image': [dict(pic_1), dict(pic_2)],
            'attachments': [
                {'objectType': 'note', 'image': dict(pic_2)},
                {'objectType': 'image', 'image': dict(pic_1)},
            ],
        },
    }

    got = atom.activities_to_atom([activity], {})

    # The first picture is rendered exactly once...
    self.assertEqual(
        1,
        got.count('<img class="u-photo" src="http://pics/1.jpg" alt="" />'),
        got)
    # NOTE(review): line breaks in this literal are reconstructed -- confirm.
    self.assert_multiline_in("""
<link rel="enclosure" href="http://pics/1.jpg" type="image/jpeg" />
""", got)
    # ...and the second one is not rendered as an <img> at all.
    self.assertNotIn(
        '<img class="u-photo" src="http://pics/2.jpg" alt="" />', got, got)
def test_render_share(self):
    """A share with its own content renders that, not the shared object."""
    shared_object = {
        'content': 'original object',
        'author': {'displayName': 'Mr. Foo'},
    }
    activity = {
        'verb': 'share',
        'content': "sharer's comment",
        'object': shared_object,
    }

    out = atom.activities_to_atom([activity], {})

    # NOTE(review): line breaks inside these literals are reconstructed from
    # a whitespace-collapsed source -- confirm against the template output.
    self.assert_multiline_in("""
<title>sharer's comment</title>
""", out)
    self.assert_multiline_in("""
sharer's comment
""", out)
    self.assertNotIn('original object', out)
def test_render_encodes_ampersands_in_quote_tweets(self):
    """A bare ``&`` inside a quoted (attached) note is escaped as well."""
    quoted = {
        'objectType': 'note',
        # only the one unencoded & in a&b should be encoded
        'content': 'X <y> http://z?w a&b c&amp;d e&gt;f',
    }
    activity = {
        'object': {
            'content': 'outer',
            'attachments': [quoted],
        }
    }

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title='my title')

    # The expectation must hold the escaped form; otherwise it would contain
    # the very 'a&b' that the next assertion forbids.
    self.assert_multiline_in('X <y> http://z?w a&amp;b c&amp;d e&gt;f', out)
    self.assertNotIn('a&b', out)
def test_render_share_no_content(self):
    """A share without its own content renders "Shared a post by <author>"."""
    activity = {
        'verb': 'share',
        'object': {
            'content': 'original object',
            'author': {'displayName': 'Mr. Foo'},
        },
    }

    out = atom.activities_to_atom([activity], {})

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in("""
Shared <a href="#">a post</a> by <span class="h-card">
<span class="p-name">Mr. Foo</span>
</span>
original object
""", out)
def test_media_enclosures(self):
    """Audio and video attachments produce enclosure links."""
    attachments = [{
        'objectType': 'audio',
        'url': 'http://a/podcast.mp3',
    }, {
        'objectType': 'video',
        'url': 'http://a/vidjo.mov',
    }]

    got = atom.activities_to_atom([{
        'object': {
            'content': 'foo bar',
            'attachments': attachments,
        },
    }], {})

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in("""\
<link rel="enclosure" href="http://a/podcast.mp3" />
<link rel="enclosure" href="http://a/vidjo.mov" />
""", got)
def get(self):
    """Fetches the Instagram friends feed with a session cookie; writes Atom.

    Reads the required ``sessionid`` request parameter. On a fetch failure,
    sets an error status and a text body and returns without writing a feed.
    """
    session_id = util.get_required_param(self, 'sessionid').encode('utf-8')
    cookie = 'sessionid=%s' % urllib.quote(session_id)
    logging.info('Fetching with Cookie: %s', cookie)

    ig = instagram.Instagram()
    try:
        resp = ig.get_activities_response(group_id=source.FRIENDS,
                                          scrape=True, cookie=cookie)
    except Exception as e:
        status, text = util.interpret_http_exception(e)
        if status:
            # The previous check was `status == 500`, which cannot match a
            # string status, so the 502 mapping never fired. Comparing via
            # str() works for either representation.
            # NOTE(review): assumes status is a string such as '500' --
            # confirm against util.interpret_http_exception.
            self.response.status = 502 if str(status) == '500' else status
        elif util.is_connection_failure(e):
            self.response.status = 504  # HTTP 504 Gateway Timeout
        else:
            logging.exception('oops!')
            self.response.status = 500

        if isinstance(text, str):
            text = text.decode('utf-8')
        self.response.text = text or u'Unknown error.'
        return

    actor = resp.get('actor')
    if actor:
        logging.info('Logged in as %s (%s)',
                     actor.get('username'), actor.get('displayName'))
    else:
        logging.warning("Couldn't determine Instagram user!")

    title = 'instagram-atom feed for %s' % ig.actor_name(actor)
    self.response.headers['Content-Type'] = 'application/atom+xml'
    self.response.out.write(atom.activities_to_atom(
        resp.get('items', []), actor, title=title,
        host_url=self.request.host_url + '/',
        request_url=self.request.path_url,
        xml_base='https://www.instagram.com/'))
def write_response(self, response, actor=None):
    """Converts ActivityStreams activities and writes them out.

    The output format comes from the ``format`` (or ``output``) request
    parameter and defaults to ``json``. If ``plaintext`` is present in the
    request parameters, the Content-Type is forced to ``text/plain``.

    Args:
      response: response dict with values based on OpenSocial ActivityStreams
        REST API, as returned by Source.get_activities_response()
      actor: optional ActivityStreams actor dict for current user. Only used
        for Atom output.

    Raises:
      exc.HTTPBadRequest: if the requested format is not supported.
    """
    expected_formats = ('activitystreams', 'json', 'atom', 'xml', 'html',
                        'json-mf2')
    fmt = self.request.get('format') or self.request.get('output') or 'json'
    if fmt not in expected_formats:
        raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' %
                                 (fmt, expected_formats))

    activities = response['items']
    headers = self.response.headers
    write = self.response.out.write

    headers['Access-Control-Allow-Origin'] = '*'

    if fmt in ('json', 'activitystreams'):
        headers['Content-Type'] = 'application/json'
        write(json.dumps(response, indent=2))
    elif fmt == 'atom':
        headers['Content-Type'] = 'text/xml'
        write(atom.activities_to_atom(
            activities, actor,
            host_url=self.request.host_url + '/',
            request_url=self.request.path_url))
    elif fmt == 'xml':
        headers['Content-Type'] = 'text/xml'
        write(XML_TEMPLATE % util.to_xml(response))
    elif fmt == 'html':
        headers['Content-Type'] = 'text/html'
        write(microformats2.activities_to_html(activities))
    elif fmt == 'json-mf2':
        headers['Content-Type'] = 'application/json'
        # Each activity's object is converted along with its context, if any.
        items = [
            microformats2.object_to_json(a['object'], a.get('context', {}))
            for a in activities
        ]
        write(json.dumps({'items': items}, indent=2))

    if 'plaintext' in self.request.params:
        # override response content type
        headers['Content-Type'] = 'text/plain'
def test_render_share_of_obj_with_attachments(self):
    """This is e.g. a retweet of a quote tweet."""
    quoted_note = {
        'objectType': 'note',
        'content': 'quoted text',
    }
    activity = {
        'verb': 'share',
        'object': {
            'content': 'RT @quoter: comment',
            'attachments': [quoted_note],
        },
    }

    out = atom.activities_to_atom([activity], test_twitter.ACTOR,
                                  title='my title')

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in("""
RT @quoter: comment
<blockquote>
quoted text
</blockquote>
""", out)
def test_author_email(self):
    """An author's email should be rendered in the <author> element."""
    # The input address and the expected <email> had been obfuscated into
    # two different placeholders, so the test could never pass as written.
    # NOTE(review): 'mrs@foo.com' is a stand-in for the lost original
    # address -- any value works as long as input and expectation agree.
    email = 'mrs@foo.com'
    activity = {
        'object': {
            'content': 'foo',
            'author': {
                'displayName': 'Mrs. Foo',
                'email': email,
            },
        }
    }

    got = atom.activities_to_atom([activity], {})

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in("""\
<author>
<activity:object-type>http://activitystrea.ms/schema/1.0/person</activity:object-type>
<uri></uri>
<name>Mrs. Foo</name>
<email>mrs@foo.com</email>
</author>
""", got, ignore_blanks=True)
def test_image_duplicated_in_attachment(self):
    """If an image is also in an attachment, don't render a duplicate.

    https://github.com/snarfed/twitter-atom/issues/8
    """
    pic_urls = ('http://pics/1.jpg', 'http://pics/2.jpg')
    activity = {
        'object': {
            'content': 'foo bar',
            'image': [{'url': url} for url in pic_urls],
            'attachments': [{
                'objectType': 'note',
                'image': {'url': 'http://pics/2.jpg'},
            }, {
                'objectType': 'image',
                'image': {'url': 'http://pics/1.jpg'},
            }],
        },
    }

    got = atom.activities_to_atom([activity], {})

    # This variant expects neither picture to appear as a u-photo <img>.
    for url in pic_urls:
        self.assertNotIn('<img class="u-photo" src="%s" alt="" />' % url, got)
def test_object_only(self):
    """A single bare object's own URL and author URL appear in the feed."""
    post_url = 'http://voxpelli.com/2015/09/oberoende-sociala-webben-2015/'
    obj = {
        'displayName': 'Den oberoende sociala webben 2015',
        'id': post_url,
        'author': {
            'image': {'url': 'http://voxpelli.com/avatar.jpg'},
            'url': 'http://voxpelli.com/',
        },
        'url': post_url,
    }

    out = atom.activities_to_atom([{'object': obj}], test_twitter.ACTOR)

    expected_snippets = (
        '<link rel="alternate" type="text/html" href="http://voxpelli.com/2015/09/oberoende-sociala-webben-2015/" />',
        '<link rel="self" type="application/atom+xml" href="http://voxpelli.com/2015/09/oberoende-sociala-webben-2015/" />',
        '<uri>http://voxpelli.com/</uri>',
    )
    for expected in expected_snippets:
        self.assert_multiline_in(expected, out)
def test_attachments(self):
    """Image, note, article and nested quote attachments are all rendered."""
    note = {
        'objectType': 'note',
        'url': 'http://p',
        'content': 'note content',
    }
    unknown = {
        'objectType': 'x',
        'url': 'http://x',
    }
    image = {
        'objectType': 'image',
        'image': [{'url': 'http://pic'}, {'url': 'ignore'}],
    }
    article = {
        'objectType': 'article',
        'url': 'http://a',
        'content': 'article content',
        'author': {
            'displayName': 'Mr. Foo',
            'url': 'http://x/',
            # image shouldn't be included
            'image': {'url': 'http://x/avatar.jpg'},
        },
    }
    quote_with_photo = {
        'objectType': 'note',
        'content': 'quoted tweet with photo',
        'attachments': [{
            'objectType': 'image',
            'image': [{'url': 'http://quote/tweet/pic'}],
        }],
    }

    got = atom.activities_to_atom([{
        'object': {
            'attachments': [note, unknown, image, article, quote_with_photo],
        }
    }], None)

    # NOTE(review): line breaks inside this literal are reconstructed from a
    # whitespace-collapsed source -- confirm against the rendered template.
    self.assert_multiline_in("""
<p>
<img class="u-photo" src="http://pic" alt="" />
</p>
<blockquote>
note content
</blockquote>
<blockquote>
<a class="p-name u-url" href="http://x/">Mr. Foo</a>: article content
</blockquote>
<blockquote>
quoted tweet with photo
<p>
<img class="u-photo" src="http://quote/tweet/pic" alt="" />
</p>
</blockquote>
""", got)
def get(self):
    """Fetches the Instagram friends feed with a session cookie; writes it out.

    Reads the required ``sessionid`` request parameter and the optional
    ``format`` parameter (``atom``, the default, or ``html``). A 403 from
    the fetch produces a one-entry feed asking the user to regenerate their
    feed; other failures set an error status and text body. An unsupported
    ``format`` aborts with 400.
    """
    session_id = util.get_required_param(self, 'sessionid').encode('utf-8')
    cookie = 'sessionid=%s' % urllib.parse.quote(session_id)
    logging.info('Fetching with Cookie: %s', cookie)

    host_url = self.request.host_url + '/'
    ig = instagram.Instagram()
    try:
        resp = ig.get_activities_response(group_id=source.FRIENDS,
                                          scrape=True, cookie=cookie)
    except Exception as e:
        status, text = util.interpret_http_exception(e)
        if status in ('403', ):
            self.response.headers['Content-Type'] = 'application/atom+xml'
            broken_cookie_entry = {
                'object': {
                    'url': self.request.url,
                    'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
                },
            }
            self.response.out.write(atom.activities_to_atom(
                [broken_cookie_entry], {}, title='instagram-atom',
                host_url=host_url, request_url=self.request.path_url))
            return
        elif status == '401':
            # IG returns 401 sometimes as a form of rate limiting or bot
            # detection
            self.response.status = '429'
        elif status:
            self.response.status = status
        else:
            logging.exception('oops!')
            self.response.status = 500

        self.response.text = text or 'Unknown error.'
        return

    actor = resp.get('actor')
    if actor:
        logging.info('Logged in as %s (%s)',
                     actor.get('username'), actor.get('displayName'))
    else:
        logging.warning("Couldn't determine Instagram user!")

    activities = resp.get('items', [])
    fmt = self.request.get('format', 'atom')
    if fmt == 'atom':
        title = 'instagram-atom feed for %s' % ig.actor_name(actor)
        self.response.headers['Content-Type'] = 'application/atom+xml'
        self.response.out.write(atom.activities_to_atom(
            activities, actor, title=title, host_url=host_url,
            request_url=self.request.path_url,
            xml_base='https://www.instagram.com/'))
    elif fmt == 'html':
        self.response.headers['Content-Type'] = 'text/html'
        self.response.out.write(microformats2.activities_to_html(activities))
    else:
        self.abort(400, 'format must be either atom or html; got %s' % fmt)
def test_render_content_as_html(self):
    """Tweet content is rendered as linkified HTML in the feed."""
    rendered = atom.activities_to_atom(
        [copy.deepcopy(test_twitter.ACTIVITY)], test_twitter.ACTOR,
        title='my title')

    # The literal "<3" in the tweet text must appear entity-escaped; a bare
    # "<" is not well-formed inside XML content.
    # NOTE(review): the "&lt;" was restored after extraction decoded it --
    # confirm against the test_twitter fixture.
    self.assert_multiline_in(
        '<a href="https://twitter.com/foo">@twitter</a> meets @seepicturely at <a href="https://twitter.com/search?q=%23tcdisrupt">#tcdisrupt</a> &lt;3 <a href="http://first/link/">first</a> <a href="http://instagr.am/p/MuW67/">instagr.am/p/MuW67</a> ',
        rendered)
def test_title(self):
    """The ``title`` argument is rendered as a <title> element."""
    activity = copy.deepcopy(test_facebook.ACTIVITY)
    rendered = atom.activities_to_atom([activity], test_facebook.ACTOR,
                                       title='my title')
    self.assert_multiline_in('\n<title>my title</title>', rendered)
def test_render_missing_object_type_and_verb(self):
    """No bare schema-prefix element is emitted without objectType/verb."""
    bare_activity = {'object': {'content': 'foo'}}

    out = atom.activities_to_atom([bare_activity], test_twitter.ACTOR,
                                  title='my title')

    # An element holding only the schema prefix would mean an empty
    # objectType or verb had been appended to it.
    self.assertNotIn('>http://activitystrea.ms/schema/1.0/<', out)
def test_rels(self):
    """Each entry in ``rels`` is emitted as a <link rel=... href=...>."""
    rels = {'foo': 'bar', 'baz': 'baj'}

    got = atom.activities_to_atom([], {}, rels=rels)

    for expected in ('<link rel="foo" href="bar" />',
                     '<link rel="baz" href="baj" />'):
        self.assert_multiline_in(expected, got)
def get(self):
    """Scrape the m.facebook.com news feed and write it out as an Atom feed.

    Query parameters:
      c_user, xs: required; sent to Facebook as the ``c_user``/``xs`` cookie.
        A 400 is returned if either is missing.
      all: optional; when ``true`` (case-insensitive) the blacklist is ignored
        and every post is returned.

    Behavior by Facebook response status:
      401 or 403: a one-entry Atom feed is written telling the user to
        regenerate their feed.
      any other non-200 status: aborts with 502.
      200: each ``div`` whose id matches ``u_0_.`` is treated as a post,
        stripped of its footer sections and of the attributes in
        ``OMIT_ATTRIBUTES``, and written with the ``HEADER``, ``ENTRY`` and
        ``FOOTER`` templates, sorted by URL in descending order.

    ``blacklisted``, ``clean_url``, ``OMIT_ATTRIBUTES``, ``HEADER``, ``ENTRY``
    and ``FOOTER`` are defined elsewhere in this file.
    """
    from urllib.error import HTTPError

    try:
        cookie = 'c_user=%(c_user)s; xs=%(xs)s' % self.request.params
    except KeyError:
        return self.abort(400, 'Query parameters c_user and xs are required')

    show_all = self.request.get('all', '').lower() == 'true'
    if show_all:
        logging.info(
            'Ignoring blacklist and returning all items due to all=true!')

    logging.info('Fetching with Cookie: %s', cookie)
    fetch = urllib.request.Request(
        # ?sk=h_chr uses the Most Recent news feed option (instead of Top Stories)
        'https://m.facebook.com/?sk=h_chr',
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0) Gecko/20100101 Firefox/39.0',
            'Cookie': cookie.encode('utf-8'),
        })
    try:
        resp = urllib.request.urlopen(fetch)
    except HTTPError as err:
        # urlopen raises for non-2xx statuses. HTTPError is itself a response
        # object (read()/getcode()/close()), so use it as one; otherwise the
        # 401/403 and 502 handling below could never run.
        resp = err

    try:
        body = resp.read()
        status = resp.getcode()
    finally:
        resp.close()
    logging.info('Response: %s', status)

    if status in (401, 403):
        self.response.headers['Content-Type'] = 'application/atom+xml'
        host_url = self.request.host_url + '/'
        self.response.out.write(
            atom.activities_to_atom([{
                'object': {
                    'url': self.request.url,
                    'content': 'Your facebook-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
                },
            }], {}, title='facebook-atom', host_url=host_url,
                request_url=self.request.path_url))
        return
    elif status != 200:
        return self.abort(502, "Facebook fetch failed")

    soup = BeautifulSoup(body, 'html.parser')

    home_link = soup.find(
        'a', href=re.compile(r'/[^?]+\?ref_component=mbasic_home_bookmark.*'))
    if home_link:
        href = home_link['href']
        logging.info('Logged in for user %s', href[1:href.find('?')])
    else:
        logging.warning("Couldn't determine username or id!")

    posts = soup.find_all('div', id=re.compile('u_0_.'))
    logging.info('Found %d posts', len(posts))

    # Hoisted out of the loop: the pattern is the same for every post.
    save_or_more_re = re.compile('/(save/story|nfx/basic/direct_actions)/.+')

    entries = []
    for post in posts:
        # look for Full Story link; it's the first a element before Save or More.
        # (can't use text labels because we don't know language.)
        save_or_more = post.find(href=save_or_more_re)
        if not save_or_more:
            logging.info('Skipping one due to missing Save and More links')
            continue

        link = save_or_more.find_previous_sibling('a')
        if not link:
            logging.info('Skipping one due to missing Full Story link')
            continue

        story = post.div.get_text(' ')
        if not show_all:
            if blacklisted(story):
                continue

            header_div = post.find_previous_sibling('div')
            if header_div:
                header = header_div.find('h3')
                if header and blacklisted(header.get_text(' ')):
                    continue

        # strip footer sections:
        # * save_or_more.parent: like count, comment count, etc.
        # * ...previous_sibling: relative publish time (e.g. '1 hr')
        # * ...next_sibling: most recent comment
        #
        # these all change over time, which we think triggers readers to show
        # stories again even when you've already read them.
        # https://github.com/snarfed/facebook-atom/issues/11
        footer = save_or_more.parent
        if footer.previous_sibling:
            footer.previous_sibling.extract()
        # next_siblings is a generator over the tree we are about to modify,
        # so fully evaluate it with list() before extracting anything.
        for sibling in list(footer.next_siblings):
            sibling.extract()
        footer.extract()

        for a in post.find_all('a'):
            if a.get('href'):
                a['href'] = clean_url(a['href'])

        for elem in post.find_all() + [post]:
            for attr in OMIT_ATTRIBUTES:
                del elem[attr]

        entries.append({
            'url': xml.sax.saxutils.escape(clean_url(link['href'])),
            'title': xml.sax.saxutils.escape(story[:100]),
            'content': post.prettify(),
        })

    entries.sort(key=operator.itemgetter('url'), reverse=True)

    # The trailing 'Z' declares UTC, so take the time in UTC rather than in
    # the server's local zone; tzinfo is dropped to keep isoformat() free of
    # a '+00:00' offset.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    self.response.headers['Content-Type'] = 'application/atom+xml'
    self.response.out.write(HEADER % {'updated': now_utc.isoformat('T') + 'Z'})
    for entry in entries:
        self.response.out.write((ENTRY % entry).encode('utf-8'))
    self.response.out.write(FOOTER)
def write_response(self, response, actor=None, url=None, title=None):
    """Render an ActivityStreams response in the requested output format.

    The format comes from the ``format`` query parameter, falling back to
    ``output`` and then to ``json``. If the ``plaintext`` query parameter is
    present, the Content-Type is overridden with ``text/plain`` at the end.

    Args:
      response: response dict with values based on OpenSocial ActivityStreams
        REST API, as returned by Source.get_activities_response()
      actor: optional ActivityStreams actor dict for current user. Only used
        for Atom and JSON Feed output.
      url: the input URL
      title: string, used in Atom and JSON Feed output

    Raises:
      exc.HTTPBadRequest: if the format is not supported, or if JSON Feed
        conversion raises TypeError.
    """
    expected_formats = ('activitystreams', 'json', 'atom', 'xml', 'html',
                        'json-mf2', 'jsonfeed')
    # list of official MIME types:
    # https://www.iana.org/assignments/media-types/media-types.xhtml
    mime_types = {
        'activitystreams': 'application/json',
        'json': 'application/json',
        'atom': 'application/atom+xml',
        'xml': 'application/xml',
        'html': 'text/html',
        'json-mf2': 'application/json',
        'jsonfeed': 'application/json',
    }

    requested = (self.request.get('format') or self.request.get('output')
                 or 'json')
    if requested not in expected_formats:
        raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' %
                                 (requested, expected_formats))

    activities = response['items']
    self.response.headers['Content-Type'] = mime_types[requested]
    emit = self.response.out.write

    if requested == 'atom':
        hub = self.request.get('hub')
        reader = self.request.get('reader', 'true').lower()
        if reader not in ('true', 'false'):
            self.abort(400, 'reader param must be either true or false')
        feed = atom.activities_to_atom(
            activities, actor,
            host_url=url or self.request.host_url + '/',
            request_url=self.request.url,
            xml_base=util.base_url(url),
            title=title,
            rels={'hub': hub} if hub else None,
            reader=(reader == 'true'))
        emit(feed)
        self.response.headers.add(
            'Link', str('<%s>; rel="self"' % self.request.url))
        if hub:
            self.response.headers.add('Link', str('<%s>; rel="hub"' % hub))
    elif requested == 'xml':
        emit(XML_TEMPLATE % util.to_xml(response))
    elif requested == 'html':
        emit(microformats2.activities_to_html(activities))
    elif requested == 'json-mf2':
        mf2_items = [microformats2.activity_to_json(activity)
                     for activity in activities]
        emit(json.dumps({'items': mf2_items}, indent=2))
    elif requested == 'jsonfeed':
        try:
            feed = jsonfeed.activities_to_jsonfeed(
                activities, actor=actor, title=title,
                feed_url=self.request.url)
        except TypeError as e:
            raise exc.HTTPBadRequest('Unsupported input data: %s' % e)
        emit(json.dumps(feed, indent=2))
    else:
        # 'json' and 'activitystreams': dump the response dict as is.
        emit(json.dumps(response, indent=2))

    if 'plaintext' in self.request.params:
        # override response content type
        self.response.headers['Content-Type'] = 'text/plain'
def test_strip_html_tags_from_titles(self):
    """HTML tags in an activity's displayName are dropped from the <title>."""
    tagged = copy.deepcopy(test_facebook.ACTIVITY)
    tagged['displayName'] = '<p>foo & <a href="http://bar">bar</a></p>'
    feed = atom.activities_to_atom([tagged], test_facebook.ACTOR)
    self.assert_multiline_in('<title>foo & bar</title>\n', feed)
def write_response(self, response, actor=None, url=None, title=None):
    """Render an ActivityStreams response in the requested output format.

    The format comes from the ``format`` query parameter, falling back to
    ``output`` and then to ``json``. CORS and HSTS headers are set on every
    response. If the ``plaintext`` query parameter is present, the
    Content-Type is overridden with ``text/plain`` at the end.

    Args:
      response: response dict with values based on OpenSocial ActivityStreams
        REST API, as returned by Source.get_activities_response()
      actor: optional ActivityStreams actor dict for current user. Only used
        for Atom output.
      url: the input URL
      title: string, used in Atom output

    Raises:
      exc.HTTPBadRequest: if the format is not supported.
    """
    expected_formats = ('activitystreams', 'json', 'atom', 'xml', 'html',
                        'json-mf2')
    mime_types = {
        'activitystreams': 'application/json',
        'json': 'application/json',
        'atom': 'text/xml',
        'xml': 'text/xml',
        'html': 'text/html',
        'json-mf2': 'application/json',
    }

    requested = (self.request.get('format') or self.request.get('output')
                 or 'json')
    if requested not in expected_formats:
        raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' %
                                 (requested, expected_formats))

    activities = response['items']

    six_months = 'max-age=16070400; includeSubDomains; preload'  # 6 months
    self.response.headers.update({
        'Access-Control-Allow-Origin': '*',
        'Strict-Transport-Security': six_months,
    })
    self.response.headers['Content-Type'] = mime_types[requested]
    emit = self.response.out.write

    if requested == 'atom':
        hub = self.request.get('hub')
        feed = atom.activities_to_atom(
            activities, actor,
            host_url=url or self.request.host_url + '/',
            request_url=self.request.url,
            xml_base=util.base_url(url),
            title=title,
            rels={'hub': hub} if hub else None)
        emit(feed)
        self.response.headers.add(
            'Link', str('<%s>; rel="self"' % self.request.url))
        if hub:
            self.response.headers.add('Link', str('<%s>; rel="hub"' % hub))
    elif requested == 'xml':
        emit(XML_TEMPLATE % util.to_xml(response))
    elif requested == 'html':
        emit(microformats2.activities_to_html(activities))
    elif requested == 'json-mf2':
        mf2_items = [microformats2.object_to_json(activity)
                     for activity in activities]
        emit(json.dumps({'items': mf2_items}, indent=2))
    else:
        # 'json' and 'activitystreams': dump the response dict as is.
        emit(json.dumps(response, indent=2))

    if 'plaintext' in self.request.params:
        # override response content type
        self.response.headers['Content-Type'] = 'text/plain'
def write_response(self, response, actor=None, url=None, title=None,
                   hfeed=None):
    """Render an ActivityStreams response in the requested output format.

    The format comes from the ``format`` query parameter, falling back to
    ``output`` and then to ``json``, and must be a key of ``FORMATS``. The
    Content-Type is ``text/plain`` when the ``plaintext`` query parameter is
    present, otherwise the ``FORMATS`` value for the format (if truthy).
    HEAD requests return after the headers are set, without writing a body.

    Args:
      response: response dict with values based on OpenSocial ActivityStreams
        REST API, as returned by Source.get_activities_response()
      actor: optional ActivityStreams actor dict for current user. Only used
        for Atom and JSON Feed output.
      url: the input URL
      title: string, used in feed output (Atom, JSON Feed, RSS)
      hfeed: dict, parsed mf2 h-feed, if available

    Raises:
      exc.HTTPBadRequest: if the format is not in ``FORMATS``, or if JSON Feed
        conversion raises TypeError.

    A ValueError raised while converting is logged and turned into a 400 via
    ``self.abort``.
    """
    fmt = self.request.get('format') or self.request.get('output') or 'json'
    if fmt not in FORMATS:
        raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' %
                                 (fmt, FORMATS))

    if 'plaintext' not in self.request.params:
        content_type = FORMATS.get(fmt)
        if content_type:
            self.response.headers['Content-Type'] = content_type
    else:
        # override content type
        self.response.headers['Content-Type'] = 'text/plain'

    if self.request.method == 'HEAD':
        return

    activities = response['items']

    try:
        if fmt in ('as1', 'json', 'activitystreams'):
            self.response.out.write(json_dumps(response, indent=2))

        elif fmt == 'as2':
            # Convert the items first so that a failed conversion leaves
            # ``response`` untouched, then move the AS1 paging fields to
            # their AS2 names.
            as2_items = [as2.from_as1(activity) for activity in activities]
            total = response.pop('totalResults', None)
            updated = response.pop('updatedSince', None)
            response.update({
                'items': as2_items,
                'totalItems': total,
                'updated': updated,
                'filtered': None,
                'sorted': None,
            })
            trimmed = util.trim_nulls(response)
            self.response.out.write(json_dumps(trimmed, indent=2))

        elif fmt == 'atom':
            hub = self.request.get('hub')
            reader = self.request.get('reader', 'true').lower()
            if reader not in ('true', 'false'):
                self.abort(400, 'reader param must be either true or false')
            if hfeed and not actor:
                # No actor was given: derive one from the h-feed's properties.
                actor = microformats2.json_to_object({
                    'properties': hfeed.get('properties', {}),
                })
            feed = atom.activities_to_atom(
                activities, actor,
                host_url=url or self.request.host_url + '/',
                request_url=self.request.url,
                xml_base=util.base_url(url),
                title=title,
                rels={'hub': hub} if hub else None,
                reader=(reader == 'true'))
            self.response.out.write(feed)
            self.response.headers.add(
                'Link', str('<%s>; rel="self"' % self.request.url))
            if hub:
                self.response.headers.add(
                    'Link', str('<%s>; rel="hub"' % hub))

        elif fmt == 'rss':
            if not title:
                title = 'Feed for %s' % url
            feed = rss.from_activities(
                activities, actor, title=title,
                feed_url=self.request.url, hfeed=hfeed,
                home_page_url=util.base_url(url))
            self.response.out.write(feed)

        elif fmt in ('as1-xml', 'xml'):
            self.response.out.write(XML_TEMPLATE % util.to_xml(response))

        elif fmt == 'html':
            self.response.out.write(
                microformats2.activities_to_html(activities))

        elif fmt in ('mf2-json', 'json-mf2'):
            mf2_items = [microformats2.activity_to_json(activity)
                         for activity in activities]
            self.response.out.write(json_dumps({'items': mf2_items}, indent=2))

        elif fmt == 'jsonfeed':
            try:
                feed = jsonfeed.activities_to_jsonfeed(
                    activities, actor=actor, title=title,
                    feed_url=self.request.url)
            except TypeError as e:
                raise exc.HTTPBadRequest('Unsupported input data: %s' % e)
            self.response.out.write(json_dumps(feed, indent=2))

    except ValueError as e:
        logging.warning('converting to output format failed', stack_info=True)
        self.abort(400, 'Could not convert to %s: %s' % (fmt, str(e)))
def test_render_event_omits_object_type_verb(self):
    """Render content that mixes markup-like text, a URL and ampersands.

    NOTE(review): the expected snippet itself contains 'a&b', yet the final
    assertion requires 'a&b' to be absent from the output. Unless
    assert_multiline_in normalizes entities, both cannot hold. Confirm
    whether HTML entities (e.g. ``&amp;``) were lost from these literals.
    """
    note = {'object': {'content': 'X <y> http://z?w a&b c&d e>f'}}
    feed = atom.activities_to_atom(
        [note], test_twitter.ACTOR, title='my title')
    self.assert_multiline_in('X <y> http://z?w a&b c&d e>f', feed)
    self.assertNotIn('a&b', feed)