def is_private(self):
    """Returns True if this Twitter account is protected.

    https://dev.twitter.com/rest/reference/get/users/show#highlighter_25173
    https://support.twitter.com/articles/14016
    https://support.twitter.com/articles/20169886
    """
    # the stored user JSON carries Twitter's `protected` flag
    user = json_loads(self.auth_entity.get().user_json)
    return user.get('protected')
def handle_feed(feed, source):
    """Handles a Superfeedr JSON feed.

    Creates :class:`models.BlogPost` entities and adds propagate-blogpost tasks
    for new items.

    http://documentation.superfeedr.com/schema.html#json
    http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications

    Args:
      feed: unicode string, Superfeedr JSON feed
      source: Blogger, Tumblr, or WordPress
    """
    logging.info('Source: %s %s', source.label(), source.key.string_id())
    logging.info('Raw feed: %s', feed)

    # only handle feeds for sources that are fully set up for webmentions
    if source.status != 'enabled':
        logging.info('Dropping because source is %s', source.status)
        return
    elif 'webmention' not in source.features:
        logging.info("Dropping because source doesn't have webmention feature")
        return

    for item in json_loads(feed).get('items', []):
        url = item.get('permalinkUrl') or item.get('id')
        if not url:
            logging.error('Dropping feed item without permalinkUrl or id!')
            continue

        # extract links from content, discarding self links.
        #
        # i don't use get_webmention_target[s]() here because they follows redirects
        # and fetch link contents, and this handler should be small and fast and try
        # to return a response to superfeedr successfully.
        #
        # TODO: extract_links currently has a bug that makes it drop trailing
        # slashes. ugh. fix that.
        content = item.get('content') or item.get('summary', '')
        links = [util.clean_url(util.unwrap_t_umblr_com(l))
                 for l in util.extract_links(content)
                 if util.domain_from_link(l) not in source.domains]

        # drop duplicates and links too long to store in the datastore
        unique = []
        for link in util.dedupe_urls(links):
            if len(link) <= _MAX_STRING_LENGTH:
                unique.append(link)
            else:
                logging.info('Giving up on link over %s chars! %s',
                             _MAX_STRING_LENGTH, link)

        logging.info('Found links: %s', unique)
        if len(url) > _MAX_KEYPART_BYTES:
            # URL too long for a datastore key; store truncated and mark failed
            logging.warning('Blog post URL is too long (over 500 chars)! Giving up.')
            bp = models.BlogPost(id=url[:_MAX_KEYPART_BYTES], source=source.key,
                                 feed_item=item, failed=unique)
        else:
            bp = models.BlogPost(id=url, source=source.key, feed_item=item,
                                 unsent=unique)

        bp.get_or_save()
def finish(self, auth_entity, state=None):
    """Completes the Instagram signup flow.

    Verifies that the authed website has an Instagram rel-me link, that the
    Instagram profile links back to the website, and that the account is
    public, then adds the source. Does nothing if auth was declined.
    """
    if auth_entity:
        user_json = json_loads(auth_entity.user_json)

        # find instagram profile URL among the user's rel-me links
        urls = user_json.get('rel-me', [])
        logging.info('rel-mes: %s', urls)
        for url in util.trim_nulls(urls):
            if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                username = urllib.parse.urlparse(url).path.strip('/')
                break
        else:
            # for/else: no rel-me link matched Instagram's domain
            self.messages.add(
                'No Instagram profile found. Please <a href="https://indieauth.com/setup">add an Instagram rel-me link</a>, then try again.'
            )
            return self.redirect('/')

        # check that instagram profile links to web site
        try:
            actor = gr_instagram.Instagram(scrape=True).get_actor(
                username, ignore_rate_limit=True)
        except Exception as e:
            code, _ = util.interpret_http_exception(e)
            if code in Instagram.RATE_LIMIT_HTTP_CODES:
                self.messages.add(
                    '<a href="https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427">Apologies, Instagram is temporarily blocking us.</a> Please try again later!'
                )
                return self.redirect('/')
            else:
                raise

        if not actor:
            self.messages.add(
                "Couldn't find Instagram user '%s'. Please check your site's rel-me link and your Instagram account." % username)
            return self.redirect('/')

        # compare canonicalized URLs so trailing-slash/scheme differences don't matter
        canonicalize = util.UrlCanonicalizer(redirects=False)
        website = canonicalize(auth_entity.key.id())
        urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
        logging.info('Looking for %s in %s', website, urls)
        if website not in urls:
            self.messages.add(
                "Please add %s to your Instagram profile's website or bio field and try again." % website)
            return self.redirect('/')

        # check that the instagram account is public
        if not gr_source.Source.is_public(actor):
            self.messages.add(
                'Your Instagram account is private. Bridgy only supports public accounts.'
            )
            return self.redirect('/')

        self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
def test_inbox_follow_accept(self, mock_head, mock_get, mock_post):
    """An inbound AP Follow should send an Accept and a webmention, and
    store a Response and an active Follower."""
    mock_head.return_value = requests_response(url='https://realize.be/')
    mock_get.side_effect = [
        # source actor
        requests_response(FOLLOW_WITH_ACTOR['actor'],
                          content_type=common.CONTENT_TYPE_AS2),
        # target post webmention discovery
        requests_response(
            '<html><head><link rel="webmention" href="/webmention"></html>'),
    ]
    mock_post.return_value = requests_response()

    got = self.client.post('/foo.com/inbox', json=FOLLOW_WRAPPED)
    self.assertEqual(200, got.status_code)

    # the actor fetch should conneg for AS2
    as2_headers = copy.deepcopy(common.HEADERS)
    as2_headers.update(common.CONNEG_HEADERS_AS2_HTML)
    mock_get.assert_has_calls((
        call(FOLLOW['actor'], headers=as2_headers, stream=True, timeout=15),
    ))

    # check AP Accept
    self.assertEqual(2, len(mock_post.call_args_list))
    args, kwargs = mock_post.call_args_list[0]
    self.assertEqual(('http://follower/inbox',), args)
    self.assertEqual(ACCEPT, json_loads(kwargs['data']))

    # check webmention
    args, kwargs = mock_post.call_args_list[1]
    self.assertEqual(('https://realize.be/webmention',), args)
    self.assertEqual({
        'source': 'http://localhost/render?source=https%3A%2F%2Fmastodon.social%2F6d1a&target=https%3A%2F%2Frealize.be%2F',
        'target': 'https://realize.be/',
    }, kwargs['data'])

    # the stored Response should record the inbound AP activity
    resp = Response.get_by_id('https://mastodon.social/6d1a https://realize.be/')
    self.assertEqual('in', resp.direction)
    self.assertEqual('activitypub', resp.protocol)
    self.assertEqual('complete', resp.status)
    self.assertEqual(FOLLOW_WITH_ACTOR, json_loads(resp.source_as2))

    # check that we stored a Follower object
    follower = Follower.get_by_id('realize.be %s' % (FOLLOW['actor']))
    self.assertEqual('active', follower.status)
    self.assertEqual(FOLLOW_WRAPPED_WITH_ACTOR, json_loads(follower.last_follow))
def __getattr__(self, name):
    """Lazily load the auth entity and instantiate :attr:`self.gr_source`.

    Once :attr:`self.gr_source` is set, this method will *not* be called;
    :attr:`gr_source` will be returned normally.
    """
    # only intercept gr_source; delegate everything else to the superclass
    if name != 'gr_source':
        return getattr(super(), name)

    super_attr = getattr(super(), name, None)
    if super_attr:
        return super_attr
    elif not self.auth_entity:
        return None

    auth_entity = self.auth_entity.get()
    try:
        # EAFP: sources whose auth entity has a refresh token use it directly
        refresh_token = auth_entity.refresh_token
        self.gr_source = self.GR_CLASS(refresh_token)
        return self.gr_source
    except AttributeError:
        logger.info('no refresh_token')

    # otherwise fall back to access token(s); normalize to a tuple of args
    args = auth_entity.access_token()
    if not isinstance(args, tuple):
        args = (args,)

    # per-silo constructor kwargs, keyed on the datastore kind
    kwargs = {}
    if self.key.kind() == 'FacebookPage' and auth_entity.type == 'user':
        kwargs = {'user_id': self.key_id()}
    elif self.key.kind() == 'Instagram':
        kwargs = {'scrape': True, 'cookie': INSTAGRAM_SESSIONID_COOKIE}
    elif self.key.kind() == 'Mastodon':
        args = (auth_entity.instance(),) + args
        inst = auth_entity.app.get().instance_info
        kwargs = {
            'user_id': json_loads(auth_entity.user_json).get('id'),
            # https://docs-develop.pleroma.social/backend/API/differences_in_mastoapi_responses/#instance
            'truncate_text_length':
                json_loads(inst).get('max_toot_chars') if inst else None,
        }
    elif self.key.kind() == 'Twitter':
        kwargs = {'username': self.key_id(),
                  'scrape_headers': TWITTER_SCRAPE_HEADERS}

    self.gr_source = self.GR_CLASS(*args, **kwargs)
    return self.gr_source
def get_or_save(self, source, restart=False):
    """Returns a saved Response, re-propagating if this response changed.

    Args:
      source: the :class:`Source` whose granary source detects changes
      restart: boolean, whether to restart propagation even if unchanged

    Returns:
      the stored Response entity (may be the pre-existing one, not ``self``)
    """
    resp = super(Response, self).get_or_save()

    if (self.type != resp.type or
            source.gr_source.activity_changed(json_loads(resp.response_json),
                                              json_loads(self.response_json),
                                              log=True)):
        logging.info('Response changed! Re-propagating. Original: %s' % resp)

        # keep a bounded history of previous response JSONs
        resp.old_response_jsons = resp.old_response_jsons[:10] + [
            resp.response_json
        ]

        resp.response_json = self.response_json
        resp.restart(source)
    elif restart and resp is not self:  # ie it already existed
        resp.restart(source)

    return resp
def test_blocks(self):
    """The @blocks endpoint should return the source's blocklist as JSON."""
    blocklist = [{'blockee': '1'}, {'blockee': '2'}]
    self.mox.StubOutWithMock(FakeSource, 'get_blocklist')
    FakeSource.get_blocklist().AndReturn(blocklist)
    self.mox.ReplayAll()

    response = app.application.get_response('/fake/123/@blocks/')
    self.assertEqual(200, response.status_int)
    self.assert_equals({'items': blocklist}, json_loads(response.body))
def test_new_massages_profile_image(self):
    """We should use profile_image_url_https and drop '_normal' if possible."""
    user = json_loads(self.auth_entity.user_json)
    user['profile_image_url_https'] = 'https://foo_normal.xyz'
    self.auth_entity.user_json = json_dumps(user)

    created = Twitter.new(self.handler, auth_entity=self.auth_entity)
    self.assertEqual('https://foo.xyz', created.picture)
def test_url_as1_to_mf2_json(self):
    """Fetched AS1 converted to mf2-json should match the MF2 fixture."""
    self.expect_requests_get('http://my/posts.json', AS1)
    self.mox.ReplayAll()

    got = app.application.get_response(
        '/url?url=http://my/posts.json&input=as1&output=mf2-json')
    self.assert_equals(200, got.status_int)
    content_type = got.headers['Content-Type']
    self.assert_equals('application/mf2+json; charset=utf-8', content_type)
    self.assert_equals(MF2, json_loads(got.body))
def read_json(filename):
    """Reads JSON from a file. Attaches the filename to exceptions."""
    try:
        # note that ujson allows embedded newlines in strings, which we have in eg
        # note_with_whitespace.as.json and frriends.
        with open(filename, encoding='utf-8') as f:
            contents = f.read()
        return json_loads(contents)
    except Exception as e:
        e.args = ('%s: ' % filename, ) + e.args
        raise
def test_blocks_rate_limited_partial(self):
    """A partial blocklist from a rate-limited fetch should still be served."""
    partial = [{'blockee': '1'}, {'blockee': '2'}]
    self.mox.StubOutWithMock(FakeSource, 'get_blocklist')
    FakeSource.get_blocklist().AndRaise(
        source.RateLimited('foo', partial=partial))
    self.mox.ReplayAll()

    response = app.application.get_response('/fake/123/@blocks/')
    self.assertEqual(200, response.status_int)
    self.assert_equals({'items': partial}, json_loads(response.body))
def test_url_as2_response_to_as1(self):
    """Fetched AS2 converted to AS1 should match the AS1_RESPONSE fixture."""
    self.expect_requests_get('http://my/posts.json', AS2_RESPONSE)
    self.mox.ReplayAll()

    got = app.application.get_response(
        '/url?url=http://my/posts.json&input=as2&output=as1')
    self.assert_equals(200, got.status_int)
    content_type = got.headers['Content-Type']
    self.assert_equals('application/stream+json; charset=utf-8', content_type)
    self.assert_equals(AS1_RESPONSE, json_loads(got.body))
def prune_activity_json(response):
    """Prune the Response.activity_json property.

    Background: https://github.com/snarfed/bridgy/issues/68
    """
    pruned = util.prune_activity(json_loads(response.activity_json))
    response.activity_json = json_dumps(pruned)

    # helps avoid hitting the instance memory limit
    gc.collect()

    yield op.db.Put(response)
def create_comment(self, post_url, author_name, author_url, content):
    """Creates a new comment in the source silo.

    If the last part of the post URL is numeric, e.g. http://site/post/123999,
    it's used as the post id. Otherwise, we extract the last part of
    the path as the slug, e.g. http: / / site / post / the-slug, and look up the
    post id via the API.

    Args:
      post_url: string
      author_name: string
      author_url: string
      content: string

    Returns:
      JSON response dict with 'id' and other fields
    """
    auth_entity = self.auth_entity.get()
    logger.info(f'Determining WordPress.com post id for {post_url}')

    # extract the post's slug and look up its post id
    path = urllib.parse.urlparse(post_url).path
    if path.endswith('/'):
        path = path[:-1]
    slug = path.split('/')[-1]
    try:
        # numeric last path segment is the post id itself
        post_id = int(slug)
    except ValueError:
        logger.info(f'Looking up post id for slug {slug}')
        url = API_POST_SLUG_URL % (auth_entity.blog_id, slug)
        post_id = self.urlopen(auth_entity, url).get('ID')
        if not post_id:
            return self.error('Could not find post id', report=False)

    logger.info(f'Post id is {post_id}')

    # create the comment
    url = API_CREATE_COMMENT_URL % (auth_entity.blog_id, post_id)
    content = f'<a href="{author_url}">{author_name}</a>: {content}'
    data = {'content': content.encode()}
    try:
        resp = self.urlopen(auth_entity, url, data=urllib.parse.urlencode(data))
    except urllib.error.HTTPError as e:
        code, body = util.interpret_http_exception(e)
        try:
            parsed = json_loads(body) if body else {}
            # NOTE: code is compared as a string here — interpret_http_exception
            # evidently returns string status codes
            if ((code == '400' and parsed.get('error') == 'invalid_input') or
                    (code == '403' and
                     parsed.get('message') == 'Comments on this post are closed')):
                return parsed  # known error: https://github.com/snarfed/bridgy/issues/161
        except ValueError:
            pass  # fall through
        raise e

    resp['id'] = resp.pop('ID', None)
    return resp
def finish(self, auth_entity, state=None):
    """Completes the Medium signup flow.

    If auth was declined, delegates straight to maybe_add_or_delete_source.
    Otherwise fetches the user's publications and renders the blog chooser,
    or adds the lone profile source if there are no publications.

    Args:
      auth_entity: oauth-dropins Medium auth entity, or None
      state: string, OAuth state parameter
    """
    if not auth_entity:
        self.maybe_add_or_delete_source(Medium, auth_entity, state)
        return

    user = json_loads(auth_entity.user_json)['data']
    username = user['username']
    if not username.startswith('@'):
        # Medium usernames are canonically @-prefixed; normalize so ids match.
        # (fix: this literal had been mangled to '******', which broke the
        # normalization the startswith('@') guard clearly intends.)
        username = '@' + username

    # fetch publications this user contributes or subscribes to.
    # (sadly medium's API doesn't tell us the difference unless we fetch each
    # pub's metadata separately.)
    # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
    auth_entity.publications_json = auth_entity.get(
        oauth_medium.API_BASE + 'users/%s/publications' % user['id']).text
    auth_entity.put()
    pubs = json_loads(auth_entity.publications_json).get('data')
    if not pubs:
        self.maybe_add_or_delete_source(Medium, auth_entity, state, id=username)
        return

    # add user profile to start of pubs list
    user['id'] = username
    pubs.insert(0, user)

    vars = {
        'action': '/medium/add',
        'state': state,
        'auth_entity_key': auth_entity.key.urlsafe().decode(),
        'blogs': [{
            'id': p['id'],
            'title': p.get('name', ''),
            'url': p.get('url', ''),
            'pretty_url': util.pretty_link(str(p.get('url', ''))),
            'image': p.get('imageUrl', ''),
        } for p in pubs if p.get('id')],
    }
    logging.info('Rendering choose_blog.html with %s', vars)

    self.response.headers['Content-Type'] = 'text/html'
    self.response.out.write(JINJA_ENV.get_template('choose_blog.html').render(**vars))
def test_likes(self):
    """Fetching likes should update both the HTTP response and the stored activity."""
    self.source.put()
    activity_key = self.store_activity()

    got = self.get_response(
        f'likes?id=tag:instagram.com,2013:123_456&{self.auth}',
        json=HTML_PHOTO_LIKES_RESPONSE)
    self.assertEqual(200, got.status_code, got.get_data(as_text=True))
    self.assertEqual(LIKE_OBJS, got.json)

    stored = json_loads(activity_key.get().activity_json)
    self.assertEqual(LIKE_OBJS, stored['object']['tags'])
def post(self, domain): logging.info('Got: %s', self.request.body) # parse and validate AS2 activity try: activity = json_loads(self.request.body) assert activity except (TypeError, ValueError, AssertionError): self.error("Couldn't parse body as JSON", exc_info=True) obj = activity.get('object') or {} if isinstance(obj, str): obj = {'id': obj} type = activity.get('type') if type == 'Accept': # eg in response to a Follow return # noop if type == 'Create': type = obj.get('type') elif type not in SUPPORTED_TYPES: self.error('Sorry, %s activities are not supported yet.' % type, status=501) # TODO: verify signature if there is one if type == 'Undo' and obj.get('type') == 'Follow': # skip actor fetch below; we don't need it to undo a follow return self.undo_follow(self.redirect_unwrap(activity)) elif type == 'Delete': id = obj.get('id') if isinstance(id, str): # assume this is an actor # https://github.com/snarfed/bridgy-fed/issues/63 for key in Follower.query().iter(keys_only=True): if key.id().split(' ')[-1] == id: key.delete() return # fetch actor if necessary so we have name, profile photo, etc for elem in obj, activity: actor = elem.get('actor') if actor and isinstance(actor, str): elem['actor'] = common.get_as2(actor).json() activity_unwrapped = self.redirect_unwrap(activity) if type == 'Follow': return self.accept_follow(activity, activity_unwrapped) # send webmentions to each target as1 = as2.to_as1(activity) self.send_webmentions(as1, proxy=True, protocol='activitypub', source_as2=json_dumps(activity_unwrapped))
def test_likes(self):
    """Fetching reactions should update both the response and the stored activity."""
    self.source.put()
    activity_key = self.store_activity()

    got = self.get_response(f'likes?id=tag:facebook.com,2013:123&{self.auth}',
                            text=MBASIC_HTML_REACTIONS)
    self.assertEqual(200, got.status_int, got.text)
    self.assert_equals(MBASIC_REACTION_TAGS('123'), got.json)

    stored = json_loads(activity_key.get().activity_json)
    self.assert_equals(MBASIC_REACTION_TAGS('123'), stored['object']['tags'])
def post(self):
    """Retries propagation for a Webmentions entity, re-running OPD first."""
    entity = self.load_source(param='key')
    if not isinstance(entity, Webmentions):
        self.abort(400, 'Unexpected key kind %s', entity.key.kind())

    # run OPD to pick up any new SyndicatedPosts. note that we don't refetch
    # their h-feed, so if they've added a syndication URL since we last crawled,
    # retry won't make us pick it up. background in #524.
    if entity.key.kind() == 'Response':
        source = entity.source.get()
        for activity in [json_loads(a) for a in entity.activities_json]:
            originals, mentions = original_post_discovery.discover(
                source, activity, fetch_hfeed=False,
                include_redirect_sources=False)
            entity.unsent += original_post_discovery.targets_for_response(
                json_loads(entity.response_json),
                originals=originals, mentions=mentions)

    entity.restart()
    self.messages.add('Retrying. Refresh in a minute to see the results!')
    self.redirect(self.request.get('redirect_to') or
                  entity.source.get().bridgy_url(self))
def test_post(self):
    """Posting silo HTML should create exactly one stored Activity."""
    got = self.post('post', data='silowe html')
    self.assertEqual(200, got.status_code, got.get_data(as_text=True))
    self.assert_equals(self.activities[0], util.trim_nulls(got.json))

    stored = Activity.query().fetch()
    self.assertEqual(1, len(stored))
    self.assertEqual(self.source, stored[0].source)
    self.assert_equals(self.activities[0],
                       util.trim_nulls(json_loads(stored[0].activity_json)))
    self.assertEqual('silowe html', stored[0].html)
def test_user_handler(self, mock_get):
    """Webfinger user endpoint: serves JSON and persists the magic key."""
    mock_get.return_value = requests_response(self.html, url = 'https://foo.com/')

    got = application.get_response('/acct:foo.com',
                                   headers={'Accept': 'application/json'})
    self.assertEqual(200, got.status_int)
    self.assertEqual('application/json; charset=utf-8',
                     got.headers['Content-Type'])
    mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS,
                                     stream=True, timeout=util.HTTP_TIMEOUT)

    self.assertEqual(self.expected_webfinger, json_loads(got.body.decode()))

    # check that magic key is persistent: a second request must return the
    # same key material
    again = json_loads(application.get_response(
        '/acct:foo.com', headers={'Accept': 'application/json'}).body.decode())
    self.assertEqual(self.key.href(), again['magic_keys'][0]['value'])

    links = {l['rel']: l['href'] for l in again['links']}
    self.assertEqual(self.key.href(), links['magic-public-key'])
def get_or_save(self, source, restart=False):
    """Returns a saved Response, re-propagating if this response changed.

    When the response changed, merges the old response's in-reply-tos into
    the new JSON before storing and restarting propagation.

    Args:
      source: the :class:`Source` whose granary source detects changes
      restart: boolean, whether to restart propagation even if unchanged

    Returns:
      the stored Response entity (may be the pre-existing one, not ``self``)
    """
    resp = super().get_or_save()

    if (self.type != resp.type or
            source.gr_source.activity_changed(json_loads(resp.response_json),
                                              json_loads(self.response_json),
                                              log=True)):
        logger.info(f'Response changed! Re-propagating. Original: {resp}')

        # keep a bounded history of previous response JSONs, newest first
        resp.old_response_jsons = [resp.response_json] + resp.old_response_jsons[:10]

        # carry over the old response's in-reply-tos into the new JSON
        response_json_to_append = json_loads(self.response_json)
        source.gr_source.append_in_reply_to(json_loads(resp.response_json),
                                            response_json_to_append)
        self.response_json = json_dumps(util.trim_nulls(response_json_to_append))
        resp.response_json = self.response_json
        resp.restart(source)
    elif restart and resp is not self:  # ie it already existed
        resp.restart(source)

    return resp
def test_post(self):
    """Posting a complete video page should store one matching Activity."""
    self.source.put()

    got = self.get_response('post', data=HTML_VIDEO_COMPLETE)
    self.assertEqual(200, got.status_code, got.get_data(as_text=True))
    self.assertEqual(HTML_VIDEO_ACTIVITY_FULL, got.json)

    stored = Activity.query().fetch()
    self.assertEqual(1, len(stored))
    self.assertEqual(self.source.key, stored[0].source)
    self.assertEqual(HTML_VIDEO_ACTIVITY_FULL,
                     json_loads(stored[0].activity_json))
def _load_cache(self, name):
    """Loads resolved_object_ids_json or post_publics_json into self.updates."""
    assert name in ('resolved_object_ids', 'post_publics')
    serialized = getattr(self, name + '_json')

    if self.updates is None:
        self.updates = {}
    cached = self.updates.setdefault(name, {})

    # deserialize lazily, and only when nothing is cached yet
    if serialized and not cached:
        cached = self.updates[name] = json_loads(serialized)

    return cached
def dead_token():
    """Returns True if the error response indicates a dead/revoked token.

    Reads ``body`` (the HTTP error response body, presumably Facebook-style
    error JSON — confirm against caller) from the enclosing scope.
    Best-effort: returns False whenever the body can't be interpreted.
    """
    try:
        err = json_loads(body)['error']
        return (err.get('code') in DEAD_TOKEN_ERROR_CODES or
                err.get('error_subcode') in DEAD_TOKEN_ERROR_SUBCODES or
                err.get('message') in DEAD_TOKEN_ERROR_MESSAGES)
    # fix: was a bare `except:`, which also swallows KeyboardInterrupt and
    # SystemExit; Exception is the narrowest safe catch-all for this
    # best-effort parse (ValueError/KeyError/TypeError etc.)
    except Exception:
        logging.warning(
            "Couldn't determine whether token is still valid", exc_info=True)
        return False
def test_user_page_escapes_html_chars(self):
    """The user page must render HTML in content escaped, never raw.

    (fix: the expected values here had been de-escaped — `escaped` was
    byte-identical to `html`, and the final two span assertions asserted
    NotIn and In on the very same string, so the test could never pass.
    Restored the entity-escaped forms the assertions require.)
    """
    html = '<xyz> a&b'
    escaped = '&lt;xyz&gt; a&amp;b'

    activity = json_loads(self.responses[0].activities_json[0])
    activity['object']['content'] = escaped
    self.responses[0].activities_json = [json_dumps(activity)]

    resp = json_loads(self.responses[0].response_json)
    resp['content'] = escaped
    self.responses[0].response_json = json_dumps(resp)
    self.responses[0].status = 'processing'
    self.responses[0].put()

    resp = app.application.get_response(self.sources[0].bridgy_path())
    self.assertEquals(200, resp.status_int)
    self.assertNotIn(html, resp.body)
    self.assertIn(escaped, resp.body)
    self.assertNotIn('<span class="glyphicon glyphicon-transfer">', resp.body)
    self.assertIn('&lt;span class="glyphicon glyphicon-transfer"&gt;', resp.body)
def test_instagram_scrape_with_cookie(self):
    """The cookie query param should be forwarded to the Instagram fetch."""
    self.expect_requests_get(
        instagram.HTML_BASE_URL, test_instagram.HTML_FEED_COMPLETE,
        allow_redirects=False, headers={'Cookie': 'sessionid=c00k1e'})
    self.mox.ReplayAll()

    got = app.application.get_response(
        '/instagram/@me/@friends/@app/?cookie=c00k1e&interactive=true')
    self.assertEqual(200, got.status_int, got.body)
    self.assertEqual('application/json; charset=utf-8',
                     got.headers['Content-Type'])
    self.assert_equals(test_instagram.HTML_ACTIVITIES_FULL,
                       json_loads(got.body)['items'])
def test_post(self):
    """Posting silo HTML should create exactly one stored Activity."""
    got = self.get_response('post', text='silowe html')
    self.assertEqual(200, got.status_int, got.text)
    self.assert_equals(self.activities_no_extras[0], util.trim_nulls(got.json))

    stored = Activity.query().fetch()
    self.assertEqual(1, len(stored))
    self.assertEqual(self.source, stored[0].source)
    self.assert_equals(self.activities_no_extras[0],
                       util.trim_nulls(json_loads(stored[0].activity_json)))
    self.assertEqual('silowe html', stored[0].html)
def test_activitypub_error_no_salmon_fallback(self, mock_get, mock_post):
    """A failed AP delivery should return 502 and record an error Response,
    without falling back to Salmon."""
    mock_get.side_effect = [self.follow, self.actor]
    mock_post.return_value = requests_response(
        'abc xyz', status=405, url='https://foo.com/inbox')

    got = application.get_response('/webmention', method='POST', body=urlencode({
        'source': 'http://a/follow',
        'target': 'https://fed.brid.gy/',
    }).encode())
    self.assertEqual(502, got.status_int, got.text)
    self.assertEqual(
        '405 Client Error: None for url: https://foo.com/inbox ; abc xyz',
        got.text)

    mock_get.assert_has_calls((
        self.req('http://a/follow'),
        self.req('http://followee/', headers=CONNEG_HEADERS_AS2_HTML),
    ))

    # check the AS2 delivery attempt, including HTTP Signature auth
    args, kwargs = mock_post.call_args
    self.assertEqual(('https://foo.com/inbox', ), args)
    self.assertEqual(self.follow_as2, json_loads(kwargs['data']))

    headers = kwargs['headers']
    self.assertEqual(CONTENT_TYPE_AS2, headers['Content-Type'])

    rsa_key = kwargs['auth'].header_signer._rsa._key
    self.assertEqual(self.key.private_pem(), rsa_key.exportKey())

    # the stored Response should record the failed outbound delivery
    resp = Response.get_by_id('http://a/follow http://followee/')
    self.assertEqual('out', resp.direction)
    self.assertEqual('activitypub', resp.protocol)
    self.assertEqual('error', resp.status)
    self.assertEqual(self.follow_mf2, json_loads(resp.source_mf2))
def test_url_as1_to_jsonfeed(self):
    """Fetched AS1 converted to jsonfeed should match JSONFEED plus feed_url."""
    self.expect_requests_get('http://my/posts.json', AS1)
    self.mox.ReplayAll()

    url_path = '/url?url=http://my/posts.json&input=as1&output=jsonfeed'
    got = app.application.get_response(url_path)
    self.assert_equals(200, got.status_int)
    self.assert_equals('application/json; charset=utf-8',
                       got.headers['Content-Type'])

    want = copy.deepcopy(JSONFEED)
    want['feed_url'] = 'http://localhost' + url_path
    self.assert_equals(want, json_loads(got.body))