def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
  """Fetches a URL and extracts its mf2 data.

  Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
  on errors.

  Args:
    url: string
    require_mf2: boolean, whether to return an error if no mf2 is found
    raise_errors: boolean, whether to let error exceptions propagate up or
      handle them

  Returns:
    (:class:`requests.Response`, mf2 data dict) on success, None on failure
  """
  try:
    resp = util.requests_get(url)
    resp.raise_for_status()
  except BaseException as e:
    if raise_errors:
      raise
    util.interpret_http_exception(e)  # log exception
    return self.error('Could not fetch source URL %s' % url)

  if self.entity:
    self.entity.html = resp.text

  # parse microformats
  soup = util.parse_html(resp)
  mf2 = util.parse_mf2(soup, resp.url)

  # special case tumblr's markup: div#content > div.post > div.copy
  # convert to mf2 and re-parse
  if not mf2.get('items'):
    contents = soup.find_all(id='content')
    if contents:
      post = contents[0].find_next(class_='post')
      if post:
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        # TODO: i should be able to pass post or contents[0] to mf2py instead
        # here, but it returns no items. mf2py bug?
        doc = str(post)
        mf2 = util.parse_mf2(doc, resp.url)

  logging.debug('Parsed microformats2: %s', json_dumps(mf2, indent=2))
  items = mf2.get('items', [])
  if require_mf2 and (not items or not items[0]):
    return self.error('No microformats2 data found in ' + resp.url,
                      data=mf2, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (resp.url, util.pretty_link(resp.url)))

  return resp, mf2
def post(self, source_short_name):
  logging.info('Params: %s', self.request.params.items())
  # strip fragments from source and target url
  self.source_url = urllib.parse.urldefrag(
    util.get_required_param(self, 'source'))[0]
  self.target_url = urllib.parse.urldefrag(
    util.get_required_param(self, 'target'))[0]

  # follow target url through any redirects, strip utm_* query params
  resp = util.follow_redirects(self.target_url)
  redirected_target_urls = [r.url for r in resp.history]
  self.target_url = util.clean_url(resp.url)

  # parse and validate target URL
  domain = util.domain_from_link(self.target_url)
  if not domain:
    return self.error('Could not parse target URL %s' % self.target_url)

  # look up source by domain
  source_cls = models.sources[source_short_name]
  domain = domain.lower()
  self.source = (source_cls.query()
                 .filter(source_cls.domains == domain)
                 .filter(source_cls.features == 'webmention')
                 .filter(source_cls.status == 'enabled')
                 .get())

  if not self.source:
    # check for a rel-canonical link. Blogger uses these when it serves a post
    # from multiple domains, e.g. country TLDs like epeus.blogspot.co.uk vs
    # epeus.blogspot.com.
    # https://github.com/snarfed/bridgy/issues/805
    mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
    if not mf2:
      # fetch_mf2() already wrote the error response
      return
    domains = util.dedupe_urls(
      util.domain_from_link(url)
      for url in mf2[1]['rels'].get('canonical', []))
    if domains:
      self.source = (source_cls.query()
                     .filter(source_cls.domains.IN(domains))
                     .filter(source_cls.features == 'webmention')
                     .filter(source_cls.status == 'enabled')
                     .get())

  if not self.source:
    return self.error(
      'Could not find %s account for %s. Is it registered with Bridgy?' %
      (source_cls.GR_CLASS.NAME, domain))

  # check that the target URL path is supported
  target_path = urllib.parse.urlparse(self.target_url).path
  if target_path in ('', '/'):
    return self.error(
      'Home page webmentions are not currently supported.', status=202)
  for pattern in self.source.PATH_BLACKLIST:
    if pattern.match(target_path):
      return self.error(
        '%s webmentions are not supported for URL path: %s' %
        (self.source.GR_CLASS.NAME, target_path), status=202)

  # create BlogWebmention entity
  id = '%s %s' % (self.source_url, self.target_url)
  self.entity = BlogWebmention.get_or_insert(
    id, source=self.source.key, redirected_target_urls=redirected_target_urls)
  if self.entity.status == 'complete':
    # TODO: response message saying update isn't supported
    self.response.write(self.entity.published)
    return
  logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe())

  # fetch source page
  fetched = self.fetch_mf2(self.source_url)
  if not fetched:
    return
  resp, mf2 = fetched

  item = self.find_mention_item(mf2.get('items', []))
  if not item:
    return self.error(
      'Could not find target URL %s in source page %s' %
      (self.target_url, resp.url),
      data=mf2, log_exception=False)

  # default author to target domain
  author_name = domain
  author_url = 'http://%s/' % domain

  # extract author name and URL from h-card, if any
  props = item['properties']
  author = first_value(props, 'author')
  if author:
    if isinstance(author, str):
      author_name = author
    else:
      author_props = author.get('properties', {})
      author_name = first_value(author_props, 'name')
      author_url = first_value(author_props, 'url')

  # if present, u-url overrides source url
  u_url = first_value(props, 'url')
  if u_url:
    self.entity.u_url = u_url

  # generate content
  content = props['content'][0]  # find_mention_item() guaranteed this is here
  text = (content.get('html') or content.get('value')).strip()
  source_url = self.entity.source_url()
  text += ' <br /> <a href="%s">via %s</a>' % (
    source_url, util.domain_from_link(source_url))

  # write comment
  try:
    self.entity.published = self.source.create_comment(
      self.target_url, author_name, author_url, text)
  except Exception as e:
    code, body = util.interpret_http_exception(e)
    msg = 'Error: %s %s; %s' % (code, e, body)
    if code == '401':
      logging.warning('Disabling source due to: %s' % e, exc_info=True)
      self.source.status = 'disabled'
      self.source.put()
      return self.error(msg, status=code, report=self.source.is_beta_user())
    elif code == '404':
      # post is gone
      return self.error(msg, status=code, report=False)
    elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
      return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE, report=False)
    elif code or body:
      return self.error(msg, status=code, report=True)
    else:
      raise

  # write results to datastore
  self.entity.status = 'complete'
  self.entity.put()
  self.response.write(json_dumps(self.entity.published))
def receive(self, email):
  addr = self.request.path.split('/')[-1]
  message_id = email.original.get('message-id').strip('<>')
  sender = getattr(email, 'sender', None)
  to = getattr(email, 'to', None)
  cc = getattr(email, 'cc', None)
  subject = getattr(email, 'subject', None)
  logging.info('Received %s from %s to %s (%s) cc %s: %s',
               message_id, sender, to, addr, cc, subject)

  user = addr.split('@')[0]
  source = FacebookEmailAccount.query(
    FacebookEmailAccount.email_user == user).get()
  logging.info('Source for %s is %s', user, source)

  util.email_me(subject='New email from %s: %s' % (sender, subject),
                body='Source: %s' % (source.bridgy_url(self) if source else None))

  htmls = list(body.decode() for _, body in email.bodies('text/html'))
  fbe = FacebookEmail.get_or_insert(
    message_id, source=source.key if source else None, htmls=htmls)
  logging.info('FacebookEmail created %s: %s', fbe.created, fbe.key.urlsafe())

  if not source:
    self.response.status_code = 404
    self.response.write('No Facebook email user found with address %s' % addr)
    return

  for html in htmls:
    obj = gr_facebook.Facebook().email_to_object(html)
    if obj:
      break
  else:
    self.response.status_code = 400
    self.response.write('No HTML body could be parsed')
    return
  logging.info('Converted to AS1: %s', json_dumps(obj, indent=2))

  base_obj = source.gr_source.base_object(obj)
  # note that this ignores the id query param (the post's user id) and uses
  # the source object's user id instead.
  base_obj['url'] = source.canonicalize_url(base_obj['url'])
  # also note that base_obj['id'] is not a tag URI, it's the raw Facebook post
  # id, eg '104790764108207'. we don't use it from activities_json much,
  # though, just in PropagateResponse.source_url(), which handles this fine.

  original_post_discovery.refetch(source)
  targets, mentions = original_post_discovery.discover(
    source, base_obj, fetch_hfeed=False)
  logging.info('Got targets %s mentions %s', targets, mentions)

  resp = Response(id=obj['id'],
                  source=source.key,
                  type=Response.get_type(obj),
                  response_json=json_dumps(obj),
                  activities_json=[json_dumps(base_obj)],
                  unsent=targets)
  resp.get_or_save(source, restart=True)

  fbe.response = resp.key
  fbe.put()
def store_activity(self):
  activity = copy.deepcopy(MBASIC_ACTIVITIES[0])
  activity['actor']['url'] = 'http://snarfed.org/'
  return Activity(id='tag:facebook.com,2013:123', source=self.source.key,
                  activity_json=json_dumps(activity)).put()
def test_is_private(self):
  self.assertFalse(self.m.is_private())

  self.auth_entity.user_json = json_dumps({'locked': True})
  self.auth_entity.put()
  self.assertTrue(self.m.is_private())
def store_activity(self):
  activity = copy.deepcopy(HTML_PHOTO_ACTIVITY)
  activity['actor']['url'] = 'http://snarfed.org/'
  return Activity(id='tag:instagram.com,2013:123_456', source=self.source.key,
                  activity_json=json_dumps(activity)).put()
def get(self, type, source_short_name, string_id, *ids): source_cls = models.sources.get(source_short_name) if not source_cls: self.abort( 400, "Source type '%s' not found. Known sources: %s" % (source_short_name, filter(None, models.sources.keys()))) self.source = source_cls.get_by_id(string_id) if not self.source: self.abort( 400, 'Source %s %s not found' % (source_short_name, string_id)) elif (self.source.status == 'disabled' or 'listen' not in self.source.features): self.abort( 400, 'Source %s is disabled for backfeed' % self.source.bridgy_path()) format = self.request.get('format', 'html') if format not in ('html', 'json'): self.abort(400, 'Invalid format %s, expected html or json' % format) for id in ids: if not self.VALID_ID.match(id): self.abort(404, 'Invalid id %s' % id) try: obj = self.get_item(*ids) except models.DisableSource as e: self.abort( 401, "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!" ) except ValueError as e: self.abort(400, '%s error:\n%s' % (self.source.GR_CLASS.NAME, e)) except Exception as e: # pass through all API HTTP errors if we can identify them code, body = util.interpret_http_exception(e) if code: self.response.status_int = int(code) self.response.headers['Content-Type'] = 'text/plain' self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body)) return else: raise if not obj: self.abort( 404, 'Not found: %s:%s %s %s' % (source_short_name, string_id, type, ids)) if self.source.is_blocked(obj): self.abort(410, 'That user is currently blocked') # use https for profile pictures so we don't cause SSL mixed mode errors # when serving over https. author = obj.get('author', {}) image = author.get('image', {}) url = image.get('url') if url: image['url'] = util.update_scheme(url, self) mf2_json = microformats2.object_to_json(obj, synthesize_content=False) # try to include the author's silo profile url author = first_props(mf2_json.get('properties', {})).get('author', {}) author_uid = first_props(author.get('properties', {})).get('uid', '') if author_uid: parsed = util.parse_tag_uri(author_uid) if parsed: urls = author.get('properties', {}).setdefault('url', []) try: silo_url = self.source.gr_source.user_url(parsed[1]) if silo_url not in microformats2.get_string_urls(urls): urls.append(silo_url) except NotImplementedError: # from gr_source.user_url() pass # write the response! self.response.headers['Access-Control-Allow-Origin'] = '*' if format == 'html': self.response.headers['Content-Type'] = 'text/html; charset=utf-8' url = obj.get('url', '') self.response.out.write( TEMPLATE.substitute({ 'refresh': (('<meta http-equiv="refresh" content="0;url=%s">' % url) if url else ''), 'url': url, 'body': microformats2.json_to_html(mf2_json), 'title': obj.get('title') or obj.get('content') or 'Bridgy Response', })) elif format == 'json': self.response.headers[ 'Content-Type'] = 'application/json; charset=utf-8' self.response.out.write(json_dumps(mf2_json, indent=2))
def test_registration_with_user_url(self):
  """Run through an authorization back and forth with a custom user url
  provided to the auth mechanism.
  """
  encoded_state = urllib.parse.quote_plus(json_dumps({
    'callback': 'http://withknown.com/bridgy_callback',
    'feature': 'listen',
    'operation': 'add',
    'user_url': 'https://kylewm.com',
  }, sort_keys=True))

  application = webapp2.WSGIApplication([
    ('/fakesource/start', testutil.FakeStartHandler),
    ('/fakesource/add', testutil.FakeAddHandler),
  ])

  self.expect_webmention_requests_get(
    'https://kylewm.com/',
    response='<html><link rel="webmention" href="/webmention"></html>')
  self.mox.ReplayAll()

  resp = application.get_response(
    '/fakesource/start', method='POST',
    text=urllib.parse.urlencode({
      'feature': 'listen',
      'callback': 'http://withknown.com/bridgy_callback',
      'user_url': 'https://kylewm.com',
    }))

  expected_auth_url = 'http://fake/auth/url?' + urllib.parse.urlencode({
    'redirect_uri': 'http://localhost/fakesource/add?state=' + encoded_state,
  })
  self.assert_equals(302, resp.status_code)
  self.assert_equals(expected_auth_url, resp.headers['location'])

  resp = application.get_response(
    '/fakesource/add?state=' + encoded_state +
    '&oauth_token=fake-token&oauth_token_secret=fake-secret')
  self.assert_equals(302, resp.status_code)
  self.assert_equals(
    'http://withknown.com/bridgy_callback?' + urllib.parse.urlencode([
      ('result', 'success'),
      ('user', 'http://localhost/fake/0123456789'),
      ('key', ndb.Key('FakeSource', '0123456789').urlsafe().decode()),
    ]),
    resp.headers['location'])
  self.assertEqual(
    'logins="/fake/0123456789?Fake+User"; expires="2001-12-31 00:00:00"; Path=/',
    resp.headers['Set-Cookie'])

  source = FakeSource.get_by_id('0123456789')
  self.assertTrue(source)
  self.assert_equals('Fake User', source.name)
  self.assert_equals(['listen'], source.features)
  self.assert_equals(['https://kylewm.com/', 'http://fakeuser.com/'],
                     source.domain_urls)
  self.assert_equals(['kylewm.com', 'fakeuser.com'], source.domains)
def output(self, obj):
  self.response.headers['Content-Type'] = JSON_CONTENT_TYPE
  self.response.write(json_dumps(obj, indent=2))
def write_response(self, response, actor=None, url=None, title=None, hfeed=None): """Converts ActivityStreams activities and writes them out. Args: response: response dict with values based on OpenSocial ActivityStreams REST API, as returned by Source.get_activities_response() actor: optional ActivityStreams actor dict for current user. Only used for Atom and JSON Feed output. url: the input URL title: string, used in feed output (Atom, JSON Feed, RSS) hfeed: dict, parsed mf2 h-feed, if available """ format = self.request.get('format') or self.request.get( 'output') or 'json' if format not in FORMATS: raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' % (format, FORMATS)) if 'plaintext' in self.request.params: # override content type self.response.headers['Content-Type'] = 'text/plain' else: content_type = FORMATS.get(format) if content_type: self.response.headers['Content-Type'] = content_type if self.request.method == 'HEAD': return activities = response['items'] try: if format in ('as1', 'json', 'activitystreams'): self.response.out.write(json_dumps(response, indent=2)) elif format == 'as2': response.update({ 'items': [as2.from_as1(a) for a in activities], 'totalItems': response.pop('totalResults', None), 'updated': response.pop('updatedSince', None), 'filtered': None, 'sorted': None, }) self.response.out.write( json_dumps(util.trim_nulls(response), indent=2)) elif format == 'atom': hub = self.request.get('hub') reader = self.request.get('reader', 'true').lower() if reader not in ('true', 'false'): self.abort(400, 'reader param must be either true or false') if not actor and hfeed: actor = microformats2.json_to_object({ 'properties': hfeed.get('properties', {}), }) self.response.out.write( atom.activities_to_atom(activities, actor, host_url=url or self.request.host_url + '/', request_url=self.request.url, xml_base=util.base_url(url), title=title, rels={'hub': hub} if hub else None, reader=(reader == 'true'))) self.response.headers.add( 'Link', '<%s>; rel="self"' % util.quote_path(self.request.url)) if hub: self.response.headers.add( 'Link', '<%s>; rel="hub"' % util.quote_path(hub)) elif format == 'rss': if not title: title = 'Feed for %s' % url self.response.out.write( rss.from_activities(activities, actor, title=title, feed_url=self.request.url, hfeed=hfeed, home_page_url=util.base_url(url))) elif format in ('as1-xml', 'xml'): self.response.out.write(XML_TEMPLATE % util.to_xml(response)) elif format == 'html': self.response.out.write( microformats2.activities_to_html(activities)) elif format in ('mf2-json', 'json-mf2'): items = [microformats2.activity_to_json(a) for a in activities] self.response.out.write(json_dumps({'items': items}, indent=2)) elif format == 'jsonfeed': try: jf = jsonfeed.activities_to_jsonfeed( activities, actor=actor, title=title, feed_url=self.request.url) except TypeError as e: raise exc.HTTPBadRequest('Unsupported input data: %s' % e) self.response.out.write(json_dumps(jf, indent=2)) except ValueError as e: logging.warning('converting to output format failed', stack_info=True) self.abort(400, 'Could not convert to %s: %s' % (format, str(e)))
def test_new_no_primary_blog(self):
  self.auth_entity.user_json = json_dumps({'user': {'blogs': [{'url': 'foo'}]}})
  with app.test_request_context():
    self.assertIsNone(Tumblr.new(auth_entity=self.auth_entity))
    self.assertIn('Tumblr blog not found', get_flashed_messages()[0])
def _try_salmon(self, resp): """ Args: resp: Response """ # fetch target HTML page, extract Atom rel-alternate link if not self.target_resp: self.target_resp = common.requests_get(resp.target()) parsed = util.parse_html(self.target_resp) atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM) if not atom_url or not atom_url.get('href'): self.error('Target post %s has no Atom link' % resp.target(), status=400) # fetch Atom target post, extract and inject id into source object base_url = '' base = parsed.find('base') if base and base.get('href'): base_url = base['href'] atom_link = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM) atom_url = urllib.parse.urljoin( resp.target(), urllib.parse.urljoin(base_url, atom_link['href'])) feed = common.requests_get(atom_url).text parsed = feedparser.parse(feed) logging.info('Parsed: %s', json_dumps(parsed, indent=2)) entry = parsed.entries[0] target_id = entry.id in_reply_to = self.source_obj.get('inReplyTo') source_obj_obj = self.source_obj.get('object') if in_reply_to: in_reply_to[0]['id'] = target_id elif isinstance(source_obj_obj, dict): source_obj_obj['id'] = target_id # Mastodon (and maybe others?) require a rel-mentioned link to the # original post's author to make it show up as a reply: # app/services/process_interaction_service.rb # ...so add them as a tag, which atom renders as a rel-mention link. authors = entry.get('authors', None) if authors: url = entry.authors[0].get('href') if url: self.source_obj.setdefault('tags', []).append({'url': url}) # extract and discover salmon endpoint logging.info('Discovering Salmon endpoint in %s', atom_url) endpoint = django_salmon.discover_salmon_endpoint(feed) if not endpoint: # try webfinger parsed = urllib.parse.urlparse(resp.target()) # TODO: test missing email author = entry.get('author_detail', {}) email = author.get('email') or '@'.join( (author.get('name', ''), parsed.netloc)) try: # TODO: always https? profile = common.requests_get( '%s://%s/.well-known/webfinger?resource=acct:%s' % (parsed.scheme, parsed.netloc, email), verify=False) endpoint = django_salmon.get_salmon_replies_link(profile.json()) except requests.HTTPError as e: pass if not endpoint: self.error('No salmon endpoint found!', status=400) logging.info('Discovered Salmon endpoint %s', endpoint) # construct reply Atom object self.source_url = resp.source() activity = self.source_obj if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT: activity = {'object': self.source_obj} entry = atom.activity_to_atom(activity, xml_base=self.source_url) logging.info('Converted %s to Atom:\n%s', self.source_url, entry) # sign reply and wrap in magic envelope domain = urllib.parse.urlparse(self.source_url).netloc key = MagicKey.get_or_create(domain) logging.info('Using key for %s: %s', domain, key) magic_envelope = magicsigs.magic_envelope( entry, common.CONTENT_TYPE_ATOM, key).decode() logging.info('Sending Salmon slap to %s', endpoint) common.requests_post( endpoint, data=common.XML_UTF8 + magic_envelope, headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE}) return True
def _activitypub_targets(self):
  """
  Returns: list of (Response, string inbox URL)
  """
  # if there's an in-reply-to, like-of, or repost-of, that's the target.
  # otherwise, it's all followers' inboxes.
  target = self._single_target()

  if not target:
    # interpret this as a Create or Update, deliver it to followers
    inboxes = []
    for follower in Follower.query().filter(
        Follower.key > Key('Follower', self.source_domain + ' '),
        Follower.key < Key('Follower', self.source_domain + chr(ord(' ') + 1))):
      if follower.status != 'inactive' and follower.last_follow:
        actor = json_loads(follower.last_follow).get('actor')
        if actor and isinstance(actor, dict):
          inboxes.append(actor.get('endpoints', {}).get('sharedInbox') or
                         actor.get('publicInbox') or
                         actor.get('inbox'))
    return [(Response.get_or_create(
               source=self.source_url, target=inbox, direction='out',
               protocol='activitypub', source_mf2=json_dumps(self.source_mf2)),
             inbox)
            for inbox in inboxes if inbox]

  # fetch target page as AS2 object
  try:
    self.target_resp = common.get_as2(target)
  except (requests.HTTPError, exc.HTTPBadGateway) as e:
    self.target_resp = getattr(e, 'response', None)
    if (self.target_resp and self.target_resp.status_code // 100 == 2 and
        common.content_type(self.target_resp).startswith('text/html')):
      # TODO: pass e.response to try_salmon()'s target_resp
      return False  # make post() try Salmon
    else:
      raise
  target_url = self.target_resp.url or target

  resp = Response.get_or_create(
    source=self.source_url, target=target_url, direction='out',
    protocol='activitypub', source_mf2=json_dumps(self.source_mf2))

  # find target's inbox
  target_obj = self.target_resp.json()
  resp.target_as2 = json_dumps(target_obj)
  inbox_url = target_obj.get('inbox')

  if not inbox_url:
    # TODO: test actor/attributedTo and not, with/without inbox
    actor = (util.get_first(target_obj, 'actor') or
             util.get_first(target_obj, 'attributedTo'))
    if isinstance(actor, dict):
      inbox_url = actor.get('inbox')
      actor = actor.get('url') or actor.get('id')
    if not inbox_url and not actor:
      self.error('Target object has no actor or attributedTo with URL or id.')
    elif not isinstance(actor, str):
      self.error('Target actor or attributedTo has unexpected url or id object: %r' % actor)

  if not inbox_url:
    # fetch actor as AS object
    actor = common.get_as2(actor).json()
    inbox_url = actor.get('inbox')

  if not inbox_url:
    # TODO: probably need a way to save errors like this so that we can
    # return them if ostatus fails too.
    # self.error('Target actor has no inbox')
    return []

  inbox_url = urllib.parse.urljoin(target_url, inbox_url)
  return [(resp, inbox_url)]
def test_retry(self): self.assertEqual([], self.taskqueue_stub.GetTasks('propagate')) source = self.sources[0] source.domain_urls = ['http://orig'] source.last_hfeed_refetch = last_hfeed_refetch = \ testutil.NOW - datetime.timedelta(minutes=1) source.put() resp = self.responses[0] resp.status = 'complete' resp.unsent = ['http://unsent'] resp.sent = ['http://sent'] resp.error = ['http://error'] resp.failed = ['http://failed'] resp.skipped = ['https://skipped'] # SyndicatedPost with new target URLs resp.activities_json = [ json_dumps({'object': { 'url': 'https://fa.ke/1' }}), json_dumps({ 'url': 'https://fa.ke/2', 'object': { 'unused': 'ok' } }), json_dumps({'url': 'https://fa.ke/3'}), ] resp.put() SyndicatedPost.insert(source, 'https://fa.ke/1', 'https://orig/1') SyndicatedPost.insert(source, 'https://fa.ke/2', 'http://orig/2') SyndicatedPost.insert(source, 'https://fa.ke/3', 'http://orig/3') # cached webmention endpoint memcache.set('W https skipped /', 'asdf') key = resp.key.urlsafe() response = app.application.get_response( '/retry', method='POST', body=native_str(urllib.parse.urlencode({'key': key}))) self.assertEquals(302, response.status_int) self.assertEquals(source.bridgy_url(self.handler), response.headers['Location'].split('#')[0]) params = testutil.get_task_params( self.taskqueue_stub.GetTasks('propagate')[0]) self.assertEqual(key, params['response_key']) # status and URLs should be refreshed got = resp.key.get() self.assertEqual('new', got.status) self.assertItemsEqual([ 'http://unsent/', 'http://sent/', 'https://skipped/', 'http://error/', 'http://failed/', 'https://orig/1', 'http://orig/2', 'http://orig/3' ], got.unsent) for field in got.sent, got.skipped, got.error, got.failed: self.assertEqual([], field) # webmention endpoints for URL domains should be refreshed self.assertIsNone(memcache.get('W https skipped /')) # shouldn't have refetched h-feed self.assertEqual(last_hfeed_refetch, source.key.get().last_hfeed_refetch)
def setUp(self): super(ModelsTest, self).setUp() # sources auth_entities = [ FakeAuthEntity(key=ndb.Key('FakeAuthEntity', '01122334455'), user_json=json_dumps({ 'id': '0123456789', 'name': 'Fake User', 'url': 'http://fakeuser.com/', })), FakeAuthEntity(key=ndb.Key('FakeAuthEntity', '0022446688'), user_json=json_dumps({ 'id': '0022446688', 'name': 'Another Fake', 'url': 'http://anotherfake.com/', })) ] for entity in auth_entities: entity.put() self.sources = [ FakeSource.new(None, auth_entity=auth_entities[0]), FakeSource.new(None, auth_entity=auth_entities[1]) ] for entity in self.sources: entity.features = ['listen'] entity.put() self.actor = FakeGrSource.actor = { 'objectType': 'person', 'id': 'tag:fa.ke,2013:212038', 'username': '******', 'displayName': 'Ryan B', 'url': 'https://snarfed.org/', 'image': { 'url': 'http://pic.ture/url' }, } # activities self.activities = FakeGrSource.activities = [{ 'id': 'tag:source.com,2013:%s' % id, 'url': 'http://fa.ke/post/url', 'object': { 'objectType': 'note', 'id': 'tag:source.com,2013:%s' % id, 'url': 'http://fa.ke/post/url', 'content': 'foo http://target1/post/url bar', 'to': [{ 'objectType': 'group', 'alias': '@public' }], 'replies': { 'items': [{ 'objectType': 'comment', 'id': 'tag:source.com,2013:1_2_%s' % id, 'url': 'http://fa.ke/comment/url', 'content': 'foo bar', }], 'totalItems': 1, }, 'tags': [{ 'objectType': 'activity', 'verb': 'like', 'id': 'tag:source.com,2013:%s_liked_by_alice' % id, 'object': { 'url': 'http://example.com/abc' }, 'author': { 'id': 'tag:source.com,2013:alice', 'url': 'http://example.com/alice', }, }, { 'id': 'tag:source.com,2013:%s_reposted_by_bob' % id, 'objectType': 'activity', 'verb': 'share', 'object': { 'url': 'http://example.com/def' }, 'author': { 'url': 'http://example.com/bob' }, }, { 'id': 'tag:source.com,2013:%s_scissors_by_bob' % id, 'objectType': 'activity', 'verb': 'react', 'content': '✁', 'object': { 'url': 'http://example.com/def' }, 'author': { 'url': 'http://example.com/bob' }, }], }, } for id in ('a', 'b', 'c')] # responses self.responses = [] created = datetime.datetime.utcnow() - datetime.timedelta(days=10) for activity in self.activities: obj = activity['object'] pruned_activity = { 'id': activity['id'], 'url': 'http://fa.ke/post/url', 'object': { 'content': 'foo http://target1/post/url bar', } } comment = obj['replies']['items'][0] self.responses.append( Response(id=comment['id'], activities_json=[json_dumps(pruned_activity)], response_json=json_dumps(comment), type='comment', source=self.sources[0].key, unsent=['http://target1/post/url'], created=created)) created += datetime.timedelta(hours=1) like = obj['tags'][0] self.responses.append( Response(id=like['id'], activities_json=[json_dumps(pruned_activity)], response_json=json_dumps(like), type='like', source=self.sources[0].key, unsent=['http://target1/post/url'], created=created)) created += datetime.timedelta(hours=1) share = obj['tags'][1] self.responses.append( Response(id=share['id'], activities_json=[json_dumps(pruned_activity)], response_json=json_dumps(share), type='repost', source=self.sources[0].key, unsent=['http://target1/post/url'], created=created)) created += datetime.timedelta(hours=1) reaction = obj['tags'][2] self.responses.append( Response(id=reaction['id'], activities_json=[json_dumps(pruned_activity)], response_json=json_dumps(reaction), type='react', source=self.sources[0].key, unsent=['http://target1/post/url'], created=created)) created += datetime.timedelta(hours=1) # publishes self.publishes = [ Publish( 
parent=PublishedPage(id='https://post').key, source=self.sources[0].key, status='complete', published={'url': 'http://fa.ke/syndpost'}, ) ] # blogposts self.blogposts = [ BlogPost( id='https://post', source=self.sources[0].key, status='complete', feed_item={'title': 'a post'}, sent=['http://a/link'], ) ]
def test_social_user_page_mf2(self): """Check the custom mf2 we render on social user pages.""" self.sources[0].features = ['listen', 'publish'] self.sources[0].put() # test invite with missing object and content resp = json_loads(self.responses[8].response_json) resp['verb'] = 'invite' resp.pop('object', None) resp.pop('content', None) self.responses[8].response_json = json_dumps(resp) # test that invites render the invitee, not the inviter # https://github.com/snarfed/bridgy/issues/754 self.responses[9].type = 'rsvp' self.responses[9].response_json = json_dumps({ 'id': 'tag:fa.ke,2013:111', 'objectType': 'activity', 'verb': 'invite', 'url': 'http://fa.ke/event', 'actor': { 'displayName': 'Mrs. Host', 'url': 'http://fa.ke/host', }, 'object': { 'objectType': 'person', 'displayName': 'Ms. Guest', 'url': 'http://fa.ke/guest', }, }) for entity in self.responses + self.publishes + self.blogposts: entity.put() user_url = self.sources[0].bridgy_path() response = app.application.get_response(user_url) self.assertEquals(200, response.status_int) parsed = util.parse_mf2(response.body, user_url) hcard = parsed.get('items', [])[0] self.assertEquals(['h-card'], hcard['type']) self.assertEquals( ['Fake User'], hcard['properties'].get('name')) self.assertEquals( ['http://fa.ke/profile/url'], hcard['properties'].get('url')) self.assertEquals( ['enabled'], hcard['properties'].get('bridgy-account-status')) self.assertEquals( ['enabled'], hcard['properties'].get('bridgy-listen-status')) self.assertEquals( ['enabled'], hcard['properties'].get('bridgy-publish-status')) expected_resps = self.responses[:10] for item, resp in zip(hcard['children'], expected_resps): self.assertIn('h-bridgy-response', item['type']) props = item['properties'] self.assertEquals([resp.status], props['bridgy-status']) self.assertEquals([json_loads(resp.activities_json[0])['url']], props['bridgy-original-source']) self.assertEquals(resp.unsent, props['bridgy-target']) # check invite html = response.body.decode('utf-8') self.assertIn('Ms. Guest is invited.', html) self.assertNotIn('Mrs. Host is invited.', html) publish = hcard['children'][len(expected_resps)] self.assertIn('h-bridgy-publish', publish['type']) props = publish['properties'] self.assertEquals([self.publishes[0].key.parent().id()], props['url']) self.assertEquals([self.publishes[0].status], props['bridgy-status'])
  HTML_VIDEO_ACTIVITY_FULL,
  HTML_VIDEO_EXTRA_COMMENT_OBJ,
  HTML_VIDEO_PAGE,
  HTML_VIEWER_CONFIG,
  LIKE_OBJS,
)
from oauth_dropins.webutil.util import HTTP_TIMEOUT, json_dumps, json_loads

import app
from instagram import Instagram
from models import Activity, Domain
from .testutil import ModelsTest

HTML_VIDEO_WITH_VIEWER = copy.deepcopy(HTML_VIDEO_PAGE)
HTML_VIDEO_WITH_VIEWER['config'] = HTML_VIEWER_CONFIG
HTML_VIDEO_COMPLETE = HTML_HEADER + json_dumps(HTML_VIDEO_WITH_VIEWER) + HTML_FOOTER


class InstagramTest(ModelsTest):

  def setUp(self):
    super(InstagramTest, self).setUp()
    self.source = Instagram.new(self.handler, actor=self.actor)
    self.domain = Domain(id='snarfed.org', tokens=['towkin']).put()
    self.auth = f'token=towkin&key={self.source.key.urlsafe().decode()}'

  def get_response(self, path_query, auth=True, **kwargs):
    if auth and '?' not in path_query:
      path_query += f'?{self.auth}'
    return app.application.get_response(
      f'/instagram/browser/{path_query}', method='POST', **kwargs)
def send_webmentions(self, activity_wrapped, proxy=None, **response_props): """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery. Args: activity_wrapped: dict, AS1 activity response_props: passed through to the newly created Responses """ activity = self.redirect_unwrap(activity_wrapped) verb = activity.get('verb') if verb and verb not in SUPPORTED_VERBS: self.error('%s activities are not supported yet.' % verb) # extract source and targets source = activity.get('url') or activity.get('id') obj = activity.get('object') obj_url = util.get_url(obj) targets = util.get_list(activity, 'inReplyTo') if isinstance(obj, dict): if not source or verb in ('create', 'post', 'update'): source = obj_url or obj.get('id') targets.extend(util.get_list(obj, 'inReplyTo')) tags = util.get_list(activity_wrapped, 'tags') obj_wrapped = activity_wrapped.get('object') if isinstance(obj_wrapped, dict): tags.extend(util.get_list(obj_wrapped, 'tags')) for tag in tags: if tag.get('objectType') == 'mention': url = tag.get('url') if url and url.startswith(self.request.host_url): targets.append(self.redirect_unwrap(url)) if verb in ('follow', 'like', 'share'): targets.append(obj_url) targets = util.dedupe_urls(util.get_url(t) for t in targets) if not source: self.error("Couldn't find original post URL") if not targets: self.error( "Couldn't find any target URLs in inReplyTo, object, or mention tags" ) # send webmentions and store Responses errors = [] for target in targets: if util.domain_from_link(target) == util.domain_from_link(source): logging.info('Skipping same-domain webmention from %s to %s', source, target) continue response = Response(source=source, target=target, direction='in', **response_props) response.put() wm_source = (response.proxy_url(self) if verb in ('follow', 'like', 'share') or proxy else source) logging.info('Sending webmention from %s to %s', wm_source, target) wm = send.WebmentionSend(wm_source, target) if wm.send(headers=HEADERS): logging.info('Success: %s', wm.response) response.status = 'complete' else: logging.warning('Failed: %s', wm.error) errors.append(wm.error) response.status = 'error' response.put() if errors: msg = 'Errors:\n' + '\n'.join( util.json_dumps(e, indent=2) for e in errors) self.error(msg, status=errors[0].get('http_status'))
def backfeed(self, source, responses=None, activities=None): """Processes responses and activities and generates propagate tasks. Stores property names and values to update in source.updates. Args: source: Source responses: dict mapping AS response id to AS object activities: dict mapping AS activity id to AS object """ if responses is None: responses = {} if activities is None: activities = {} # Cache to make sure we only fetch the author's h-feed(s) the # first time we see it fetched_hfeeds = set() # narrow down to just public activities public = {} private = {} for id, activity in activities.items(): (public if source.is_activity_public(activity) else private)[id] = activity logging.info('Found %d public activities: %s', len(public), public.keys()) logging.info('Found %d private activities: %s', len(private), private.keys()) last_public_post = (source.last_public_post or util.EPOCH).isoformat() public_published = util.trim_nulls( [a.get('object', {}).get('published') for a in public.values()]) if public_published: max_published = max(public_published) if max_published > last_public_post: last_public_post = max_published source.updates['last_public_post'] = \ util.as_utc(util.parse_iso8601(max_published)) source.updates['recent_private_posts'] = \ len([a for a in private.values() if a.get('object', {}).get('published', util.EPOCH_ISO) > last_public_post]) # # Step 2: extract responses, store their activities in response['activities'] # # WARNING: this creates circular references in link posts found by search # queries in step 1, since they are their own activity. We use # prune_activity() and prune_response() in step 4 to remove these before # serializing to JSON. # for id, activity in public.items(): obj = activity.get('object') or activity # handle user mentions user_id = source.user_tag_id() if obj.get( 'author', {}).get('id') != user_id and activity.get('verb') != 'share': for tag in obj.get('tags', []): urls = tag.get('urls') if tag.get('objectType') == 'person' and tag.get( 'id') == user_id and urls: activity['originals'], activity['mentions'] = \ original_post_discovery.discover( source, activity, fetch_hfeed=True, include_redirect_sources=False, already_fetched_hfeeds=fetched_hfeeds) activity['mentions'].update( u.get('value') for u in urls) responses[id] = activity break # handle quote mentions for att in obj.get('attachments', []): if (att.get('objectType') in ('note', 'article') and att.get( 'author', {}).get('id') == source.user_tag_id()): # now that we've confirmed that one exists, OPD will dig # into the actual attachments if 'originals' not in activity or 'mentions' not in activity: activity['originals'], activity['mentions'] = \ original_post_discovery.discover( source, activity, fetch_hfeed=True, include_redirect_sources=False, already_fetched_hfeeds=fetched_hfeeds) responses[id] = activity break # extract replies, likes, reactions, reposts, and rsvps replies = obj.get('replies', {}).get('items', []) tags = obj.get('tags', []) likes = [t for t in tags if Response.get_type(t) == 'like'] reactions = [t for t in tags if Response.get_type(t) == 'react'] reposts = [t for t in tags if Response.get_type(t) == 'repost'] rsvps = Source.get_rsvps_from_event(obj) # coalesce responses. 
drop any without ids for resp in replies + likes + reactions + reposts + rsvps: id = resp.get('id') if not id: logging.error('Skipping response without id: %s', json_dumps(resp, indent=2)) continue if source.is_blocked(resp): logging.info( 'Skipping response by blocked user: %s', json_dumps(resp.get('author') or resp.get('actor'), indent=2)) continue resp.setdefault('activities', []).append(activity) # when we find two responses with the same id, the earlier one may have # come from a link post or user mention, and this one is probably better # since it probably came from the user's activity, so prefer this one. # background: https://github.com/snarfed/bridgy/issues/533 existing = responses.get(id) if existing: if source.gr_source.activity_changed(resp, existing, log=True): logging.warning( 'Got two different versions of same response!\n%s\n%s', existing, resp) resp['activities'].extend(existing.get('activities', [])) responses[id] = resp # # Step 3: filter out responses we've already seen # # seen responses (JSON objects) for each source are stored in its entity. unchanged_responses = [] if source.seen_responses_cache_json: for seen in json_loads(source.seen_responses_cache_json): id = seen['id'] resp = responses.get(id) if resp and not source.gr_source.activity_changed( seen, resp, log=True): unchanged_responses.append(seen) del responses[id] # # Step 4: store new responses and enqueue propagate tasks # pruned_responses = [] source.blocked_ids = None for id, resp in responses.items(): resp_type = Response.get_type(resp) activities = resp.pop('activities', []) if not activities and resp_type == 'post': activities = [resp] too_long = set() urls_to_activity = {} for i, activity in enumerate(activities): # we'll usually have multiple responses for the same activity, and the # objects in resp['activities'] are shared, so cache each activity's # discovered webmention targets inside its object. if 'originals' not in activity or 'mentions' not in activity: activity['originals'], activity['mentions'] = \ original_post_discovery.discover( source, activity, fetch_hfeed=True, include_redirect_sources=False, already_fetched_hfeeds=fetched_hfeeds) targets = original_post_discovery.targets_for_response( resp, originals=activity['originals'], mentions=activity['mentions']) if targets: logging.info('%s has %d webmention target(s): %s', activity.get('url'), len(targets), ' '.join(targets)) # new response to propagate! load block list if we haven't already if source.blocked_ids is None: source.load_blocklist() for t in targets: if len(t) <= _MAX_STRING_LENGTH: urls_to_activity[t] = i else: logging.info( 'Giving up on target URL over %s chars! %s', _MAX_STRING_LENGTH, t) too_long.add(t[:_MAX_STRING_LENGTH - 4] + '...') # store/update response entity. the prune_*() calls are important to # remove circular references in link responses, which are their own # activities. details in the step 2 comment above. 
pruned_response = util.prune_response(resp) pruned_responses.append(pruned_response) resp_entity = Response(id=id, source=source.key, activities_json=[ json_dumps( util.prune_activity(a, source)) for a in activities ], response_json=json_dumps(pruned_response), type=resp_type, unsent=list(urls_to_activity.keys()), failed=list(too_long), original_posts=resp.get('originals', [])) if urls_to_activity and len(activities) > 1: resp_entity.urls_to_activity = json_dumps(urls_to_activity) resp_entity.get_or_save(source, restart=self.RESTART_EXISTING_TASKS) # update cache if pruned_responses: source.updates['seen_responses_cache_json'] = json_dumps( pruned_responses + unchanged_responses)
def attempt_single_item(self, item): """Attempts to preview or publish a single mf2 item. Args: item: mf2 item dict from mf2py Returns: CreationResult """ self.maybe_inject_silo_content(item) obj = microformats2.json_to_object(item) ignore_formatting = self.ignore_formatting(item) if ignore_formatting: prop = microformats2.first_props(item.get('properties', {})) content = microformats2.get_text(prop.get('content')) if content: obj['content'] = content.strip() # which original post URL to include? in order of preference: # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173) # 2. original user-provided URL if it redirected # 3. u-url if available # 4. actual final fetched URL if self.shortlink: obj['url'] = self.shortlink elif self.source_url() != self.fetched.url: obj['url'] = self.source_url() elif 'url' not in obj: obj['url'] = self.fetched.url logging.debug( f'Converted to ActivityStreams object: {json_dumps(obj, indent=2)}' ) # posts and comments need content obj_type = obj.get('objectType') if (obj_type in ('note', 'article', 'comment') and (not obj.get('content') and not obj.get('summary') and not obj.get('displayName'))): return gr_source.creation_result( abort=False, error_plain=f'Could not find content in {self.fetched.url}', error_html= f'Could not find <a href="http://microformats.org/">content</a> in {self.fetched.url}' ) self.preprocess(obj) include_link = self.include_link(item) if not self.authorize(): return gr_source.creation_result(abort=True) if self.PREVIEW: result = self.source.gr_source.preview_create( obj, include_link=include_link, ignore_formatting=ignore_formatting) previewed = result.content or result.description if self.entity.type == 'preview': self.entity.published = previewed if not previewed: return result # there was an error return self._render_preview(result, include_link=include_link) else: result = self.source.gr_source.create( obj, include_link=include_link, ignore_formatting=ignore_formatting) self.entity.published = result.content if not result.content: return result # there was an error if 'url' not in self.entity.published: self.entity.published['url'] = obj.get('url') self.entity.type = self.entity.published.get( 'type') or models.get_type(obj) ret = json_dumps(self.entity.published, indent=2) logging.info(f'Returning {ret}') return gr_source.creation_result(ret)
def template_vars(self, domain, url=None): assert domain if domain.split('.')[-1] in NON_TLDS: self.error("%s doesn't look like a domain" % domain, status=404) # find representative h-card. try url, then url's home page, then domain urls = ['http://%s/' % domain] if url: urls = [url, urllib.parse.urljoin(url, '/')] + urls for candidate in urls: resp = common.requests_get(candidate) parsed = util.parse_html(resp) mf2 = util.parse_mf2(parsed, url=resp.url) # logging.debug('Parsed mf2 for %s: %s', resp.url, json_dumps(mf2, indent=2)) hcard = mf2util.representative_hcard(mf2, resp.url) if hcard: logging.info('Representative h-card: %s', json_dumps(hcard, indent=2)) break else: self.error("""\ Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s""" % resp.url) logging.info('Generating WebFinger data for %s', domain) key = models.MagicKey.get_or_create(domain) props = hcard.get('properties', {}) urls = util.dedupe_urls(props.get('url', []) + [resp.url]) canonical_url = urls[0] acct = '%s@%s' % (domain, domain) for url in urls: if url.startswith('acct:'): urluser, urldomain = util.parse_acct_uri(url) if urldomain == domain: acct = '%s@%s' % (urluser, domain) logging.info('Found custom username: acct:%s', acct) break # discover atom feed, if any atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM) if atom and atom['href']: atom = urllib.parse.urljoin(resp.url, atom['href']) else: atom = 'https://granary.io/url?' + urllib.parse.urlencode( { 'input': 'html', 'output': 'atom', 'url': resp.url, 'hub': resp.url, }) # discover PuSH, if any for link in resp.headers.get('Link', '').split(','): match = common.LINK_HEADER_RE.match(link) if match and match.group(2) == 'hub': hub = match.group(1) else: hub = 'https://bridgy-fed.superfeedr.com/' # generate webfinger content data = util.trim_nulls({ 'subject': 'acct:' + acct, 'aliases': urls, 'magic_keys': [{ 'value': key.href() }], 'links': sum(([{ 'rel': 'http://webfinger.net/rel/profile-page', 'type': 'text/html', 'href': url, }] for url in urls if url.startswith("http")), []) + [{ 'rel': 'http://webfinger.net/rel/avatar', 'href': url, } for url in props.get('photo', [])] + [ { 'rel': 'canonical_uri', 'type': 'text/html', 'href': canonical_url, }, # ActivityPub { 'rel': 'self', 'type': common.CONTENT_TYPE_AS2, # WARNING: in python 2 sometimes request.host_url lost port, # http://localhost:8080 would become just http://localhost. no # clue how or why. pay attention here if that happens again. 'href': '%s/%s' % (self.request.host_url, domain), }, { 'rel': 'inbox', 'type': common.CONTENT_TYPE_AS2, 'href': '%s/%s/inbox' % (self.request.host_url, domain), }, # OStatus { 'rel': 'http://schemas.google.com/g/2010#updates-from', 'type': common.CONTENT_TYPE_ATOM, 'href': atom, }, { 'rel': 'hub', 'href': hub, }, { 'rel': 'magic-public-key', 'href': key.href(), }, { 'rel': 'salmon', 'href': '%s/%s/salmon' % (self.request.host_url, domain), } ] }) logging.info('Returning WebFinger data: %s', json_dumps(data, indent=2)) return data
def post(self, domain): logging.info('Got: %s', self.request.body) # parse and validate AS2 activity try: activity = json_loads(self.request.body) assert activity except (TypeError, ValueError, AssertionError): self.error("Couldn't parse body as JSON", exc_info=True) obj = activity.get('object') or {} if isinstance(obj, str): obj = {'id': obj} type = activity.get('type') if type == 'Accept': # eg in response to a Follow return # noop if type == 'Create': type = obj.get('type') elif type not in SUPPORTED_TYPES: self.error('Sorry, %s activities are not supported yet.' % type, status=501) # TODO: verify signature if there is one if type == 'Undo' and obj.get('type') == 'Follow': # skip actor fetch below; we don't need it to undo a follow return self.undo_follow(self.redirect_unwrap(activity)) elif type == 'Delete': id = obj.get('id') # !!! temporarily disabled actually deleting Followers below because # mastodon.social sends Deletes for every Bridgy Fed account, all at # basically the same time, and we have many Follower objects, so we # have to do this table scan for each one, so the requests take a # long time and end up spawning extra App Engine instances that we # get billed for. and the Delete requests are almost never for # followers we have. TODO: revisit this and do it right. # if isinstance(id, str): # # assume this is an actor # # https://github.com/snarfed/bridgy-fed/issues/63 # for key in Follower.query().iter(keys_only=True): # if key.id().split(' ')[-1] == id: # key.delete() return # fetch actor if necessary so we have name, profile photo, etc for elem in obj, activity: actor = elem.get('actor') if actor and isinstance(actor, str): elem['actor'] = common.get_as2(actor).json() activity_unwrapped = self.redirect_unwrap(activity) if type == 'Follow': return self.accept_follow(activity, activity_unwrapped) # send webmentions to each target as1 = as2.to_as1(activity) self.send_webmentions(as1, proxy=True, protocol='activitypub', source_as2=json_dumps(activity_unwrapped))
def expect_get_publications(self, pubs):
  # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
  self.expect_requests_get('users/abcdef01234/publications', json_dumps(pubs))
  self.mox.ReplayAll()
def test_activitypub_create_post(self, mock_get, mock_post): mock_get.side_effect = [self.create, self.actor] mock_post.return_value = requests_response('abc xyz') Follower.get_or_create('orig', 'https://mastodon/aaa') Follower.get_or_create('orig', 'https://mastodon/bbb', last_follow=json_dumps({ 'actor': { 'publicInbox': 'https://public/inbox', 'inbox': 'https://unused', } })) Follower.get_or_create('orig', 'https://mastodon/ccc', last_follow=json_dumps({ 'actor': { 'endpoints': { 'sharedInbox': 'https://shared/inbox', }, } })) Follower.get_or_create('orig', 'https://mastodon/ddd', last_follow=json_dumps( {'actor': { 'inbox': 'https://inbox', }})) Follower.get_or_create('orig', 'https://mastodon/eee', status='inactive', last_follow=json_dumps( {'actor': { 'inbox': 'https://unused/2', }})) Follower.get_or_create( 'orig', 'https://mastodon/fff', last_follow=json_dumps({ 'actor': { # dupe of eee; should be de-duped 'inbox': 'https://inbox', } })) got = self.client.post('/webmention', data={ 'source': 'http://orig/post', 'target': 'https://fed.brid.gy/', }) self.assertEqual(200, got.status_code) mock_get.assert_has_calls((self.req('http://orig/post'), )) inboxes = ('https://inbox', 'https://public/inbox', 'https://shared/inbox') self.assertEqual(len(inboxes), len(mock_post.call_args_list)) for call, inbox in zip(mock_post.call_args_list, inboxes): self.assertEqual((inbox, ), call[0]) self.assertEqual(self.create_as2, json_loads(call[1]['data'])) for inbox in inboxes: resp = Response.get_by_id('http://orig/post %s' % inbox) self.assertEqual('out', resp.direction, inbox) self.assertEqual('activitypub', resp.protocol, inbox) self.assertEqual('complete', resp.status, inbox) self.assertEqual(self.create_mf2, json_loads(resp.source_mf2), inbox)
def test_get_activities_response_activity_id(self):
  Activity(id='tag:fa.ke,2013:123',
           activity_json=json_dumps({'foo': 'bar'})).put()
  resp = self.source.get_activities_response(activity_id='123')
  self.assertEqual([{'foo': 'bar'}], resp['items'])
def test_search_for_links(self): """https://github.com/snarfed/bridgy/issues/565""" self.tw.domain_urls = [ 'http://foo/', 'http://bar/baz', 'https://t.co/xyz' ] self.tw.put() results = [ { 'id_str': '0', # no link 'text': 'x foo/ y /bar/baz z', }, { 'id_str': '1', # yes, ignore http vs https for bar/baz 'text': 'no link here', 'entities': { 'urls': [ { 'expanded_url': 'http://bar' }, { 'expanded_url': 'https://bar/baz' }, ] }, }, { 'id_str': '2', # no, retweet 'text': 'a http://bar/baz ok', 'retweeted_status': { 'id_str': '456', 'text': 'a http://bar/baz ok', }, }, { 'id_str': '3', # no, link domain is blacklisted 'text': 'x https://t.co/xyz/abc z', }, { 'id_str': '4', # no link 'text': 'x http://bar/baz z', }, { 'id_str': '5', # yes 'text': 'no link here', 'entities': { 'urls': [{ 'expanded_url': 'http://foo/x?y' }] }, }, { 'id_str': '6', # yes 'text': 'a link http://bar/baz here', 'entities': { 'urls': [{ 'expanded_url': 'http://foo/' }, { 'expanded_url': 'http://other' }] }, } ] self.expect_urlopen( API_BASE + API_SEARCH % { 'q': urllib.parse.quote_plus('bar/baz OR foo'), 'count': 50 }, json_dumps({'statuses': results})) self.mox.ReplayAll() self.assert_equals([ 'tag:twitter.com,2013:1', 'tag:twitter.com,2013:5', 'tag:twitter.com,2013:6' ], [a['id'] for a in self.tw.search_for_links()])
def poll(self, source): """Actually runs the poll. Stores property names and values to update in source.updates. """ if source.last_activities_etag or source.last_activity_id: logging.debug('Using ETag %s, last activity id %s', source.last_activities_etag, source.last_activity_id) # # Step 1: fetch activities: # * posts by the user # * search all posts for the user's domain URLs to find links # cache = util.CacheDict() if source.last_activities_cache_json: cache.update(json_loads(source.last_activities_cache_json)) # search for links first so that the user's activities and responses # override them if they overlap links = source.search_for_links() # this user's own activities (and user mentions) resp = source.get_activities_response( fetch_replies=True, fetch_likes=True, fetch_shares=True, fetch_mentions=True, count=50, etag=source.last_activities_etag, min_id=source.last_activity_id, cache=cache) etag = resp.get('etag') # used later user_activities = resp.get('items', []) # these map ids to AS objects responses = {a['id']: a for a in links} activities = {a['id']: a for a in links + user_activities} # extract silo activity ids, update last_activity_id silo_activity_ids = set() last_activity_id = source.last_activity_id for id, activity in activities.items(): # maybe replace stored last activity id parsed = util.parse_tag_uri(id) if parsed: id = parsed[1] silo_activity_ids.add(id) try: # try numeric comparison first greater = int(id) > int(last_activity_id) except (TypeError, ValueError): greater = id > last_activity_id if greater: last_activity_id = id if last_activity_id and last_activity_id != source.last_activity_id: source.updates['last_activity_id'] = last_activity_id # trim cache to just the returned activity ids, so that it doesn't grow # without bound. (WARNING: depends on get_activities_response()'s cache key # format, e.g. 'PREFIX ACTIVITY_ID'!) source.updates['last_activities_cache_json'] = json_dumps( {k: v for k, v in cache.items() if k.split()[-1] in silo_activity_ids}) self.backfeed(source, responses, activities=activities) source.updates.update({'last_polled': source.last_poll_attempt, 'poll_status': 'ok'}) if etag and etag != source.last_activities_etag: source.updates['last_activities_etag'] = etag # # Possibly refetch updated syndication urls. # # if the author has added syndication urls since the first time # original_post_discovery ran, we'll miss them. this cleanup task will # periodically check for updated urls. only kicks in if the author has # *ever* published a rel=syndication url if source.should_refetch(): logging.info('refetching h-feed for source %s', source.label()) relationships = original_post_discovery.refetch(source) now = util.now_fn() source.updates['last_hfeed_refetch'] = now if relationships: logging.info('refetch h-feed found new rel=syndication relationships: %s', relationships) try: self.repropagate_old_responses(source, relationships) except BaseException as e: if ('BadRequestError' in str(e.__class__) or 'Timeout' in str(e.__class__) or util.is_connection_failure(e)): logging.info('Timeout while repropagating responses.', exc_info=True) else: raise else: logging.info( 'skipping refetch h-feed. last-syndication-url %s, last-refetch %s', source.last_syndication_url, source.last_hfeed_refetch)
def test_retry(self): source = self.sources[0] source.domain_urls = ['http://orig'] source.last_hfeed_refetch = last_hfeed_refetch = \ testutil.NOW - datetime.timedelta(minutes=1) source.put() resp = self.responses[0] resp.status = 'complete' resp.unsent = ['http://unsent'] resp.sent = ['http://sent'] resp.error = ['http://error'] resp.failed = ['http://failed'] resp.skipped = ['https://skipped'] # SyndicatedPost with new target URLs resp.activities_json = [ json_dumps({'object': { 'url': 'https://fa.ke/1' }}), json_dumps({ 'url': 'https://fa.ke/2', 'object': { 'unused': 'ok' } }), json_dumps({'url': 'https://fa.ke/3'}), ] resp.put() SyndicatedPost.insert(source, 'https://fa.ke/1', 'https://orig/1') SyndicatedPost.insert(source, 'https://fa.ke/2', 'http://orig/2') SyndicatedPost.insert(source, 'https://fa.ke/3', 'http://orig/3') key = resp.key.urlsafe().decode() self.expect_task('propagate', response_key=key) self.mox.ReplayAll() # cached webmention endpoint util.webmention_endpoint_cache['W https skipped /'] = 'asdf' response = app.application.get_response('/retry', method='POST', text=urlencode({'key': key})) self.assertEqual(302, response.status_int) self.assertEqual(source.bridgy_url(self.handler), response.headers['Location'].split('#')[0]) # status and URLs should be refreshed got = resp.key.get() self.assertEqual('new', got.status) self.assertCountEqual([ 'http://unsent/', 'http://sent/', 'https://skipped/', 'http://error/', 'http://failed/', 'https://orig/1', 'http://orig/2', 'http://orig/3' ], got.unsent) for field in got.sent, got.skipped, got.error, got.failed: self.assertEqual([], field) # webmention endpoints for URL domains should be refreshed self.assertNotIn('W https skipped /', util.webmention_endpoint_cache) # shouldn't have refetched h-feed self.assertEqual(last_hfeed_refetch, source.key.get().last_hfeed_refetch)