def find_mention_item(self, items):
    """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

    Items are examined in order. An item matches when the URLs (fragment
    removed) of one of its in-reply-to, like, like-of, repost or repost-of
    properties are accepted by self.any_target_in(), or otherwise when the
    text of its first content element is. If an item does not match, its
    'children' are searched recursively before moving on to the next item.

    May modify the items arg: 'properties' and 'content' are created when
    missing, and content.html or content.value may be set or replaced. Also
    sets self.entity.type when a match is found.

    Args:
      items: sequence of mf2 item dicts

    Returns:
      mf2 item dict or None
    """
    relation_props = ('in-reply-to', 'like', 'like-of', 'repost', 'repost-of')
    entity_types = {
        'in-reply-to': 'comment',
        'like-of': 'like',
        'repost-of': 'repost',
    }
    default_text = {
        'comment': 'replied to this.',
        'like': 'liked this.',
        'repost': 'reposted this.',
    }

    for item in items:
        props = item.setdefault('properties', {})
        # only the first content element is considered
        content = props.setdefault('content', [{}])[0]
        text = content.get('html') or content.get('value')

        # first look for the target among the response-type properties...
        kind = None
        for prop in relation_props:
            candidates = microformats2.get_string_urls(props.get(prop, []))
            urls = [urlparse.urldefrag(u)[0] for u in candidates]
            if self.any_target_in(urls):
                kind = prop
                break

        # ...then fall back to a plain mention inside the content text
        if kind is None and text and self.any_target_in(text):
            kind = 'post'
            url = first_value(props, 'url') or self.source_url
            name = first_value(props, 'name') or first_value(props, 'summary')
            link = util.pretty_link(url, text=name, max_length=280)
            content['html'] = 'mentioned this in %s.' % link
            text = content['html']

        if kind:
            # found the target!
            rsvp = first_value(props, 'rsvp')
            if rsvp:
                self.entity.type = 'rsvp'
                if not text:
                    content['value'] = 'RSVPed %s.' % rsvp
            else:
                self.entity.type = entity_types.get(kind, kind)
                if not text:
                    content['value'] = default_text[self.entity.type]
            return item

        # check children in case this is eg an h-feed
        nested = self.find_mention_item(item.get('children', []))
        if nested:
            return nested

    return None
def find_mention_item(self, data):
    """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

    Only the top-level entries of data['items'] are examined, in order. An
    item matches when the URLs (fragment removed) of one of its in-reply-to,
    like, like-of, repost or repost-of properties are accepted by
    self.any_target_in(), or otherwise when the text of its first content
    element is.

    May modify the data arg: 'properties' and 'content' are created when
    missing, and content.html or content.value may be set or replaced. Also
    sets self.entity.type when a match is found.

    Args:
      data: mf2 data dict

    Returns:
      mf2 item dict or None
    """
    relation_props = ('in-reply-to', 'like', 'like-of', 'repost', 'repost-of')

    def targets_in(props, prop):
        # True if any (defragmented) URL in this property is a target
        raw = microformats2.get_string_urls(props.get(prop, []))
        return self.any_target_in([urlparse.urldefrag(u)[0] for u in raw])

    for item in data.get('items', []):
        props = item.setdefault('properties', {})
        # only the first content element is considered
        content = props.setdefault('content', [{}])[0]
        text = content.get('html') or content.get('value')

        matched = next((p for p in relation_props if targets_in(props, p)), None)
        if matched is None:
            # no response property matched; is it a plain mention?
            if not (text and self.any_target_in(text)):
                continue
            matched = 'post'
            url = first_value(props, 'url') or self.source_url
            name = first_value(props, 'name') or first_value(props, 'summary')
            link = util.pretty_link(url, text=name, max_length=280)
            content['html'] = 'mentioned this in %s.' % link
            text = content['html']

        # found the target!
        rsvp = first_value(props, 'rsvp')
        if rsvp:
            self.entity.type = 'rsvp'
            if not text:
                content['value'] = 'RSVPed %s.' % rsvp
        else:
            self.entity.type = {
                'in-reply-to': 'comment',
                'like-of': 'like',
                'repost-of': 'repost',
            }.get(matched, matched)
            if not text:
                content['value'] = {
                    'comment': 'replied to this.',
                    'like': 'liked this.',
                    'repost': 'reposted this.',
                }[self.entity.type]
        return item

    return None
def find_mention_item(self, items):
    """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

    Walks the items in order, descending into each non-matching item's
    'children' before continuing. An item matches when the URLs (fragment
    removed) of one of its in-reply-to, like, like-of, repost or repost-of
    properties are accepted by self.any_target_in(), or otherwise when the
    text of its first content element is.

    May modify the items arg: 'properties' and 'content' are created when
    missing, and content.html or content.value may be set or replaced. Also
    sets self.entity.type when a match is found.

    Args:
      items: sequence of mf2 item dicts

    Returns:
      mf2 item dict or None
    """
    response_props = ('in-reply-to', 'like', 'like-of', 'repost', 'repost-of')
    type_for_prop = {
        'in-reply-to': 'comment',
        'like-of': 'like',
        'repost-of': 'repost',
    }
    placeholder = {
        'comment': 'replied to this.',
        'like': 'liked this.',
        'repost': 'reposted this.',
    }

    for item in items:
        props = item.setdefault('properties', {})
        # only the first content element is considered
        content = props.setdefault('content', [{}])[0]
        text = content.get('html') or content.get('value')

        found_via = None
        for prop in response_props:
            values = props.get(prop, [])
            urls = [urllib.parse.urldefrag(u)[0]
                    for u in microformats2.get_string_urls(values)]
            if self.any_target_in(urls):
                found_via = prop
                break

        if found_via is None and text and self.any_target_in(text):
            # plain mention: replace the content with a short link blurb
            found_via = 'post'
            url = get_first(props, 'url') or self.source_url
            name = get_first(props, 'name') or get_first(props, 'summary')
            link = util.pretty_link(url, text=name, max_length=280)
            content['html'] = f'mentioned this in {link}.'
            text = content['html']

        if not found_via:
            # check children in case this is eg an h-feed
            nested = self.find_mention_item(item.get('children', []))
            if nested:
                return nested
            continue

        # found the target!
        rsvp = get_first(props, 'rsvp')
        if rsvp:
            self.entity.type = 'rsvp'
            if not text:
                content['value'] = f'RSVPed {rsvp}.'
        else:
            self.entity.type = type_for_prop.get(found_via, found_via)
            if not text:
                content['value'] = placeholder[self.entity.type]
        return item

    return None
def find_mention_item(self, data):
    """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

    Only the top-level entries of data['items'] are examined, in order. An
    item matches when the URLs (fragment removed) of one of its in-reply-to,
    like, like-of, repost or repost-of properties are accepted by
    self.any_target_in(), or otherwise when the text of its first content
    element is.

    May modify the data arg: 'properties' and 'content' are created when
    missing, and content.html or content.value may be set or replaced. Also
    sets self.entity.type when a match is found.

    Args:
      data: mf2 data dict

    Returns:
      mf2 item dict or None
    """
    response_props = ('in-reply-to', 'like', 'like-of', 'repost', 'repost-of')

    for item in data.get('items', []):
        props = item.setdefault('properties', {})
        # only the first content element is considered
        content = props.setdefault('content', [{}])[0]
        text = content.get('html') or content.get('value')

        mention_type = None
        for prop in response_props:
            string_urls = microformats2.get_string_urls(props.get(prop, []))
            if self.any_target_in([urlparse.urldefrag(u)[0] for u in string_urls]):
                mention_type = prop
                break

        if mention_type is None:
            # no response property matched; skip unless the text mentions us
            if not (text and self.any_target_in(text)):
                continue
            mention_type = 'post'
            url = first_value(props, 'url') or self.source_url
            name = first_value(props, 'name') or first_value(props, 'summary')
            content['html'] = 'mentioned this in %s.' % util.pretty_link(url, text=name)
            text = content['html']

        # found the target!
        rsvp = first_value(props, 'rsvp')
        if rsvp:
            self.entity.type = 'rsvp'
            if not text:
                content['value'] = 'RSVPed %s.' % rsvp
            return item

        self.entity.type = {
            'in-reply-to': 'comment',
            'like-of': 'like',
            'repost-of': 'repost',
        }.get(mention_type, mention_type)
        if not text:
            content['value'] = {
                'comment': 'replied to this.',
                'like': 'liked this.',
                'repost': 'reposted this.',
            }[self.entity.type]
        return item

    return None
def test_get_string_urls(self):
    """Checks microformats2.get_string_urls() against a table of inputs.

    Each case is an (expected, objs) pair. The table expects that plain
    strings are returned as-is, that dict values contribute their
    properties.url strings only when they have a list-valued 'type' and a
    'url' property, and that a nested object in 'url' contributes its own
    'url' values.
    """
    cases = (
        ([], []),
        (['asdf'], ['asdf']),
        # 'type' is a string, not a list
        ([], [{'type': 'h-ok'}]),
        # no 'type' at all
        ([], [{'properties': {'url': ['nope']}}]),
        # typed, but no url property
        ([], [{'type': ['h-ok'], 'properties': {'no': 'url'}}]),
        (['good1', 'good2'],
         ['good1',
          {'type': ['h-ok']},
          {'type': ['h-ok'], 'properties': {'url': ['good2']}}]),
        (['nested'],
         [{'type': ['h-ok'],
           'properties': {'url': [{'type': ['h-nested'], 'url': ['nested']}]}}]),
    )
    for expected, objs in cases:
        # assertEquals is a deprecated alias that was removed in Python 3.12
        self.assertEqual(expected, microformats2.get_string_urls(objs))
def expand_target_urls(self, activity):
    """Expand the inReplyTo or object fields of an ActivityStreams object by
    fetching the original and looking for rel=syndication URLs.

    For each target object with a 'url', the page is fetched and parsed as
    mf2. Its rel=syndication URLs and the 'syndication' properties of its
    items (descending into h-feeds that are not also h-entries) are appended
    to the field as extra {'url': ...} objects. A field holding a single
    dict is converted to a list. Fetch failures are logged and skipped.

    This method modifies the dict in place.

    Args:
      activity: an ActivityStreams dict of the activity being published
    """
    for field in ('inReplyTo', 'object'):
        # microformats2.json_to_object de-dupes, no need to do it here
        targets = activity.get(field)
        if not targets:
            continue
        if isinstance(targets, dict):
            targets = [targets]

        expanded = list(targets)
        for target in targets:
            url = target.get('url')
            if not url:
                continue

            # get_webmention_target weeds out silos and non-HTML targets
            # that we wouldn't want to download and parse
            url, _, ok = util.get_webmention_target(url)
            if not ok:
                continue

            # fetch_mf2 raises a fuss if it can't fetch a mf2 document;
            # easier to just grab this ourselves than add a bunch of
            # special-cases to that method
            logging.debug('expand_target_urls fetching field=%s, url=%s', field, url)
            try:
                resp = util.requests_get(url)
                resp.raise_for_status()
                data = util.mf2py_parse(resp.text, url)
            except AssertionError:
                raise  # for unit tests
            except BaseException:
                # it's not a big deal if we can't fetch an in-reply-to url
                logging.warning(
                    'expand_target_urls could not fetch field=%s, url=%s',
                    field, url, exc_info=True)
                continue

            synd_urls = data.get('rels', {}).get('syndication', [])

            # breadth-first walk over the items, expanding pure h-feeds
            pending = collections.deque(data.get('items', []))
            while pending:
                item = pending.popleft()
                types = set(item.get('type', []))
                if 'h-feed' not in types or 'h-entry' in types:
                    # these can be urls or h-cites
                    synd_urls += microformats2.get_string_urls(
                        item.get('properties', {}).get('syndication', []))
                else:
                    pending.extend(item.get('children', []))

            logging.debug(
                'expand_target_urls found rel=syndication for url=%s: %r',
                url, synd_urls)
            expanded.extend({'url': u} for u in synd_urls)

        activity[field] = expanded
def expand_target_urls(self, activity):
    """Expand the inReplyTo or object fields of an ActivityStreams object by
    fetching the original and looking for rel=syndication URLs.

    For each target object with a 'url' that is not a home page, the page's
    mf2 is fetched. Its rel=syndication URLs and the 'syndication'
    properties of its items (descending into h-feeds that are not also
    h-entries) are appended to the field as extra {'url': ...} objects. A
    field holding a single dict is converted to a list. Fetch failures are
    logged and skipped.

    This method modifies the dict in place.

    Args:
      activity: an ActivityStreams dict of the activity being published
    """
    for field in ('inReplyTo', 'object'):
        # microformats2.json_to_object de-dupes, no need to do it here
        objs = activity.get(field)
        if not objs:
            continue

        if isinstance(objs, dict):
            objs = [objs]

        augmented = list(objs)
        for obj in objs:
            url = obj.get('url')
            if not url:
                continue

            parsed = urllib.parse.urlparse(url)
            # ignore home pages. https://github.com/snarfed/bridgy/issues/760
            if parsed.path in ('', '/'):
                continue

            # get_webmention_target weeds out silos and non-HTML targets
            # that we wouldn't want to download and parse
            url, _, ok = util.get_webmention_target(url)
            if not ok:
                continue

            logging.debug('expand_target_urls fetching field=%s, url=%s', field, url)
            try:
                mf2 = util.fetch_mf2(url)
            except AssertionError:
                raise  # for unit tests
            except BaseException:
                # it's not a big deal if we can't fetch an in-reply-to url.
                # exc_info (not stack_info) so that the exception that made
                # the fetch fail is what ends up in the log.
                logging.info('expand_target_urls could not fetch field=%s, url=%s',
                             field, url, exc_info=True)
                continue

            # tolerate a parse result without 'rels', as we do for 'items'
            synd_urls = mf2.get('rels', {}).get('syndication', [])

            # collect syndication urls from every item, expanding h-feeds
            # (that aren't also h-entries) into their children
            queue = collections.deque(mf2.get('items', []))
            while queue:
                item = queue.popleft()
                item_types = set(item.get('type', []))
                if 'h-feed' in item_types and 'h-entry' not in item_types:
                    queue.extend(item.get('children', []))
                    continue

                # these can be urls or h-cites
                synd_urls += microformats2.get_string_urls(
                    item.get('properties', {}).get('syndication', []))

            logging.debug('expand_target_urls found rel=syndication for url=%s: %r',
                          url, synd_urls)
            augmented += [{'url': u} for u in synd_urls]

        activity[field] = augmented
def get(self, type, source_short_name, string_id, *ids):
    """Serves a single silo object as mf2 HTML or JSON.

    Looks up the source, validates the requested format and ids, loads the
    object from memcache or via self.get_item(), then writes it to the
    response as microformats2.

    NOTE(review): the code after each self.abort() call assumes abort()
    raises and does not return -- confirm against the handler base class.

    Args:
      type: string, only used in the cache key and log label
      source_short_name: string, key into models.sources
      string_id: string, id of the source entity
      ids: strings, passed through to self.get_item()
    """
    source_cls = models.sources.get(source_short_name)
    if not source_cls:
        known = filter(None, models.sources.keys())
        self.abort(400, "Source type '%s' not found. Known sources: %s" %
                   (source_short_name, known))

    self.source = source_cls.get_by_id(string_id)
    if not self.source:
        self.abort(400, 'Source %s %s not found' % (source_short_name, string_id))
    elif self.source.status == 'disabled' or not (
            'listen' in self.source.features or 'email' in self.source.features):
        self.abort(400, 'Source %s is disabled for backfeed' %
                   self.source.bridgy_path())

    fmt = self.request.get('format', 'html')
    if fmt not in ('html', 'json'):
        self.abort(400, 'Invalid format %s, expected html or json' % fmt)

    for item_id in ids:
        if not self.VALID_ID.match(item_id):
            self.abort(404, 'Invalid id %s' % item_id)

    label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
    cache_key = 'H ' + label
    obj = memcache.get(cache_key)
    if not obj or appengine_config.DEBUG:
        logging.info('Fetching %s', label)
        try:
            obj = self.get_item(*ids)
        except models.DisableSource:
            self.abort(401, "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!")
        except ValueError as e:
            self.abort(400, '%s error:\n%s' % (self.source.GR_CLASS.NAME, e))
        except Exception as e:
            # pass through all API HTTP errors if we can identify them
            code, body = util.interpret_http_exception(e)
            if not code:
                raise
            self.response.status_int = int(code)
            self.response.headers['Content-Type'] = 'text/plain'
            self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body))
            return
        memcache.set(cache_key, obj, time=CACHE_TIME)
    else:
        logging.info('Using cached object for %s', label)

    if not obj:
        self.abort(404, label)

    if self.source.is_blocked(obj):
        self.abort(410, 'That user is currently blocked')

    # use https for profile pictures so we don't cause SSL mixed mode errors
    # when serving over https.
    image = obj.get('author', {}).get('image', {})
    image_url = image.get('url')
    if image_url:
        image['url'] = util.update_scheme(image_url, self)

    mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

    # try to include the author's silo profile url
    author = first_props(mf2_json.get('properties', {})).get('author', {})
    author_uid = first_props(author.get('properties', {})).get('uid', '')
    parsed = util.parse_tag_uri(author_uid) if author_uid else None
    if parsed:
        silo_url = self.source.gr_source.user_url(parsed[1])
        urls = author.get('properties', {}).setdefault('url', [])
        if silo_url not in microformats2.get_string_urls(urls):
            urls.append(silo_url)

    # write the response!
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    if fmt == 'html':
        self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
        url = obj.get('url', '')
        refresh = ''
        if url:
            refresh = '<meta http-equiv="refresh" content="0;url=%s">' % url
        page = TEMPLATE.substitute({
            'refresh': refresh,
            'url': url,
            'body': microformats2.json_to_html(mf2_json),
            'title': self.get_title(obj),
        })
        self.response.out.write(page)
    elif fmt == 'json':
        self.response.headers['Content-Type'] = 'application/json; charset=utf-8'
        self.response.out.write(json.dumps(mf2_json, indent=2))
def get(self, type, source_short_name, string_id, *ids):
    """Serves a single silo object as mf2 HTML or JSON.

    Looks up the source, validates the requested format and ids, loads the
    object from memcache or via self.get_item(), then writes it to the
    response as microformats2. Errors from get_item() that can be mapped to
    an HTTP status (or that look like connection failures, reported as 503)
    are passed through as plain text.

    NOTE(review): the code after each self.abort() call assumes abort()
    raises and does not return -- confirm against the handler base class.

    Args:
      type: string, only used in the cache key and log label
      source_short_name: string, key into models.sources
      string_id: string, id of the source entity
      ids: strings, passed through to self.get_item()
    """
    source_cls = models.sources.get(source_short_name)
    if not source_cls:
        known = filter(None, models.sources.keys())
        self.abort(400, "Source type '%s' not found. Known sources: %s" %
                   (source_short_name, known))

    self.source = source_cls.get_by_id(string_id)
    if not self.source:
        self.abort(400, 'Source %s %s not found' % (source_short_name, string_id))
    elif not ('listen' in self.source.features) if self.source.status != 'disabled' else True:
        self.abort(400, 'Source %s is disabled for backfeed' %
                   self.source.bridgy_path())

    fmt = self.request.get('format', 'html')
    if fmt not in ('html', 'json'):
        self.abort(400, 'Invalid format %s, expected html or json' % fmt)

    for item_id in ids:
        if not self.VALID_ID.match(item_id):
            self.abort(404, 'Invalid id %s' % item_id)

    label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
    cache_key = 'H ' + label
    obj = memcache.get(cache_key)
    if not obj:
        logging.info('Fetching %s', label)
        try:
            obj = self.get_item(*ids)
        except models.DisableSource:
            self.abort(401, "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!")
        except Exception as e:
            # pass through all API HTTP errors if we can identify them
            code, body = util.interpret_http_exception(e)
            if not code and util.is_connection_failure(e):
                code, body = 503, str(e)
            if not code:
                raise
            self.response.status_int = int(code)
            self.response.headers['Content-Type'] = 'text/plain'
            self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body))
            return
        memcache.set(cache_key, obj, time=CACHE_TIME)
    else:
        logging.info('Using cached object for %s', label)

    if not obj:
        self.abort(404, label)

    # use https for profile pictures so we don't cause SSL mixed mode errors
    # when serving over https.
    image = obj.get('author', {}).get('image', {})
    image_url = image.get('url')
    if image_url:
        image['url'] = util.update_scheme(image_url, self)

    mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

    # try to include the author's silo profile url
    author = first_props(mf2_json.get('properties', {})).get('author', {})
    author_uid = first_props(author.get('properties', {})).get('uid', '')
    parsed = util.parse_tag_uri(author_uid) if author_uid else None
    if parsed:
        silo_url = self.source.gr_source.user_url(parsed[1])
        urls = author.get('properties', {}).setdefault('url', [])
        if silo_url not in microformats2.get_string_urls(urls):
            urls.append(silo_url)

    # write the response!
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    if fmt == 'html':
        self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
        page = TEMPLATE.substitute({
            'url': obj.get('url', ''),
            'body': microformats2.json_to_html(mf2_json),
            'title': self.get_title(obj),
        })
        self.response.out.write(page)
    elif fmt == 'json':
        self.response.headers['Content-Type'] = 'application/json; charset=utf-8'
        self.response.out.write(json.dumps(mf2_json, indent=2))
def expand_target_urls(self, activity):
    """Expand the inReplyTo or object fields of an ActivityStreams object by
    fetching the original and looking for rel=syndication URLs.

    For each target object with a 'url', the page is fetched and parsed
    with mf2py. Its rel=syndication URLs and the 'syndication' properties of
    its items (descending into h-feeds that are not also h-entries) are
    appended to the field as extra {'url': ...} objects. A field holding a
    single dict is converted to a list. Fetch failures are logged and
    skipped.

    This method modifies the dict in place.

    Args:
      activity: an ActivityStreams dict of the activity being published
    """
    for field in ('inReplyTo', 'object'):
        # microformats2.json_to_object de-dupes, no need to do it here
        originals = activity.get(field)
        if not originals:
            continue
        if isinstance(originals, dict):
            originals = [originals]

        result = list(originals)
        for original in originals:
            url = original.get('url')
            if not url:
                continue

            # get_webmention_target weeds out silos and non-HTML targets
            # that we wouldn't want to download and parse
            url, _, ok = util.get_webmention_target(url)
            if not ok:
                continue

            # fetch_mf2 raises a fuss if it can't fetch a mf2 document;
            # easier to just grab this ourselves than add a bunch of
            # special-cases to that method
            logging.debug('expand_target_urls fetching field=%s, url=%s', field, url)
            try:
                resp = util.requests_get(url)
                resp.raise_for_status()
                parser = mf2py.Parser(url=url, doc=resp.text)
                data = parser.to_dict()
            except AssertionError:
                raise  # for unit tests
            except BaseException:
                # it's not a big deal if we can't fetch an in-reply-to url
                logging.warning('expand_target_urls could not fetch field=%s, url=%s',
                                field, url, exc_info=True)
                continue

            synd_urls = data.get('rels', {}).get('syndication', [])

            # breadth-first walk over the items, expanding pure h-feeds
            todo = collections.deque(data.get('items', []))
            while todo:
                item = todo.popleft()
                kinds = set(item.get('type', []))
                if 'h-feed' not in kinds or 'h-entry' in kinds:
                    # these can be urls or h-cites
                    syndication = item.get('properties', {}).get('syndication', [])
                    synd_urls += microformats2.get_string_urls(syndication)
                else:
                    todo.extend(item.get('children', []))

            logging.debug('expand_target_urls found rel=syndication for url=%s: %r',
                          url, synd_urls)
            result.extend({'url': u} for u in synd_urls)

        activity[field] = result
class ItemHandler(webapp2.RequestHandler):
    """Fetches a post, repost, like, or comment and serves it as mf2 HTML or JSON.

    Subclasses implement get_item() to load the ActivityStreams object.

    Attributes:
      source: the source entity for the current request; set by get()
      VALID_ID: compiled regex that every id path component must match
    """
    handle_exception = handlers.handle_exception
    source = None

    VALID_ID = re.compile(r'^[\w.+:@-]+$')

    def head(self, *args):
        """Return an empty 200 with no caching directives."""

    def get_item(self, id):
        """Fetches and returns an object from the given source.

        To be implemented by subclasses. The source is available as
        self.source.

        Args:
          id: string

        Returns:
          ActivityStreams object dict
        """
        raise NotImplementedError()

    def get_title(self, obj):
        """Returns the string to be used in the <title> tag.

        Uses the object's title, falling back to its content and then to a
        fixed default.

        Args:
          obj: ActivityStreams object
        """
        return obj.get('title') or obj.get('content') or 'Bridgy Response'

    def get_post(self, id, **kwargs):
        """Fetch a post.

        Exceptions raised by the fetch are handed to
        util.interpret_http_exception() and not propagated; in that case,
        and when no post is found, this returns None.

        Args:
          id: string, site-specific post id
          kwargs: passed through to get_activities

        Returns:
          ActivityStreams object dict, or None
        """
        try:
            posts = self.source.get_activities(
                activity_id=id, user_id=self.source.key.id(), **kwargs)
            if posts:
                return posts[0]
            logging.warning('Source post %s not found', id)
        except Exception as e:
            util.interpret_http_exception(e)

    def get(self, type, source_short_name, string_id, *ids):
        """Serves a single silo object as mf2 HTML or JSON.

        Looks up the source, validates the requested format and ids, loads
        the object from memcache or via get_item(), then writes it to the
        response as microformats2. Errors from get_item() that can be mapped
        to an HTTP status (or that look like connection failures, reported
        as 503) are passed through as plain text.

        NOTE(review): the code after each self.abort() call assumes abort()
        raises and does not return -- confirm against webapp2.

        Args:
          type: string, only used in the cache key and log label
          source_short_name: string, key into models.sources
          string_id: string, id of the source entity
          ids: strings, passed through to get_item()
        """
        source_cls = models.sources.get(source_short_name)
        if not source_cls:
            self.abort(400, "Source type '%s' not found. Known sources: %s" %
                       (source_short_name, filter(None, models.sources.keys())))

        self.source = source_cls.get_by_id(string_id)
        if not self.source:
            self.abort(400, 'Source %s %s not found' % (source_short_name, string_id))

        format = self.request.get('format', 'html')
        if format not in ('html', 'json'):
            self.abort(400, 'Invalid format %s, expected html or json' % format)

        for id in ids:
            if not self.VALID_ID.match(id):
                self.abort(404, 'Invalid id %s' % id)

        label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
        cache_key = 'H ' + label
        obj = memcache.get(cache_key)
        if obj:
            logging.info('Using cached object for %s', label)
        else:
            logging.info('Fetching %s', label)
            try:
                obj = self.get_item(*ids)
            # "as" form: valid on Python 2.6+ and 3, unlike "except X, e"
            except Exception as e:
                # pass through all API HTTP errors if we can identify them
                code, body = util.interpret_http_exception(e)
                if not code and util.is_connection_failure(e):
                    code = 503
                    body = str(e)
                if code:
                    self.response.status_int = int(code)
                    self.response.headers['Content-Type'] = 'text/plain'
                    self.response.write('%s error:\n%s' %
                                        (self.source.GR_CLASS.NAME, body))
                    return
                else:
                    raise
            memcache.set(cache_key, obj, time=CACHE_TIME)

        if not obj:
            self.abort(404, label)

        # use https for profile pictures so we don't cause SSL mixed mode errors
        # when serving over https.
        author = obj.get('author', {})
        image = author.get('image', {})
        url = image.get('url')
        if url:
            image['url'] = util.update_scheme(url, self)

        mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

        # try to include the author's silo profile url
        author = first_props(mf2_json.get('properties', {})).get('author', {})
        author_uid = first_props(author.get('properties', {})).get('uid', '')
        if author_uid:
            parsed = util.parse_tag_uri(author_uid)
            if parsed:
                silo_url = self.source.gr_source.user_url(parsed[1])
                urls = author.get('properties', {}).setdefault('url', [])
                if silo_url not in microformats2.get_string_urls(urls):
                    urls.append(silo_url)

        # write the response!
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        if format == 'html':
            self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
            self.response.out.write(TEMPLATE.substitute({
                'url': obj.get('url', ''),
                'body': microformats2.json_to_html(mf2_json),
                'title': self.get_title(obj),
            }))
        elif format == 'json':
            self.response.headers['Content-Type'] = 'application/json; charset=utf-8'
            self.response.out.write(json.dumps(mf2_json, indent=2))
image = author.get('image', {}) url = image.get('url') if url: image['url'] = util.update_scheme(url, self) mf2_json = microformats2.object_to_json(obj) # try to include the author's silo profile url author = first_props(mf2_json.get('properties', {})).get('author', {}) author_uid = first_props(author.get('properties', {})).get('uid', '') if author_uid: parsed = util.parse_tag_uri(author_uid) if parsed: silo_url = self.source.gr_source.user_url(parsed[1]) urls = author.get('properties', {}).setdefault('url', []) if silo_url not in microformats2.get_string_urls(urls): urls.append(silo_url) # write the response! self.response.headers['Access-Control-Allow-Origin'] = '*' if format == 'html': self.response.headers['Content-Type'] = 'text/html; charset=utf-8' self.response.out.write(TEMPLATE.substitute({ 'url': obj.get('url', ''), 'body': microformats2.json_to_html(mf2_json), 'title': obj.get('title', obj.get('content', 'Bridgy Response')), })) elif format == 'json': self.response.headers['Content-Type'] = 'application/json; charset=utf-8' self.response.out.write(json.dumps(mf2_json, indent=2))
def dispatch_request(self, site, key_id, **kwargs):
    """Handle HTTP request.

    Looks up the source, validates the requested format and ids, loads the
    object via self.get_item(), and returns it as an mf2 HTML page
    (format=html, the default) or as the mf2 JSON object (format=json).

    NOTE(review): the code after each error() call assumes error() raises
    and does not return -- confirm against its definition.

    Args:
      site: string, key into models.sources
      key_id: string, id of the source entity
      kwargs: id path components, passed through to self.get_item()
    """
    source_cls = models.sources.get(site)
    if not source_cls:
        error(f"Source type '{site}' not found. Known sources: {[s for s in models.sources.keys() if s]}")

    self.source = source_cls.get_by_id(key_id)
    if not self.source:
        error(f'Source {site} {key_id} not found')
    elif not (self.source.status != 'disabled'
              and 'listen' in self.source.features):
        error(f'Source {self.source.bridgy_path()} is disabled for backfeed')

    fmt = request.values.get('format', 'html')
    if fmt not in ('html', 'json'):
        error(f'Invalid format {fmt}, expected html or json')

    for item_id in kwargs.values():
        if not self.VALID_ID.match(item_id):
            error(f'Invalid id {item_id}', 404)

    try:
        obj = self.get_item(**kwargs)
    except models.DisableSource:
        error("Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!", 401)
    except ValueError as e:
        error(f'{self.source.GR_CLASS.NAME} error: {e}')

    if not obj:
        error(f'Not found: {site}:{key_id} {kwargs}', 404)

    if self.source.is_blocked(obj):
        error('That user is currently blocked', 410)

    # use https for profile pictures so we don't cause SSL mixed mode errors
    # when serving over https.
    image = obj.get('author', {}).get('image', {})
    image_url = image.get('url')
    if image_url:
        image['url'] = util.update_scheme(image_url, request)

    mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

    # try to include the author's silo profile url
    author = first_props(mf2_json.get('properties', {})).get('author', {})
    author_uid = first_props(author.get('properties', {})).get('uid', '')
    parsed = util.parse_tag_uri(author_uid) if author_uid else None
    if parsed:
        urls = author.get('properties', {}).setdefault('url', [])
        try:
            silo_url = self.source.gr_source.user_url(parsed[1])
            if silo_url not in microformats2.get_string_urls(urls):
                urls.append(silo_url)
        except NotImplementedError:  # from gr_source.user_url()
            pass

    # write the response!
    if fmt == 'html':
        url = obj.get('url', '')
        refresh = ''
        if url:
            refresh = f'<meta http-equiv="refresh" content="0;url={url}">'
        return TEMPLATE.substitute({
            'refresh': refresh,
            'url': url,
            'body': microformats2.json_to_html(mf2_json),
            'title': obj.get('title') or obj.get('content') or 'Bridgy Response',
        })
    elif fmt == 'json':
        return mf2_json