def find_mention_item(self, items):
  """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

  May modify the items arg, e.g. may set or replace content.html or
  content.value.

  Args:
    items: sequence of mf2 item dicts

  Returns:
    mf2 item dict or None
  """
  # find target URL in source
  for item in items:
    props = item.setdefault('properties', {})

    # find first non-empty content element
    content = props.setdefault('content', [{}])[0]
    text = content.get('html') or content.get('value')

    for type in 'in-reply-to', 'like', 'like-of', 'repost', 'repost-of':
      urls = [urllib.parse.urldefrag(u)[0] for u in
              microformats2.get_string_urls(props.get(type, []))]
      if self.any_target_in(urls):
        break
    else:
      if text and self.any_target_in(text):
        type = 'post'
        url = get_first(props, 'url') or self.source_url
        name = get_first(props, 'name') or get_first(props, 'summary')
        text = content['html'] = \
          f'mentioned this in {util.pretty_link(url, text=name, max_length=280)}.'
      else:
        type = None

    if type:
      # found the target!
      rsvp = get_first(props, 'rsvp')
      if rsvp:
        self.entity.type = 'rsvp'
        if not text:
          content['value'] = f'RSVPed {rsvp}.'
      else:
        self.entity.type = {'in-reply-to': 'comment',
                            'like-of': 'like',
                            'repost-of': 'repost',
                            }.get(type, type)
        if not text:
          content['value'] = {'comment': 'replied to this.',
                              'like': 'liked this.',
                              'repost': 'reposted this.',
                              }[self.entity.type]
      return item

    # check children in case this is eg an h-feed
    found = self.find_mention_item(item.get('children', []))
    if found:
      return found

  return None
def _single_target(self):
  """
  Returns: string URL, the source's inReplyTo or object (if appropriate)
  """
  target = util.get_first(self.source_obj, 'inReplyTo')
  if target:
    return util.get_url(target)

  if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT:
    return util.get_url(util.get_first(self.source_obj, 'object'))
def _render_attachments(attachments, obj):
  """Renders ActivityStreams attachments (or tags etc) as HTML.

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    attachments: sequence of decoded JSON ActivityStreams objects
    obj: top-level decoded JSON ActivityStreams object

  Returns:
    string, rendered HTML
  """
  content = ''

  for att in attachments:
    name = att.get('displayName', '')
    stream = get_first(att, 'stream', {}).get('url') or ''
    image = get_first(att, 'image', {}).get('url') or ''

    open_a_tag = False
    content += '\n<p>'

    type = att.get('objectType')
    if type == 'video':
      if stream:
        content += vid(stream, poster=image)
    elif type == 'audio':
      if stream:
        content += aud(stream)
    else:
      url = att.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      if image:
        content += '\n' + img(image, name)

    if name and type != 'image':
      content += '\n<span class="name">%s</span>' % name
    if open_a_tag:
      content += '\n</a>'

    summary = att.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary

    content += '\n</p>'

  return content
def _render_attachments(attachments, obj):
  """Renders ActivityStreams attachments (or tags etc) as HTML.

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    attachments: sequence of decoded JSON ActivityStreams objects
    obj: top-level decoded JSON ActivityStreams object

  Returns:
    string, rendered HTML
  """
  content = ''

  for att in attachments:
    name = att.get('displayName', '')
    stream = get_first(att, 'stream', {}).get('url') or ''
    image = get_first(att, 'image', {}).get('url') or ''

    open_a_tag = False
    content += '\n<p>'

    if att.get('objectType') == 'video':
      if stream:
        content += vid(stream, poster=image)
    elif att.get('objectType') == 'audio':
      if stream:
        content += aud(stream)
    else:
      url = att.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      if image:
        content += '\n' + img(image, name)

    if name:
      content += '\n<span class="name">%s</span>' % name
    if open_a_tag:
      content += '\n</a>'

    summary = att.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary

    content += '\n</p>'

  return content
def first_props(props):
  """Converts a multiply-valued dict to singly valued.

  Args:
    props: dict of properties, where each value is a sequence

  Returns:
    corresponding dict with just the first value of each sequence, or '' if
    the sequence is empty
  """
  return {k: util.get_first(props, k, '') for k in props} if props else {}
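# A minimal usage sketch for first_props above: mf2 property values are
# always lists, so this collapses each to its first element, or '' when the
# list is empty. The example dict is hypothetical.
example = first_props({
  'name': ['Alice'],
  'url': ['https://a.example/', 'https://b.example/'],
  'photo': [],
})
assert example == {'name': 'Alice', 'url': 'https://a.example/', 'photo': ''}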
def _prepare_actor(actor):
  """Preprocesses an AS1 actor to prepare it to be rendered as Atom.

  Modifies actor in place.

  Args:
    actor: ActivityStreams 1 actor dict
  """
  if actor:
    actor['image'] = util.get_first(actor, 'image')
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
  """Converts ActivityStreams 1 activities to an Atom feed.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.

  Returns:
    unicode string with Atom XML
  """
  # Strip query params from URLs so that we don't include access tokens, etc
  host_url = (_remove_query_params(host_url) if host_url
              else 'https://github.com/snarfed/granary')
  if request_url is None:
    request_url = host_url

  for a in activities:
    _prepare_activity(a, reader=reader)

  updated = (util.get_first(activities[0], 'object', default={}).get('published', '')
             if activities else '')

  if actor is None:
    actor = {}

  return jinja_env.get_template(FEED_TEMPLATE).render(
    actor=Defaulter(actor),
    host_url=host_url,
    items=[Defaulter(a) for a in activities],
    mimetypes=mimetypes,
    rels=rels or {},
    request_url=request_url,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    updated=updated,
    VERBS_WITH_OBJECT=source.VERBS_WITH_OBJECT,
    xml_base=xml_base,
  )
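# A minimal usage sketch for activities_to_atom above, with a hypothetical
# AS1 activity; only plain dicts are needed, no network access.
activity = {
  'verb': 'post',
  'object': {
    'objectType': 'note',
    'url': 'https://example.com/post/1',
    'content': 'Hello world',
    'published': '2019-01-01T00:00:00Z',
  },
}
atom_xml = activities_to_atom([activity], actor={'displayName': 'Alice'},
                              title='Example feed')
# atom_xml is a unicode string of Atom XML for a one-entry feed.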
def _create(self, obj, preview, include_link=False, ignore_formatting=False):
  """Creates or previews creating for the previous two methods.

  https://www.flickr.com/services/api/upload.api.html
  https://www.flickr.com/services/api/flickr.photos.comments.addComment.html
  https://www.flickr.com/services/api/flickr.favorites.add.html
  https://www.flickr.com/services/api/flickr.photos.people.add.html

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: boolean

  Returns:
    a CreationResult
  """
  # photo, comment, or like
  type = source.object_type(obj)
  logging.debug('publishing object type %s to Flickr', type)
  link_text = '(Originally published at: %s)' % obj.get('url')

  image_url = util.get_first(obj, 'image', {}).get('url')
  video_url = util.get_first(obj, 'stream', {}).get('url')
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     strip_first_video_tag=bool(video_url))

  if (video_url or image_url) and type in ('note', 'article'):
    name = obj.get('displayName')
    people = self._get_person_tags(obj)
    hashtags = [t.get('displayName') for t in obj.get('tags', [])
                if t.get('objectType') == 'hashtag' and t.get('displayName')]
    lat = obj.get('location', {}).get('latitude')
    lng = obj.get('location', {}).get('longitude')

    # if name does not represent an explicit title, then we'll just
    # use it as the title and wipe out the content
    if name and content and not mf2util.is_name_a_title(name, content):
      name = content
      content = None

    # add original post link
    if include_link:
      content = ((content + '\n\n') if content else '') + link_text

    if preview:
      preview_content = ''
      if name:
        preview_content += '<h4>%s</h4>' % name
      if content:
        preview_content += '<div>%s</div>' % content
      if hashtags:
        preview_content += '<div> %s</div>' % ' '.join('#' + t for t in hashtags)
      if people:
        preview_content += '<div> with %s</div>' % ', '.join(
          ('<a href="%s">%s</a>' % (
            p.get('url'), p.get('displayName') or 'User %s' % p.get('id'))
           for p in people))
      if lat and lng:
        preview_content += '<div> at <a href="https://maps.google.com/maps?q=%s,%s">%s, %s</a></div>' % (
          lat, lng, lat, lng)

      if video_url:
        preview_content += ('<video controls src="%s"><a href="%s">this video'
                            '</a></video>' % (video_url, video_url))
      else:
        preview_content += '<img src="%s" />' % image_url

      return source.creation_result(content=preview_content, description='post')

    params = []
    if name:
      params.append(('title', name))
    if content:
      params.append(('description', content))
    if hashtags:
      params.append(
        ('tags', ','.join('"%s"' % t if ' ' in t else t for t in hashtags)))

    file = util.urlopen(video_url or image_url)
    resp = self.upload(params, file)
    photo_id = resp.get('id')
    resp.update({
      'type': 'post',
      'url': self.photo_url(self.path_alias() or self.user_id(), photo_id),
    })
    if video_url:
      resp['granary_message'] = \
        "Note that videos take time to process before they're visible."

    # add person tags
    for person_id in sorted(p.get('id') for p in people):
      self.call_api_method('flickr.photos.people.add', {
        'photo_id': photo_id,
        'user_id': person_id,
      })

    # add location
    if lat and lng:
      self.call_api_method('flickr.photos.geo.setLocation', {
        'photo_id': photo_id,
        'lat': lat,
        'lon': lng,
      })

    return source.creation_result(resp)

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  # maybe a comment on a flickr photo?
  if type == 'comment' or obj.get('inReplyTo'):
    if not base_id:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a photo to comment on.',
        error_html='Could not find a photo to <a href="http://indiewebcamp.com/reply">comment on</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
        'link to a Flickr photo or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')

    if include_link:
      content += '\n\n' + link_text
    if preview:
      return source.creation_result(
        content=content,
        description='comment on <a href="%s">this photo</a>.' % base_url)

    resp = self.call_api_method('flickr.photos.comments.addComment', {
      'photo_id': base_id,
      'comment_text': content,
    })
    resp = resp.get('comment', {})
    resp.update({
      'type': 'comment',
      'url': resp.get('permalink'),
    })
    return source.creation_result(resp)

  if type == 'like':
    if not base_id:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a photo to favorite.',
        error_html='Could not find a photo to <a href="http://indiewebcamp.com/like">favorite</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/like">like-of</a> '
        'link to a Flickr photo or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')
    if preview:
      return source.creation_result(
        description='favorite <a href="%s">this photo</a>.' % base_url)

    # this method doesn't return any data
    self.call_api_method('flickr.favorites.add', {
      'photo_id': base_id,
    })
    # TODO should we canonicalize the base_url (e.g. removing trailing path
    # info like "/in/contacts/")
    return source.creation_result({
      'type': 'like',
      'url': '%s#favorited-by-%s' % (base_url, self.user_id()),
    })

  return source.creation_result(
    abort=False,
    error_plain='Cannot publish type=%s to Flickr.' % type,
    error_html='Cannot publish type=%s to Flickr.' % type)
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item types. Does *not*
  yet support h-feed.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if
      necessary, e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    def fetch(url):
      return mf2py.parse(util.requests_get(url).text, url=url,
                         img_with_alt=True)
    author = mf2util.find_author(
      {'items': [mf2]}, hentry=mf2,
      fetch_mf2_func=fetch if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  if not author:
    author = actor

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props:  # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    return urllib.parse.urlparse(url).netloc

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  for type in 'audio', 'video':
    attachments.extend({'objectType': type, 'stream': {'url': url}}
                       for url in get_string_urls(props.get(type, [])))

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [{'url': url} for url in get_string_urls(props.get('video', []))],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, basestring)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except ValueError:
          logging.warn(
            'Could not convert latitude/longitude (%s, %s) to decimal',
            lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of',
          'in-reply-to', 'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False,
                      entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append(
          {'value': url, 'alt': name} if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')

  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def image_url(obj):
  return util.get_first(obj, 'image', {}).get('url')
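# A quick sketch for image_url above, assuming granary-style util.get_first,
# which treats a bare dict value as a one-element list:
assert image_url({'image': [{'url': 'https://example.com/a.jpg'}]}) == \
  'https://example.com/a.jpg'
assert image_url({'image': {'url': 'https://example.com/b.jpg'}}) == \
  'https://example.com/b.jpg'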
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item types. Does *not*
  yet support h-feed.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if
      necessary, e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    author = mf2util.find_author(
      {'items': [mf2]}, hentry=mf2,
      fetch_mf2_func=util.fetch_mf2 if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  if not author:
    author = actor

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props:  # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    return urllib.parse.urlparse(url).netloc

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  #
  # the duration mf2 property is still emerging. examples in the wild use both
  # integer seconds and ISO 8601 durations.
  # https://indieweb.org/duration
  # https://en.wikipedia.org/wiki/ISO_8601#Durations
  duration = prop.get('duration') or prop.get('length')
  if duration:
    if util.is_int(duration):
      duration = int(duration)
    else:
      parsed = util.parse_iso8601_duration(duration)
      if parsed:
        duration = int(parsed.total_seconds())
      else:
        logging.debug('Unknown format for length or duration %r', duration)
        duration = None

  stream = None
  bytes = size_to_bytes(prop.get('size'))
  for type in 'audio', 'video':
    atts = [{
      'objectType': type,
      'stream': {
        'url': url,
        # integer seconds: http://activitystrea.ms/specs/json/1.0/#media-link
        'duration': duration,
        # file size in bytes. nonstandard, not in AS1 or AS2
        'size': bytes,
      },
    } for url in get_string_urls(props.get(type, []))]
    attachments.extend(atts)
    if atts:
      stream = atts[0]['stream']

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [stream],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, str)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except ValueError:
          logging.debug(
            'Could not convert latitude/longitude (%s, %s) to decimal',
            lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of',
          'in-reply-to', 'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)
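# A minimal usage sketch for json_to_object above, with a hand-built mf2
# h-entry (normally this would come from mf2py):
entry = {
  'type': ['h-entry'],
  'properties': {
    'name': ['My post'],
    'url': ['https://example.com/post'],
    'content': [{'html': '<p>hi</p>', 'value': 'hi'}],
  },
}
as1 = json_to_object(entry)
# as1 is an AS1 dict: 'displayName' comes from name, 'url' from the first
# url value, and 'content' from the content HTML.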
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string, the mf2 class that entries should be given (e.g.
      'h-cite' when parsing a reference to a foreign entry). defaults to
      'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj:
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': (['h-card'] if obj_type == 'person'
             else ['h-card', 'p-location'] if obj_type == 'place'
             else [entry_class]),
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': [image.get('url') for image in
                (util.get_list(obj, 'image') or util.get_list(primary, 'image'))],
      'video': [obj.get('stream', primary.get('stream', {})).get('url')],
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'latitude': primary.get('latitude'),
      'longitude': primary.get('longitude'),
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
    },
    'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                 for c in primary.get('attachments', [])
                 if c.get('objectType') in ('note', 'article')],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      cls = 'u-category h-card'
    elif tag.get('objectType') == 'hashtag':
      cls = 'u-category'
    else:
      continue
    ret['properties']['category'].append(object_to_json(tag, entry_class=cls))

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = util.get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
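# A minimal usage sketch for object_to_json above, with a hypothetical AS1
# note; converting it back to mf2 yields an h-entry.
note = {
  'objectType': 'note',
  'content': 'Hello world',
  'url': 'https://example.com/post/1',
}
mf2 = object_to_json(note)
# mf2['type'] == ['h-entry'], and mf2['properties'] holds url, content, etc.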
def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  if not act_type or act_type == 'post':
    primary = a.get('object', {})
  else:
    primary = a
  obj = a.setdefault('object', {})

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    type = att.get('objectType')

    if type == 'image':
      image_atts.append(util.get_first(att, 'image'))
      continue

    image_urls_seen |= set(util.get_urls(att, 'image'))
    if type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties', []))
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue
    url = image.get('url')
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if (url and url not in image_urls_seen and
        not img_src_re.search(obj['rendered_content'])):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'
def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None,
                           home_page_url=None):
  """Converts ActivityStreams activities to a JSON feed.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    home_page_url: string, the home page URL
    feed_url: the URL of the JSON Feed, if any. Included in the feed_url field.

  Returns:
    dict, JSON Feed data, ready to be JSON-encoded
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  def image_url(obj):
    return util.get_first(obj, 'image', {}).get('url')

  def actor_name(obj):
    return obj.get('displayName') or obj.get('username')

  if not actor:
    actor = {}

  items = []
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue
    author = obj.get('author', {})
    content = obj.get('content')
    obj_title = obj.get('title') or obj.get('displayName')
    item = {
      'id': obj.get('id') or obj.get('url'),
      'url': obj.get('url'),
      'image': image_url(obj),
      'title': obj_title if mf2util.is_name_a_title(obj_title, content) else None,
      'summary': obj.get('summary'),
      'content_html': content,
      'date_published': obj.get('published'),
      'date_modified': obj.get('updated'),
      'author': {
        'name': actor_name(author),
        'url': author.get('url'),
        'avatar': image_url(author),
      },
      'attachments': [],
    }

    for att in obj.get('attachments', []):
      url = (util.get_first(att, 'stream') or util.get_first(att, 'image') or att
             ).get('url')
      mime = mimetypes.guess_type(url)[0] if url else None
      if (att.get('objectType') in ATTACHMENT_TYPES or
          mime and mime.split('/')[0] in ATTACHMENT_TYPES):
        item['attachments'].append({
          'url': url or '',
          'mime_type': mime,
          'title': att.get('title'),
        })

    if not item['content_html']:
      item['content_text'] = ''

    items.append(item)

  return util.trim_nulls({
    'version': 'https://jsonfeed.org/version/1',
    'title': title or actor_name(actor) or 'JSON Feed',
    'feed_url': feed_url,
    'home_page_url': home_page_url or actor.get('url'),
    'author': {
      'name': actor_name(actor),
      'url': actor.get('url'),
      'avatar': image_url(actor),
    },
    'items': items,
  }, ignore='content_text')
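# A minimal usage sketch for activities_to_jsonfeed above, with a
# hypothetical AS1 activity; the result is a plain dict, ready for json.dumps.
feed = activities_to_jsonfeed(
  [{'object': {'objectType': 'note', 'content': 'Hello world',
               'url': 'https://example.com/post/1'}}],
  actor={'displayName': 'Alice', 'url': 'https://example.com/'},
  title='Example feed', feed_url='https://example.com/feed.json')
# feed['version'] == 'https://jsonfeed.org/version/1' and feed['items'] has
# one entry with content_html set.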
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
  """Converts ActivityStreams activities to an RSS 2.0 feed.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    feed_url: string, the URL for this RSS feed
    home_page_url: string, the home page URL
    hfeed: dict, parsed mf2 h-feed, if available

  Returns:
    unicode string with RSS 2.0 XML
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, str)):
    raise TypeError('activities may not be a dict or string')

  fg = FeedGenerator()
  fg.id(feed_url)
  assert feed_url
  fg.link(href=feed_url, rel='self')
  if home_page_url:
    fg.link(href=home_page_url, rel='alternate')
  # TODO: parse language from lang attribute:
  # https://github.com/microformats/mf2py/issues/150
  fg.language('en')
  fg.generator('granary', uri='https://granary.io/')

  hfeed = hfeed or {}
  actor = actor or {}
  image = (util.get_url(hfeed.get('properties', {}), 'photo') or
           util.get_url(actor, 'image'))
  if image:
    fg.image(image)

  props = hfeed.get('properties') or {}
  content = microformats2.get_text(util.get_first(props, 'content', ''))
  summary = util.get_first(props, 'summary', '')
  desc = content or summary or '-'
  fg.description(desc)  # required
  fg.title(title or util.ellipsize(desc))  # required

  latest = None
  feed_has_enclosure = False
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    item = fg.add_entry()
    url = obj.get('url')
    id = obj.get('id') or url
    item.id(id)
    item.link(href=url)
    item.guid(url, permalink=True)

    # title (required)
    title = (obj.get('title') or obj.get('displayName') or
             util.ellipsize(obj.get('content', '-')))
    # strip HTML tags
    title = util.parse_html(title).get_text('').strip()
    item.title(title)

    content = microformats2.render_content(
      obj, include_location=True, render_attachments=True, render_image=True)
    if not content:
      content = obj.get('summary')
    if content:
      item.content(content, type='CDATA')

    categories = [
      {'term': t['displayName']} for t in obj.get('tags', [])
      if t.get('displayName') and
      t.get('verb') not in ('like', 'react', 'share') and
      t.get('objectType') not in ('article', 'person', 'mention')]
    item.category(categories)

    author = obj.get('author', {})
    author = {
      'name': author.get('displayName') or author.get('username'),
      'uri': author.get('url'),
      'email': author.get('email') or '-',
    }
    item.author(author)

    published = obj.get('published') or obj.get('updated')
    if published and isinstance(published, str):
      try:
        dt = mf2util.parse_datetime(published)
        if not isinstance(dt, datetime):
          dt = datetime.combine(dt, time.min)
        if not dt.tzinfo:
          dt = dt.replace(tzinfo=util.UTC)
        item.published(dt)
        if not latest or dt > latest:
          latest = dt
      except ValueError:  # bad datetime string
        pass

    item_has_enclosure = False
    for att in obj.get('attachments', []):
      stream = util.get_first(att, 'stream') or att
      if not stream:
        continue

      url = stream.get('url') or ''
      mime = mimetypes.guess_type(url)[0] or ''
      if (att.get('objectType') in ENCLOSURE_TYPES or
          mime and mime.split('/')[0] in ENCLOSURE_TYPES):
        if item_has_enclosure:
          logging.info(
            'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s',
            id, url)
          continue

        item_has_enclosure = feed_has_enclosure = True
        item.enclosure(url=url, type=mime, length=str(stream.get('size', '')))
        item.load_extension('podcast')
        duration = stream.get('duration')
        if duration:
          item.podcast.itunes_duration(duration)

  if feed_has_enclosure:
    fg.load_extension('podcast')
    fg.podcast.itunes_author(actor.get('displayName') or actor.get('username'))
    if summary:
      fg.podcast.itunes_summary(summary)
    fg.podcast.itunes_explicit('no')
    fg.podcast.itunes_block(False)
    name = author.get('name')
    if name:
      fg.podcast.itunes_author(name)
    if image:
      fg.podcast.itunes_image(image)
    fg.podcast.itunes_category(categories)

  if latest:
    fg.lastBuildDate(latest)

  return fg.rss_str(pretty=True).decode('utf-8')
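# A minimal usage sketch for from_activities above, with a hypothetical AS1
# activity; feed_url is required (it's asserted early on).
rss_xml = from_activities(
  [{'object': {'objectType': 'note', 'content': 'Hello world',
               'url': 'https://example.com/post/1',
               'published': '2019-01-01T00:00:00Z'}}],
  actor={'displayName': 'Alice'},
  title='Example feed', feed_url='https://example.com/feed.rss')
# rss_xml is a unicode string of RSS 2.0 XML with one <item>.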
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

  https://dev.twitter.com/docs/api/1.1/post/statuses/update
  https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
  https://dev.twitter.com/docs/api/1.1/post/favorites/create

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: string
    ignore_formatting: boolean

  Returns:
    a CreationResult

    If preview is True, the content will be a unicode string HTML snippet.
    If False, it will be a dict with 'id' and 'url' keys for the newly
    created Twitter object.
  """
  assert preview in (False, True)
  type = obj.get('objectType')
  verb = obj.get('verb')

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or 'inReplyTo' in obj
  image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
  video_url = util.get_first(obj, 'stream', {}).get('url')
  has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
  lat = obj.get('location', {}).get('latitude')
  lng = obj.get('location', {}).get('longitude')

  # prefer displayName over content for articles
  type = obj.get('objectType')
  base_url = self.base_object(obj).get('url')
  prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                    or obj.get('inReplyTo')))
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     prefer_name=not prefer_content,
                                     strip_first_video_tag=bool(video_url))
  if not content:
    if type == 'activity':
      content = verb
    elif has_media:
      content = ''
    else:
      return source.creation_result(
        abort=False,  # keep looking for things to publish,
        error_plain='No content text found.',
        error_html='No content text found.')

  if is_reply and base_url:
    # Twitter *used* to require replies to include an @-mention of the
    # original tweet's author
    # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
    # ...but now we use the auto_populate_reply_metadata query param instead:
    # https://dev.twitter.com/overview/api/upcoming-changes-to-tweets

    # the embed URL in the preview can't start with mobile. or www., so just
    # hard-code it to twitter.com. index #1 is netloc.
    parsed = urlparse.urlparse(base_url)
    parts = parsed.path.split('/')
    if len(parts) < 2 or not parts[1]:
      raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
    reply_to_prefix = '@%s ' % parts[1].lower()
    if content.lower().startswith(reply_to_prefix):
      content = content[len(reply_to_prefix):]

    parsed = list(parsed)
    parsed[1] = self.DOMAIN
    base_url = urlparse.urlunparse(parsed)

  # need a base_url with the tweet id for the embed HTML below. do this
  # *after* checking the real base_url for in-reply-to author username.
  if base_id and not base_url:
    base_url = 'https://twitter.com/-/statuses/' + base_id

  if is_reply and not base_url:
    return source.creation_result(
      abort=True,
      error_plain='Could not find a tweet to reply to.',
      error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
      'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
      'link to a Twitter URL or to an original post that publishes a '
      '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

  # truncate and ellipsize content if it's over the character
  # count. URLs will be t.co-wrapped, so include that when counting.
  content = self._truncate(content, obj.get('url'), include_link, type)

  # linkify defaults to Twitter's link shortening behavior
  preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to like.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
        'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">favorite</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      self.urlopen(API_POST_FAVORITE, data=data)
      resp = {'type': 'like'}

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to retweet.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
        'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">retweet</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply:  # a tweet
    content = unicode(content).encode('utf-8')
    data = {'status': content}

    if is_reply:
      description = \
        '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
          base_url, self.embed_post(base_obj))
      data.update({
        'in_reply_to_status_id': base_id,
        'auto_populate_reply_metadata': 'true',
      })
    else:
      description = '<span class="verb">tweet</span>:'

    if video_url:
      preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                          'this video</a></video>' % (video_url, video_url))
      if not preview:
        ret = self.upload_video(video_url)
        if isinstance(ret, source.CreationResult):
          return ret
        data['media_ids'] = ret

    elif image_urls:
      num_urls = len(image_urls)
      if num_urls > MAX_MEDIA:
        image_urls = image_urls[:MAX_MEDIA]
        logging.warning('Found %d photos! Only using the first %d: %r',
                        num_urls, MAX_MEDIA, image_urls)
      preview_content += '<br /><br />' + ' '.join(
        '<img src="%s" />' % url for url in image_urls)
      if not preview:
        ret = self.upload_images(image_urls)
        if isinstance(ret, source.CreationResult):
          return ret
        data['media_ids'] = ','.join(ret)

    if lat and lng:
      preview_content += (
        '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
        '%s, %s</a></div>' % (lat, lng, lat, lng))
      data['lat'] = lat
      data['long'] = lng

    if preview:
      return source.creation_result(content=preview_content,
                                    description=description)
    else:
      resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
      resp['type'] = 'comment' if is_reply else 'post'

  elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
    return source.creation_result(
      abort=True,
      error_plain='Cannot publish RSVPs to Twitter.',
      error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
      'Publishing events or RSVPs to Twitter is not supported.')

  else:
    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
      error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

  id_str = resp.get('id_str')
  if id_str:
    resp.update({'id': id_str, 'url': self.tweet_url(resp)})
  elif 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
def _create(self, obj, preview, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews creating for the previous two methods.

  https://www.flickr.com/services/api/upload.api.html
  https://www.flickr.com/services/api/flickr.photos.comments.addComment.html
  https://www.flickr.com/services/api/flickr.favorites.add.html
  https://www.flickr.com/services/api/flickr.photos.people.add.html

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: string
    ignore_formatting: boolean

  Returns:
    a CreationResult
  """
  # photo, comment, or like
  type = source.object_type(obj)
  logging.debug('publishing object type %s to Flickr', type)
  link_text = '(Originally published at: %s)' % obj.get('url')

  image_url = util.get_first(obj, 'image', {}).get('url')
  video_url = util.get_first(obj, 'stream', {}).get('url')
  content = self._content_for_create(
    obj, ignore_formatting=ignore_formatting,
    strip_first_video_tag=bool(video_url))

  if (video_url or image_url) and type in ('note', 'article'):
    name = obj.get('displayName')
    people = self._get_person_tags(obj)
    hashtags = [t.get('displayName') for t in obj.get('tags', [])
                if t.get('objectType') == 'hashtag' and t.get('displayName')]
    lat = obj.get('location', {}).get('latitude')
    lng = obj.get('location', {}).get('longitude')

    # if name does not represent an explicit title, then we'll just
    # use it as the title and wipe out the content
    if name and content and not mf2util.is_name_a_title(name, content):
      name = content
      content = None

    # add original post link
    if include_link == source.INCLUDE_LINK:
      content = ((content + '\n\n') if content else '') + link_text

    if preview:
      preview_content = ''
      if name:
        preview_content += '<h4>%s</h4>' % name
      if content:
        preview_content += '<div>%s</div>' % content
      if hashtags:
        preview_content += '<div> %s</div>' % ' '.join('#' + t for t in hashtags)
      if people:
        preview_content += '<div> with %s</div>' % ', '.join(
          ('<a href="%s">%s</a>' % (
            p.get('url'), p.get('displayName') or 'User %s' % p.get('id'))
           for p in people))
      if lat and lng:
        preview_content += '<div> at <a href="https://maps.google.com/maps?q=%s,%s">%s, %s</a></div>' % (
          lat, lng, lat, lng)

      if video_url:
        preview_content += ('<video controls src="%s"><a href="%s">this video'
                            '</a></video>' % (video_url, video_url))
      else:
        preview_content += '<img src="%s" />' % image_url

      return source.creation_result(content=preview_content, description='post')

    params = []
    if name:
      params.append(('title', name))
    if content:
      params.append(('description', content.encode('utf-8')))
    if hashtags:
      params.append(('tags', ','.join(
        ('"%s"' % t if ' ' in t else t).encode('utf-8') for t in hashtags)))

    file = util.urlopen(video_url or image_url)
    try:
      resp = self.upload(params, file)
    except requests.exceptions.ConnectionError as e:
      if e.args[0].message.startswith('Request exceeds 10 MiB limit'):
        msg = 'Sorry, photos and videos must be under 10MB.'
        return source.creation_result(error_plain=msg, error_html=msg)
      else:
        raise

    photo_id = resp.get('id')
    resp.update({
      'type': 'post',
      'url': self.photo_url(self.path_alias() or self.user_id(), photo_id),
    })
    if video_url:
      resp['granary_message'] = \
        "Note that videos take time to process before they're visible."

    # add person tags
    for person_id in sorted(p.get('id') for p in people):
      self.call_api_method('flickr.photos.people.add', {
        'photo_id': photo_id,
        'user_id': person_id,
      })

    # add location
    if lat and lng:
      self.call_api_method('flickr.photos.geo.setLocation', {
        'photo_id': photo_id,
        'lat': lat,
        'lon': lng,
      })

    return source.creation_result(resp)

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  # maybe a comment on a flickr photo?
  if type == 'comment' or obj.get('inReplyTo'):
    if not base_id:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a photo to comment on.',
        error_html='Could not find a photo to <a href="http://indiewebcamp.com/reply">comment on</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
        'link to a Flickr photo or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')

    if include_link == source.INCLUDE_LINK:
      content += '\n\n' + link_text
    if preview:
      return source.creation_result(
        content=content,
        description='comment on <a href="%s">this photo</a>.' % base_url)

    resp = self.call_api_method('flickr.photos.comments.addComment', {
      'photo_id': base_id,
      'comment_text': content.encode('utf-8'),
    })
    resp = resp.get('comment', {})
    resp.update({
      'type': 'comment',
      'url': resp.get('permalink'),
    })
    return source.creation_result(resp)

  if type == 'like':
    if not base_id:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a photo to favorite.',
        error_html='Could not find a photo to <a href="http://indiewebcamp.com/like">favorite</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/like">like-of</a> '
        'link to a Flickr photo or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')
    if preview:
      return source.creation_result(
        description='favorite <a href="%s">this photo</a>.' % base_url)

    # this method doesn't return any data
    self.call_api_method('flickr.favorites.add', {
      'photo_id': base_id,
    })
    # TODO should we canonicalize the base_url (e.g. removing trailing path
    # info like "/in/contacts/")
    return source.creation_result({
      'type': 'like',
      'url': '%s#favorited-by-%s' % (base_url, self.user_id()),
    })

  return source.creation_result(
    abort=False,
    error_plain='Cannot publish type=%s to Flickr.' % type,
    error_html='Cannot publish type=%s to Flickr.' % type)
def _prepare_activity(a, reader=True): """Preprocesses an activity to prepare it to be rendered as Atom. Modifies a in place. Args: a: ActivityStreams 1 activity dict reader: boolean, whether the output will be rendered in a feed reader. Currently just includes location if True, not otherwise. """ act_type = source.object_type(a) obj = util.get_first(a, 'object', default={}) primary = obj if (not act_type or act_type == 'post') else a # Render content as HTML; escape &s obj['rendered_content'] = _encode_ampersands(microformats2.render_content( primary, include_location=reader, render_attachments=True)) # Make sure every activity has the title field, since Atom <entry> requires # the title element. if not a.get('title'): a['title'] = util.ellipsize(_encode_ampersands( a.get('displayName') or a.get('content') or obj.get('title') or obj.get('displayName') or obj.get('content') or 'Untitled')) # strip HTML tags. the Atom spec says title is plain text: # http://atomenabled.org/developers/syndication/#requiredEntryElements a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text('')) children = [] image_urls_seen = set() image_atts = [] # normalize actor images for elem in a, obj: actor = elem.get('actor') if actor: actor['image'] = util.get_first(actor, 'image') # normalize attachments, render attached notes/articles attachments = a.get('attachments') or obj.get('attachments') or [] for att in attachments: att['stream'] = util.get_first(att, 'stream') type = att.get('objectType') if type == 'image': att['image'] = util.get_first(att, 'image') image_atts.append(att['image']) continue image_urls_seen |= set(util.get_urls(att, 'image')) if type in ('note', 'article'): html = microformats2.render_content(att, include_location=reader, render_attachments=True) author = att.get('author') if author: name = microformats2.maybe_linked_name( microformats2.object_to_json(author).get('properties') or {}) html = '%s: %s' % (name.strip(), html) children.append(html) # render image(s) that we haven't already seen for image in image_atts + util.get_list(obj, 'image'): if not image: continue url = image.get('url') parsed = urllib.parse.urlparse(url) rest = urllib.parse.urlunparse(('', '') + parsed[2:]) img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" % (re.escape(parsed.netloc), re.escape(rest))) if (url and url not in image_urls_seen and not img_src_re.search(obj['rendered_content'])): children.append(microformats2.img(url)) image_urls_seen.add(url) obj['rendered_children'] = [_encode_ampersands(child) for child in children] # make sure published and updated are strict RFC 3339 timestamps for prop in 'published', 'updated': val = obj.get(prop) if val: obj[prop] = util.maybe_iso8601_to_rfc3339(val) # Atom timestamps are even stricter than RFC 3339: they can't be naive ie # time zone unaware. They must have either an offset or the Z suffix. # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html if not util.TIMEZONE_OFFSET_RE.search(obj[prop]): obj[prop] += 'Z'
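# The timestamp normalization at the end of _prepare_activity is the subtle
# part: Atom validators reject RFC 3339 timestamps that are time zone naive.
# A standalone sketch of the same check, with a simplified offset regex
# standing in for util.TIMEZONE_OFFSET_RE (an assumption; the real pattern
# isn't shown in this document).
import re

TIMEZONE_OFFSET_RE = re.compile(r'(Z|[+-]\d{2}:?\d{2})$')  # simplified stand-in

def to_atom_timestamp(val):
  """Appends Z to a timestamp that has no explicit time zone."""
  return val if TIMEZONE_OFFSET_RE.search(val) else val + 'Z'

assert to_atom_timestamp('2023-01-02T03:04:05') == '2023-01-02T03:04:05Z'
assert to_atom_timestamp('2023-01-02T03:04:05+00:00') == '2023-01-02T03:04:05+00:00'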
def try_activitypub(self):
  source = util.get_required_param(self, 'source')

  # fetch source page, convert to ActivityStreams
  source_resp = common.requests_get(source)
  source_url = source_resp.url or source
  source_mf2 = mf2py.parse(source_resp.text, url=source_url)
  # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

  entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
  logging.info('First entry: %s', json.dumps(entry, indent=2))
  # make sure it has url, since we use that for AS2 id, which is required
  # for ActivityPub.
  props = entry.setdefault('properties', {})
  if not props.get('url'):
    props['url'] = [source_url]

  source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
  logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

  # fetch target page as AS object. target is the first in-reply-to, like-of,
  # or repost-of, *not* the target query param.
  target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                        util.get_first(source_obj, 'object'))
  if not target:
    common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                 'found in %s' % source_url)

  try:
    target_resp = common.get_as2(target)
  except (requests.HTTPError, exc.HTTPBadGateway) as e:
    if (e.response.status_code // 100 == 2 and
        common.content_type(e.response).startswith('text/html')):
      self.resp = Response.get_or_create(
        source=source_url, target=e.response.url or target,
        direction='out', source_mf2=json.dumps(source_mf2))
      return self.send_salmon(source_obj, target_resp=e.response)
    raise
  target_url = target_resp.url or target

  self.resp = Response.get_or_create(
    source=source_url, target=target_url, direction='out',
    protocol='activitypub', source_mf2=json.dumps(source_mf2))

  # find actor's inbox
  target_obj = target_resp.json()
  inbox_url = target_obj.get('inbox')

  if not inbox_url:
    # TODO: test actor/attributedTo and not, with/without inbox
    actor = target_obj.get('actor') or target_obj.get('attributedTo')
    if isinstance(actor, dict):
      inbox_url = actor.get('inbox')
      actor = actor.get('url')
    if not inbox_url and not actor:
      common.error(self, 'Target object has no actor or attributedTo URL')

  if not inbox_url:
    # fetch actor as AS object
    actor = common.get_as2(actor).json()
    inbox_url = actor.get('inbox')
  if not inbox_url:
    # TODO: probably need a way to save errors like this so that we can
    # return them if ostatus fails too.
    # common.error(self, 'Target actor has no inbox')
    return self.send_salmon(source_obj, target_resp=target_resp)

  # convert to AS2
  source_domain = urlparse.urlparse(source_url).netloc
  key = MagicKey.get_or_create(source_domain)
  source_activity = common.postprocess_as2(
    as2.from_as1(source_obj), target=target_obj, key=key)
  if self.resp.status == 'complete':
    source_activity['type'] = 'Update'

  # prepare HTTP Signature (required by Mastodon)
  # https://w3c.github.io/activitypub/#authorization-lds
  # https://tools.ietf.org/html/draft-cavage-http-signatures-07
  # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
  acct = 'acct:%s@%s' % (source_domain, source_domain)
  auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                           algorithm='rsa-sha256')

  # deliver source object to target actor's inbox.
  headers = {
    'Content-Type': common.CONTENT_TYPE_AS2,
    # required for HTTP Signature
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
    'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
  }
  inbox_url = urlparse.urljoin(target_url, inbox_url)
  resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                              headers=headers)
  self.response.status_int = resp.status_code
  if resp.status_code == 202:
    self.response.write('202 response! If this is Mastodon 1.x, their '
                        'signature verification probably failed. :(\n')
  self.response.write(resp.text)
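# Mastodon requires that the Date and Content-Type headers be covered by the
# HTTP Signature, which is why try_activitypub builds the Date header by
# hand. A hedged sketch of the delivery step in isolation, assuming the
# httpsig package (this import path is an assumption) and placeholder key
# and inbox values.
import datetime
import requests
from httpsig.requests_auth import HTTPSignatureAuth  # assumed import path

def deliver_activity(activity, inbox_url, key_id, private_pem):
  """Signs and POSTs an AS2 activity to an ActivityPub inbox."""
  headers = {
    'Content-Type': 'application/activity+json',  # stand-in for common.CONTENT_TYPE_AS2
    # Date must be present so it can be included in the signature
    'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
  }
  auth = HTTPSignatureAuth(secret=private_pem, key_id=key_id,
                           algorithm='rsa-sha256')
  return requests.post(inbox_url, json=activity, auth=auth, headers=headers)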
def render_content(obj, include_location=True, synthesize_content=True):
  """Renders the content of an ActivityStreams object.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then
  rendered in json_to_html.)

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: whether to render location, if provided
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)
    if 'startIndex' in t and 'length' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = content
    content = ''
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content += orig[last_end:start]
      content += '<a href="%s">%s</a>' % (tag['url'], orig[start:end])
      last_end = end
    content += orig[last_end:]

  # convert newlines to <br>s
  # do this *after* linkifying tags so we don't have to shuffle indices over
  content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  attachments = [a for a in obj.get('attachments', [])
                 if a.get('objectType') not in ('note', 'article')]

  for tag in attachments + tags.pop('article', []):
    name = tag.get('displayName', '')
    open_a_tag = False
    if tag.get('objectType') == 'video':
      video = util.get_first(tag, 'stream') or util.get_first(obj, 'stream')
      poster = util.get_first(tag, 'image', {})
      if video and video.get('url'):
        content += '\n<p>%s' % vid(video['url'], poster.get('url'), 'thumbnail')
    else:
      content += '\n<p>'
      url = tag.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      image = util.get_first(tag, 'image') or util.get_first(obj, 'image')
      if image and image.get('url'):
        content += '\n' + img(image['url'], 'thumbnail', name)
    if name:
      content += '\n<span class="name">%s</span>' % name
    if open_a_tag:
      content += '\n</a>'
    summary = tag.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary
    content += '\n</p>'

  # generate share/like contexts if the activity does not have content
  # of its own
  for as_type, verb in [('share', 'Shared'), ('like', 'Likes')]:
    obj_type = source.object_type(obj)
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = util.get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())
      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
            r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n' + hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention')
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
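# The index bookkeeping in render_content's mention loop is the fiddly part:
# tags must be spliced in startIndex order, and newline-to-<br /> conversion
# has to wait until afterwards so the indices aren't shifted. The same splice
# as a self-contained function (linkify_mentions is a hypothetical name).
def linkify_mentions(content, mentions):
  """Splices <a> tags into content at each mention's startIndex/length."""
  out = []
  last_end = 0
  for tag in sorted(mentions, key=lambda t: t['startIndex']):
    start = tag['startIndex']
    end = start + tag['length']
    out.append(content[last_end:start])
    out.append('<a href="%s">%s</a>' % (tag['url'], content[start:end]))
    last_end = end
  out.append(content[last_end:])
  return ''.join(out)

html = linkify_mentions('hi @alice', [
  {'url': 'https://example.com/alice', 'startIndex': 3, 'length': 6}])
assert html == 'hi <a href="https://example.com/alice">@alice</a>'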
def object_to_json(obj, trim_nulls=True, entry_class='h-entry', default_object_type=None, synthesize_content=True): """Converts an ActivityStreams object to microformats2 JSON. Args: obj: dict, a decoded JSON ActivityStreams object trim_nulls: boolean, whether to remove elements with null or empty values entry_class: string or sequence, the mf2 class(es) that entries should be given (e.g. 'h-cite' when parsing a reference to a foreign entry). defaults to 'h-entry' default_object_type: string, the ActivityStreams objectType to use if one is not present. defaults to None synthesize_content: whether to generate synthetic content if the object doesn't have its own, e.g. 'likes this.' or 'shared this.' Returns: dict, decoded microformats2 JSON """ if not obj or not isinstance(obj, dict): return {} obj_type = source.object_type(obj) or default_object_type # if the activity type is a post, then it's really just a conduit # for the object. for other verbs, the activity itself is the # interesting thing if obj_type == 'post': primary = obj.get('object', {}) obj_type = source.object_type(primary) or default_object_type else: primary = obj # TODO: extract snippet name = primary.get('displayName', primary.get('title')) summary = primary.get('summary') author = obj.get('author', obj.get('actor', {})) in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', [])) is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe') if (is_rsvp or obj_type == 'react') and obj.get('object'): objs = obj['object'] in_reply_tos.extend(objs if isinstance(objs, list) else [objs]) # maps objectType to list of objects attachments = defaultdict(list) for prop in 'attachments', 'tags': for elem in get_list(primary, prop): attachments[elem.get('objectType')].append(elem) # construct mf2! 
ret = { 'type': (AS_TO_MF2_TYPE.get(obj_type) or [entry_class] if isinstance( entry_class, basestring) else list(entry_class)), 'properties': { 'uid': [obj.get('id') or ''], 'numeric-id': [obj.get('numeric_id') or ''], 'name': [name], 'nickname': [obj.get('username') or ''], 'summary': [summary], 'url': (list(object_urls(obj) or object_urls(primary)) + obj.get('upstreamDuplicates', [])), 'photo': dedupe_urls( get_urls(attachments, 'image', 'image') + get_urls(primary, 'image')), 'video': dedupe_urls( get_urls(attachments, 'video', 'stream') + get_urls(primary, 'stream')), 'audio': get_urls(attachments, 'audio', 'stream'), 'published': [obj.get('published', primary.get('published', ''))], 'updated': [obj.get('updated', primary.get('updated', ''))], 'content': [{ 'value': xml.sax.saxutils.unescape(primary.get('content', '')), 'html': render_content(primary, include_location=False, synthesize_content=synthesize_content), }], 'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]), 'author': [ object_to_json(author, trim_nulls=False, default_object_type='person') ], 'location': [ object_to_json(primary.get('location', {}), trim_nulls=False, default_object_type='place') ], 'comment': [ object_to_json(c, trim_nulls=False, entry_class='h-cite') for c in obj.get('replies', {}).get('items', []) ], 'start': [primary.get('startTime')], 'end': [primary.get('endTime')], }, 'children': [ object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite']) for a in attachments['note'] + attachments['article'] ] } # hashtags and person tags tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', []) ret['properties']['category'] = [] for tag in tags: if tag.get('objectType') == 'person': ret['properties']['category'].append( object_to_json(tag, entry_class='u-category h-card')) elif tag.get('objectType') == 'hashtag': name = tag.get('displayName') if name: ret['properties']['category'].append(name) # rsvp if is_rsvp: ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]] elif obj_type == 'invite': invitee = object_to_json(obj.get('object'), trim_nulls=False, default_object_type='person') ret['properties']['invitee'] = [invitee] # like and repost mentions for type, prop in ('favorite', 'like'), ('like', 'like'), ('share', 'repost'): if obj_type == type: # The ActivityStreams spec says the object property should always be a # single object, but it's useful to let it be a list, e.g. when a like has # multiple targets, e.g. a like of a post with original post URLs in it, # which brid.gy does. objs = get_list(obj, 'object') ret['properties'][prop + '-of'] = [ # flatten contexts that are just a url o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType']) else object_to_json(o, trim_nulls=False, entry_class='h-cite') for o in objs ] else: # received likes and reposts ret['properties'][prop] = [ object_to_json(t, trim_nulls=False, entry_class='h-cite') for t in tags if source.object_type(t) == type ] # latitude & longitude lat = long = None position = ISO_6709_RE.match(primary.get('position') or '') if position: lat, long = position.groups() if not lat: lat = primary.get('latitude') if not long: long = primary.get('longitude') if lat: ret['properties']['latitude'] = [str(lat)] if long: ret['properties']['longitude'] = [str(long)] if trim_nulls: ret = util.trim_nulls(ret) return ret
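# The like/repost branch above special-cases targets that are "just a url":
# when an object carries nothing beyond url and possibly objectType, a full
# nested h-cite would be noise, so it collapses to the bare URL string. That
# test in isolation (flatten_if_bare_url is a hypothetical name).
def flatten_if_bare_url(o):
  """Returns the bare URL when an AS object has no other fields,
  otherwise returns the object unchanged."""
  if 'url' in o and set(o.keys()) <= set(['url', 'objectType']):
    return o['url']
  return o

assert flatten_if_bare_url({'url': 'http://x/1', 'objectType': 'note'}) == 'http://x/1'
assert flatten_if_bare_url({'url': 'http://x/1', 'content': 'hi'}) != 'http://x/1'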
def object_to_json(obj, trim_nulls=True, entry_class='h-entry', default_object_type=None, synthesize_content=True): """Converts an ActivityStreams object to microformats2 JSON. Args: obj: dict, a decoded JSON ActivityStreams object trim_nulls: boolean, whether to remove elements with null or empty values entry_class: string, the mf2 class that entries should be given (e.g. 'h-cite' when parsing a reference to a foreign entry). defaults to 'h-entry' default_object_type: string, the ActivityStreams objectType to use if one is not present. defaults to None synthesize_content: whether to generate synthetic content if the object doesn't have its own, e.g. 'likes this.' or 'shared this.' Returns: dict, decoded microformats2 JSON """ if not obj or not isinstance(obj, dict): return {} obj_type = source.object_type(obj) or default_object_type # if the activity type is a post, then it's really just a conduit # for the object. for other verbs, the activity itself is the # interesting thing if obj_type == 'post': primary = obj.get('object', {}) obj_type = source.object_type(primary) or default_object_type else: primary = obj # TODO: extract snippet name = primary.get('displayName', primary.get('title')) summary = primary.get('summary') author = obj.get('author', obj.get('actor', {})) in_reply_tos = obj.get( 'inReplyTo', obj.get('context', {}).get('inReplyTo', [])) is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe') if (is_rsvp or obj_type == 'react') and obj.get('object'): objs = obj['object'] in_reply_tos.extend(objs if isinstance(objs, list) else [objs]) # TODO: more tags. most will be p-category? ret = { 'type': (['h-card'] if obj_type == 'person' else ['h-card', 'p-location'] if obj_type == 'place' else [entry_class]), 'properties': { 'uid': [obj.get('id', '')], 'name': [name], 'summary': [summary], 'url': (list(object_urls(obj) or object_urls(primary)) + obj.get('upstreamDuplicates', [])), 'photo': [image.get('url') for image in (util.get_list(obj, 'image') or util.get_list(primary, 'image'))], 'video': [obj.get('stream', primary.get('stream', {})).get('url')], 'published': [obj.get('published', primary.get('published', ''))], 'updated': [obj.get('updated', primary.get('updated', ''))], 'content': [{ 'value': xml.sax.saxutils.unescape(primary.get('content', '')), 'html': render_content(primary, include_location=False, synthesize_content=synthesize_content), }], 'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]), 'author': [object_to_json( author, trim_nulls=False, default_object_type='person')], 'location': [object_to_json( primary.get('location', {}), trim_nulls=False, default_object_type='place')], 'latitude': primary.get('latitude'), 'longitude': primary.get('longitude'), 'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite') for c in obj.get('replies', {}).get('items', [])], }, 'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite') for c in primary.get('attachments', []) if c.get('objectType') in ('note', 'article')], } # hashtags and person tags tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get('tags', []) ret['properties']['category'] = [] for tag in tags: if tag.get('objectType') == 'person': cls = 'u-category h-card' elif tag.get('objectType') == 'hashtag': cls = 'u-category' else: continue ret['properties']['category'].append(object_to_json(tag, entry_class=cls)) # rsvp if is_rsvp: ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]] elif obj_type == 'invite': invitee = object_to_json(obj.get('object'), 
trim_nulls=False, default_object_type='person') ret['properties']['invitee'] = [invitee] # like and repost mentions for type, prop in ('like', 'like'), ('share', 'repost'): if obj_type == type: # The ActivityStreams spec says the object property should always be a # single object, but it's useful to let it be a list, e.g. when a like has # multiple targets, e.g. a like of a post with original post URLs in it, # which brid.gy does. objs = util.get_list(obj, 'object') ret['properties'][prop + '-of'] = [ # flatten contexts that are just a url o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType']) else object_to_json(o, trim_nulls=False, entry_class='h-cite') for o in objs] else: # received likes and reposts ret['properties'][prop] = [ object_to_json(t, trim_nulls=False, entry_class='h-cite') for t in tags if source.object_type(t) == type] if trim_nulls: ret = util.trim_nulls(ret) return ret
def jsonfeed_to_activities(jsonfeed): """Converts a JSON feed to ActivityStreams activities and actor. Args: jsonfeed: dict, JSON Feed data Returns: (activities, actor) tuple, where activities and actor are both ActivityStreams object dicts Raises: ValueError, if jsonfeed isn't a valid JSON Feed dict """ if not hasattr(jsonfeed, 'get'): raise ValueError('Expected dict (or compatible), got %s' % jsonfeed.__class__.__name__) author = jsonfeed.get('author', {}) actor = { 'objectType': 'person', 'url': author.get('url'), 'image': [{ 'url': author.get('avatar') }], 'displayName': author.get('name'), } def attachment(jf): if not hasattr(jf, 'get'): raise ValueError('Expected attachment to be dict; got %s' % jf) url = jf.get('url') type = jf.get('mime_type', '').split('/')[0] as1 = { 'objectType': type, 'title': jf.get('title'), } if type in ('audio', 'video'): as1['stream'] = {'url': url} else: as1['url'] = url return as1 activities = [{ 'object': { 'objectType': 'article' if item.get('title') else 'note', 'title': item.get('title'), 'summary': item.get('summary'), 'content': util.get_first(item, 'content_html') or util.get_first(item, 'content_text'), 'id': str(item.get('id') or ''), 'published': item.get('date_published'), 'updated': item.get('date_modified'), 'url': item.get('url'), 'image': [{ 'url': item.get('image') }], 'author': { 'displayName': item.get('author', {}).get('name'), 'image': [{ 'url': item.get('author', {}).get('avatar') }] }, 'attachments': [attachment(a) for a in item.get('attachments', [])], } } for item in jsonfeed.get('items', [])] return (util.trim_nulls(activities), util.trim_nulls(actor))
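# The attachment mapping in jsonfeed_to_activities hinges on the MIME type's
# major type: audio and video attachments become AS objects with a stream
# URL, everything else keeps a plain url. The same mapping as a standalone
# function (the name is hypothetical; the logic mirrors attachment() above).
def jsonfeed_attachment_to_as1(jf):
  """Maps a JSON Feed attachment dict to an ActivityStreams 1 object."""
  url = jf.get('url')
  type = jf.get('mime_type', '').split('/')[0]
  as1 = {'objectType': type, 'title': jf.get('title')}
  if type in ('audio', 'video'):
    as1['stream'] = {'url': url}
  else:
    as1['url'] = url
  return as1

assert jsonfeed_attachment_to_as1(
  {'url': 'http://x/pod.mp3', 'mime_type': 'audio/mpeg'}
) == {'objectType': 'audio', 'title': None, 'stream': {'url': 'http://x/pod.mp3'}}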
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
  """Converts ActivityStreams activities to an RSS 2.0 feed.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    feed_url: string, the URL for this RSS feed
    home_page_url: string, the home page URL
    hfeed: dict, parsed mf2 h-feed, if available

  Returns:
    unicode string with RSS 2.0 XML
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  assert feed_url
  fg = FeedGenerator()
  fg.id(feed_url)
  fg.link(href=feed_url, rel='self')
  if home_page_url:
    fg.link(href=home_page_url, rel='alternate')
  # TODO: parse language from lang attribute:
  # https://github.com/microformats/mf2py/issues/150
  fg.language('en')
  fg.generator('granary', uri='https://granary.io/')

  hfeed = hfeed or {}
  actor = actor or {}
  image = util.get_url(hfeed, 'image') or util.get_url(actor, 'image')
  if image:
    fg.image(image)

  props = hfeed.get('properties') or {}
  content = microformats2.get_text(util.get_first(props, 'content', ''))
  summary = util.get_first(props, 'summary', '')
  desc = content or summary or '-'
  fg.description(desc)  # required
  fg.title(title or util.ellipsize(desc))  # required

  latest = None
  enclosures = False
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    item = fg.add_entry()
    url = obj.get('url')
    item.id(obj.get('id') or url)
    item.link(href=url)
    item.guid(url, permalink=True)

    item.title(obj.get('title') or obj.get('displayName') or '-')  # required
    content = microformats2.render_content(
      obj, include_location=True, render_attachments=False) or obj.get('summary')
    if content:
      item.content(content, type='CDATA')

    item.category(
      [{'term': t['displayName']} for t in obj.get('tags', [])
       if t.get('displayName') and
       t.get('verb') not in ('like', 'react', 'share')])

    author = obj.get('author', {})
    item.author({
      'name': author.get('displayName') or author.get('username'),
      'uri': author.get('url'),
    })

    published = obj.get('published') or obj.get('updated')
    if published:
      try:
        dt = mf2util.parse_datetime(published)
        if not isinstance(dt, datetime):
          dt = datetime.combine(dt, time.min)
        if not dt.tzinfo:
          dt = dt.replace(tzinfo=util.UTC)
        item.published(dt)
        if not latest or dt > latest:
          latest = dt
      except ValueError:  # bad datetime string
        pass

    for att in obj.get('attachments', []):
      stream = util.get_first(att, 'stream') or att
      if not stream:
        continue

      url = stream.get('url') or ''
      mime = mimetypes.guess_type(url)[0] or ''
      if (att.get('objectType') in ENCLOSURE_TYPES or
          mime and mime.split('/')[0] in ENCLOSURE_TYPES):
        enclosures = True
        item.enclosure(url=url, type=mime, length='REMOVEME')  # TODO: length (bytes)

        item.load_extension('podcast')
        duration = stream.get('duration')
        if duration:
          item.podcast.itunes_duration(duration)

  if enclosures:
    fg.load_extension('podcast')
    fg.podcast.itunes_author(actor.get('displayName') or actor.get('username'))
    if summary:
      fg.podcast.itunes_summary(summary)
    fg.podcast.itunes_explicit('no')
    fg.podcast.itunes_block(False)

  if latest:
    fg.lastBuildDate(latest)

  return fg.rss_str(pretty=True).decode('utf-8').replace(' length="REMOVEME"', '')
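# feedgen requires a length attribute on enclosures, but the byte size isn't
# known here, so from_activities writes a REMOVEME sentinel and strips the
# attribute from the serialized XML afterwards. The post-processing step in
# isolation; a plain string replace is reasonably safe only because the
# sentinel is unlikely to appear elsewhere in the generated XML.
xml_out = '<enclosure url="http://x/pod.mp3" type="audio/mpeg" length="REMOVEME"/>'
assert xml_out.replace(' length="REMOVEME"', '') == \
    '<enclosure url="http://x/pod.mp3" type="audio/mpeg"/>'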
def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None, home_page_url=None): """Converts ActivityStreams activities to a JSON feed. Args: activities: sequence of ActivityStreams activity dicts actor: ActivityStreams actor dict, the author of the feed title: string, the feed title home_page_url: string, the home page URL feed_url: the URL of the JSON Feed, if any. Included in the feed_url field. Returns: dict, JSON Feed data, ready to be JSON-encoded """ try: iter(activities) except TypeError: raise TypeError('activities must be iterable') if isinstance(activities, (dict, basestring)): raise TypeError('activities may not be a dict or string') def image_url(obj): return util.get_first(obj, 'image', {}).get('url') def actor_name(obj): return obj.get('displayName') or obj.get('username') if not actor: actor = {} items = [] for activity in activities: obj = activity.get('object') or activity if obj.get('objectType') == 'person': continue author = obj.get('author', {}) content = obj.get('content') # The JSON Feed spec (https://jsonfeed.org/version/1#items) says that the # URL from the "image" property may also appear in "content_html", in which # case it should be interpreted as the "main, featured image" of the # post. It does not specify the behavior or semantics in the case that the # image does *not* appear in "content_html", but currently at least one # feed reader (Feedbin) will not display the image as part of the post # content unless it is explicitly included in "content_html". if content and image_url(obj): content += HTML_IMAGE_TEMPLATE.format(image_url(obj)) obj_title = obj.get('title') or obj.get('displayName') item = { 'id': obj.get('id') or obj.get('url'), 'url': obj.get('url'), 'image': image_url(obj), 'title': obj_title if mf2util.is_name_a_title(obj_title, content) else None, 'summary': obj.get('summary'), 'content_html': content, 'date_published': obj.get('published'), 'date_modified': obj.get('updated'), 'author': { 'name': actor_name(author), 'url': author.get('url'), 'avatar': image_url(author), }, 'attachments': [], } for att in obj.get('attachments', []): url = (util.get_first(att, 'stream') or util.get_first(att, 'image') or att).get('url') mime = mimetypes.guess_type(url)[0] if url else None if (att.get('objectType') in ATTACHMENT_TYPES or mime and mime.split('/')[0] in ATTACHMENT_TYPES): item['attachments'].append({ 'url': url or '', 'mime_type': mime, 'title': att.get('title'), }) if not item['content_html']: item['content_text'] = '' items.append(item) return util.trim_nulls( { 'version': 'https://jsonfeed.org/version/1', 'title': title or actor_name(actor) or 'JSON Feed', 'feed_url': feed_url, 'home_page_url': home_page_url or actor.get('url'), 'author': { 'name': actor_name(actor), 'url': actor.get('url'), 'avatar': image_url(actor), }, 'items': items, }, ignore='content_text')
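# The long comment above is the reason for the append: at least one reader
# (Feedbin) only displays the featured image when it also appears in
# content_html. A sketch with a hypothetical HTML_IMAGE_TEMPLATE, since its
# real definition isn't shown in this document.
HTML_IMAGE_TEMPLATE = '\n<p><img src="{}" /></p>'  # hypothetical definition

def with_featured_image(content_html, image_url):
  """Appends the featured image so readers like Feedbin display it."""
  if content_html and image_url:
    return content_html + HTML_IMAGE_TEMPLATE.format(image_url)
  return content_html

assert with_featured_image('<p>hi</p>', 'http://x/a.jpg') == \
    '<p>hi</p>\n<p><img src="http://x/a.jpg" /></p>'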
def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None, home_page_url=None): """Converts ActivityStreams activities to a JSON feed. Args: activities: sequence of ActivityStreams activity dicts actor: ActivityStreams actor dict, the author of the feed title: string, the feed title home_page_url: string, the home page URL feed_url: the URL of the JSON Feed, if any. Included in the feed_url field. Returns: dict, JSON Feed data, ready to be JSON-encoded """ try: iter(activities) except TypeError: raise TypeError('activities must be iterable') if isinstance(activities, (dict, basestring)): raise TypeError('activities may not be a dict or string') def image_url(obj): return util.get_first(obj, 'image', {}).get('url') def actor_name(obj): return obj.get('displayName') or obj.get('username') if not actor: actor = {} items = [] for activity in activities: obj = activity.get('object') or activity if obj.get('objectType') == 'person': continue author = obj.get('author', {}) content = microformats2.render_content( obj, include_location=True, render_attachments=True) obj_title = obj.get('title') or obj.get('displayName') item = { 'id': obj.get('id') or obj.get('url'), 'url': obj.get('url'), 'image': image_url(obj), 'title': obj_title if mf2util.is_name_a_title(obj_title, content) else None, 'summary': obj.get('summary'), 'content_html': content, 'date_published': obj.get('published'), 'date_modified': obj.get('updated'), 'author': { 'name': actor_name(author), 'url': author.get('url'), 'avatar': image_url(author), }, 'attachments': [], } for att in obj.get('attachments', []): url = (util.get_first(att, 'stream') or util.get_first(att, 'image') or att ).get('url') mime = mimetypes.guess_type(url)[0] if url else None if (att.get('objectType') in ATTACHMENT_TYPES or mime and mime.split('/')[0] in ATTACHMENT_TYPES): item['attachments'].append({ 'url': url or '', 'mime_type': mime, 'title': att.get('title'), }) if not item['content_html']: item['content_text'] = '' items.append(item) return util.trim_nulls({ 'version': 'https://jsonfeed.org/version/1', 'title': title or actor_name(actor) or 'JSON Feed', 'feed_url': feed_url, 'home_page_url': home_page_url or actor.get('url'), 'author': { 'name': actor_name(actor), 'url': actor.get('url'), 'avatar': image_url(actor), }, 'items': items, }, ignore='content_text')
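# Both versions of activities_to_jsonfeed gate attachments the same way:
# include one only when its declared objectType, or the major type guessed
# from its URL, is in ATTACHMENT_TYPES. A simplified sketch; ATTACHMENT_TYPES
# is assumed to cover audio and video here, and stream/image are treated as
# single dicts rather than the lists util.get_first handles above.
import mimetypes

ATTACHMENT_TYPES = {'audio', 'video'}  # assumed; not defined in this document

def is_feed_attachment(att):
  """True if an AS attachment should become a JSON Feed attachment."""
  url = (att.get('stream') or att.get('image') or att).get('url')
  mime = mimetypes.guess_type(url)[0] if url else None
  return (att.get('objectType') in ATTACHMENT_TYPES or
          bool(mime and mime.split('/')[0] in ATTACHMENT_TYPES))

assert is_feed_attachment({'stream': {'url': 'http://x/pod.mp3'}})
assert not is_feed_attachment({'url': 'http://x/page.html'})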
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo') or []
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = context.get('inReplyTo') or []

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then
  # first audio
  stream = {}
  for candidate in [obj] + attachments['video'] + attachments['audio']:
    for stream in get_list(candidate, 'stream'):
      if stream:
        break
    if stream:
      break

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      logging.warning('Ignoring duration %r; expected int, got %s',
                      duration, duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, str)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'duration': [duration],
      'size': sizes,
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False,
                      entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML
  # tags, otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs
      # in it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
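# Latitude and longitude can arrive either as separate latitude/longitude
# fields or embedded in an ISO 6709 position string like '+37.7749-122.4194/'.
# A sketch of the extraction with a simplified regex standing in for
# ISO_6709_RE, whose real definition isn't shown in this document.
import re

ISO_6709_RE = re.compile(r'^([-+]\d+(?:\.\d+)?)([-+]\d+(?:\.\d+)?)')  # simplified

def parse_position(position):
  """Extracts (lat, long) strings from an ISO 6709 position, else (None, None)."""
  match = ISO_6709_RE.match(position or '')
  return match.groups() if match else (None, None)

assert parse_position('+37.7749-122.4194/') == ('+37.7749', '-122.4194')
assert parse_position(None) == (None, None)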
def _activitypub_targets(self): """ Returns: list of (Response, string inbox URL) """ # if there's in-reply-to, like-of, or repost-of, they're the targets. # otherwise, it's all followers' inboxes. targets = self._targets() if not targets: # interpret this as a Create or Update, deliver it to followers inboxes = [] for follower in Follower.query().filter( Follower.key > Key('Follower', self.source_domain + ' '), Follower.key < Key( 'Follower', self.source_domain + chr(ord(' ') + 1))): if follower.status != 'inactive' and follower.last_follow: actor = json_loads(follower.last_follow).get('actor') if actor and isinstance(actor, dict): inboxes.append( actor.get('endpoints', {}).get('sharedInbox') or actor.get('publicInbox') or actor.get('inbox')) return [(Response.get_or_create(source=self.source_url, target=inbox, direction='out', protocol='activitypub', source_mf2=json_dumps( self.source_mf2)), inbox) for inbox in inboxes if inbox] resps_and_inbox_urls = [] for target in targets: # fetch target page as AS2 object try: self.target_resp = common.get_as2(target) except (requests.HTTPError, exc.HTTPBadGateway) as e: self.target_resp = getattr(e, 'response', None) if self.target_resp and self.target_resp.status_code // 100 == 2: content_type = common.content_type(self.target_resp) or '' if content_type.startswith('text/html'): # TODO: pass e.response to try_salmon()'s target_resp continue # give up raise target_url = self.target_resp.url or target resp = Response.get_or_create(source=self.source_url, target=target_url, direction='out', protocol='activitypub', source_mf2=json_dumps( self.source_mf2)) # find target's inbox target_obj = self.target_resp.json() resp.target_as2 = json_dumps(target_obj) inbox_url = target_obj.get('inbox') if not inbox_url: # TODO: test actor/attributedTo and not, with/without inbox actor = (util.get_first(target_obj, 'actor') or util.get_first(target_obj, 'attributedTo')) if isinstance(actor, dict): inbox_url = actor.get('inbox') actor = actor.get('url') or actor.get('id') if not inbox_url and not actor: self.error( 'Target object has no actor or attributedTo with URL or id.' ) elif not isinstance(actor, str): self.error( 'Target actor or attributedTo has unexpected url or id object: %r' % actor) if not inbox_url: # fetch actor as AS object actor = common.get_as2(actor).json() inbox_url = actor.get('inbox') if not inbox_url: # TODO: probably need a way to save errors like this so that we can # return them if ostatus fails too. # self.error('Target actor has no inbox') continue inbox_url = urllib.parse.urljoin(target_url, inbox_url) resps_and_inbox_urls.append((resp, inbox_url)) return resps_and_inbox_urls
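# When an activity fans out to all followers, each follower's actor record
# may advertise several inboxes. _activitypub_targets prefers the shared
# inbox, which lets one POST reach every follower on that server. That
# preference in isolation (best_inbox is a hypothetical name):
def best_inbox(actor):
  """Picks the delivery inbox for an ActivityPub actor dict."""
  return (actor.get('endpoints', {}).get('sharedInbox') or
          actor.get('publicInbox') or
          actor.get('inbox'))

assert best_inbox({'inbox': '/u/a/inbox',
                   'endpoints': {'sharedInbox': '/inbox'}}) == '/inbox'
assert best_inbox({'inbox': '/u/a/inbox'}) == '/u/a/inbox'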
def dispatch_request(self, site):
  logger.info(f'Params: {list(request.values.items())}')

  # strip fragments from source and target url
  self.source_url = urllib.parse.urldefrag(request.form['source'])[0]
  self.target_url = urllib.parse.urldefrag(request.form['target'])[0]

  # follow target url through any redirects, strip utm_* query params
  resp = util.follow_redirects(self.target_url)
  redirected_target_urls = [r.url for r in resp.history]
  self.target_url = util.clean_url(resp.url)

  # parse and validate target URL
  domain = util.domain_from_link(self.target_url)
  if not domain:
    self.error(f'Could not parse target URL {self.target_url}')

  # look up source by domain
  source_cls = models.sources[site]
  domain = domain.lower()
  self.source = (source_cls.query()
                 .filter(source_cls.domains == domain)
                 .filter(source_cls.features == 'webmention')
                 .filter(source_cls.status == 'enabled')
                 .get())
  if not self.source:
    # check for a rel-canonical link. Blogger uses these when it serves a post
    # from multiple domains, e.g. country TLDs like epeus.blogspot.co.uk vs
    # epeus.blogspot.com.
    # https://github.com/snarfed/bridgy/issues/805
    fetched = self.fetch_mf2(self.target_url, require_mf2=False)
    if not fetched:
      # fetch_mf2() already wrote the error response
      return
    _, mf2 = fetched
    domains = util.dedupe_urls(
      util.domain_from_link(url)
      for url in mf2['rels'].get('canonical', []))
    if domains:
      self.source = (source_cls.query()
                     .filter(source_cls.domains.IN(domains))
                     .filter(source_cls.features == 'webmention')
                     .filter(source_cls.status == 'enabled')
                     .get())
  if not self.source:
    self.error(
      f'Could not find {source_cls.GR_CLASS.NAME} account for {domain}. Is it registered with Bridgy?')

  # check that the target URL path is supported
  target_path = urllib.parse.urlparse(self.target_url).path
  if target_path in ('', '/'):
    msg = 'Home page webmentions are not currently supported.'
    logger.info(msg)
    return {'error': msg}, 202
  for pattern in self.source.PATH_BLOCKLIST:
    if pattern.match(target_path):
      msg = f'{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}'
      logger.info(msg)
      return {'error': msg}, 202

  # create BlogWebmention entity
  id = f'{self.source_url} {self.target_url}'
  self.entity = BlogWebmention.get_or_insert(
    id, source=self.source.key, redirected_target_urls=redirected_target_urls)
  if self.entity.status == 'complete':
    # TODO: response message saying update isn't supported
    return self.entity.published
  logger.debug(f'BlogWebmention entity: {self.entity.key.urlsafe().decode()}')

  # fetch source page
  fetched = self.fetch_mf2(self.source_url)
  if not fetched:
    return
  resp, mf2 = fetched

  item = self.find_mention_item(mf2.get('items', []))
  if not item:
    self.error(f'Could not find target URL {self.target_url} in source page {resp.url}',
               data=mf2, log_exception=False)

  # default author to target domain
  author_name = domain
  author_url = f'http://{domain}/'

  # extract author name and URL from h-card, if any
  props = item['properties']
  author = get_first(props, 'author')
  if author:
    if isinstance(author, str):
      author_name = author
    else:
      author_props = author.get('properties', {})
      author_name = get_first(author_props, 'name')
      author_url = get_first(author_props, 'url')

  # if present, u-url overrides source url
  u_url = get_first(props, 'url')
  if u_url:
    self.entity.u_url = u_url

  # generate content
  content = props['content'][0]  # find_mention_item() guaranteed this is here
  text = (content.get('html') or content.get('value')).strip()
  source_url = self.entity.source_url()
  text += f' <br /> <a href="{source_url}">via {util.domain_from_link(source_url)}</a>'

  # write comment
  try:
    self.entity.published = self.source.create_comment(
      self.target_url, author_name, author_url, text)
  except Exception as e:
    code, body = util.interpret_http_exception(e)
    msg = f'Error: {code}: {e}; {body}'
    if code == '401':
      logger.warning(f'Disabling source due to: {e}', exc_info=True)
      self.source.status = 'disabled'
      self.source.put()
      self.error(msg, status=code, report=self.source.is_beta_user())
    elif code == '404':
      # post is gone
      self.error(msg, status=code, report=False)
    elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
      self.error(msg, status=502, report=False)
    elif code or body:
      self.error(msg, status=code, report=True)
    else:
      raise

  # write results to datastore
  self.entity.status = 'complete'
  self.entity.put()
  return self.entity.published
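# PATH_BLOCKLIST above is a per-source collection of compiled patterns, and
# matching runs against the target URL's path only, never its query string.
# A sketch with a hypothetical blocklist; real source classes define their
# own PATH_BLOCKLIST.
import re
import urllib.parse

PATH_BLOCKLIST = [re.compile(r'^/(tag|search)/')]  # hypothetical patterns

def is_blocked(target_url):
  """True if the target URL's path matches a blocklisted pattern."""
  path = urllib.parse.urlparse(target_url).path
  return any(pattern.match(path) for pattern in PATH_BLOCKLIST)

assert is_blocked('https://example.com/tag/cats?page=2')
assert not is_blocked('https://example.com/2023/01/post')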
def _create(self, obj, preview=None, include_link=False, ignore_formatting=False):
  """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

  https://dev.twitter.com/docs/api/1.1/post/statuses/update
  https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
  https://dev.twitter.com/docs/api/1.1/post/favorites/create

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: boolean
    ignore_formatting: boolean

  Returns:
    a CreationResult

    If preview is True, the content will be a unicode string HTML snippet.
    If False, it will be a dict with 'id' and 'url' keys for the newly
    created Twitter object.
  """
  assert preview in (False, True)
  type = obj.get('objectType')
  verb = obj.get('verb')

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or 'inReplyTo' in obj
  image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
  video_url = util.get_first(obj, 'stream', {}).get('url')
  has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
  lat = obj.get('location', {}).get('latitude')
  lng = obj.get('location', {}).get('longitude')

  # prefer displayName over content for articles
  type = obj.get('objectType')
  base_url = self.base_object(obj).get('url')
  prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                    or obj.get('inReplyTo')))
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     prefer_name=not prefer_content,
                                     strip_first_video_tag=bool(video_url))
  if not content:
    if type == 'activity':
      content = verb
    elif has_media:
      content = ''
    else:
      return source.creation_result(
        abort=False,  # keep looking for things to publish,
        error_plain='No content text found.',
        error_html='No content text found.')

  if is_reply and base_url:
    # extract username from in-reply-to URL so we can @-mention it, if it's
    # not already @-mentioned, since Twitter requires that to make our new
    # tweet a reply.
    # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
    # TODO: this doesn't handle an in-reply-to username that's a prefix of
    # another username already mentioned, e.g. in reply to @foo when content
    # includes @foobar.
    parsed = urlparse.urlparse(base_url)
    parts = parsed.path.split('/')
    if len(parts) < 2 or not parts[1]:
      raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
    mention = '@' + parts[1]
    if mention.lower() not in content.lower():
      content = mention + ' ' + content

    # the embed URL in the preview can't start with mobile. or www., so just
    # hard-code it to twitter.com. index #1 is netloc.
    parsed = list(parsed)
    parsed[1] = self.DOMAIN
    base_url = urlparse.urlunparse(parsed)

  # need a base_url with the tweet id for the embed HTML below. do this
  # *after* checking the real base_url for in-reply-to author username.
  if base_id and not base_url:
    base_url = 'https://twitter.com/-/statuses/' + base_id

  if is_reply and not base_url:
    return source.creation_result(
      abort=True,
      error_plain='Could not find a tweet to reply to.',
      error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
      'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
      'link to a Twitter URL or to an original post that publishes a '
      '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

  # truncate and ellipsize content if it's over the character
  # count. URLs will be t.co-wrapped, so include that when counting.
  include_url = obj.get('url') if include_link else None
  content = self._truncate(content, include_url, has_media)

  # linkify defaults to Twitter's link shortening behavior
  preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to like.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
        'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">favorite</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      self.urlopen(API_POST_FAVORITE, data=data)
      resp = {'type': 'like'}

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to retweet.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
        'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      return source.creation_result(
        description='<span class="verb">retweet</span> <a href="%s">'
                    'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
    else:
      data = urllib.urlencode({'id': base_id})
      resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply:  # a tweet
    content = unicode(content).encode('utf-8')
    data = {'status': content}

    if is_reply:
      description = \
        '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
          base_url, self.embed_post(base_obj))
      data['in_reply_to_status_id'] = base_id
    else:
      description = '<span class="verb">tweet</span>:'

    if video_url:
      preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                          'this video</a></video>' % (video_url, video_url))
      if not preview:
        ret = self.upload_video(video_url)
        if isinstance(ret, source.CreationResult):
          return ret
        data['media_ids'] = ret

    elif image_urls:
      num_urls = len(image_urls)
      if num_urls > MAX_MEDIA:
        image_urls = image_urls[:MAX_MEDIA]
        logging.warning('Found %d photos! Only using the first %d: %r',
                        num_urls, MAX_MEDIA, image_urls)
      preview_content += '<br /><br />' + ' '.join(
        '<img src="%s" />' % url for url in image_urls)
      if not preview:
        data['media_ids'] = ','.join(self.upload_images(image_urls))

    if lat and lng:
      preview_content += (
        '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
        '%s, %s</a></div>' % (lat, lng, lat, lng))
      data['lat'] = lat
      data['long'] = lng

    if preview:
      return source.creation_result(content=preview_content,
                                    description=description)
    else:
      resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
      resp['type'] = 'comment' if is_reply else 'post'

  elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
    return source.creation_result(
      abort=True,
      error_plain='Cannot publish RSVPs to Twitter.',
      error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
      'Publishing events or RSVPs to Twitter is not supported.')

  else:
    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
      error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

  id_str = resp.get('id_str')
  if id_str:
    resp.update({'id': id_str, 'url': self.tweet_url(resp)})
  elif 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
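# Twitter only threads a reply when the parent tweet's author is @-mentioned,
# which is why _create extracts the username from the in-reply-to URL's path.
# The same step as a standalone sketch; it shares the prefix-username caveat
# noted in the TODO above.
import urllib.parse

def add_reply_mention(content, in_reply_to_url):
  """Prepends @author if the in-reply-to tweet's author isn't mentioned."""
  parts = urllib.parse.urlparse(in_reply_to_url).path.split('/')
  if len(parts) < 2 or not parts[1]:
    raise ValueError('Could not determine author of in-reply-to URL %s' %
                     in_reply_to_url)
  mention = '@' + parts[1]
  if mention.lower() not in content.lower():
    content = mention + ' ' + content
  return content

assert add_reply_mention('nice!', 'https://twitter.com/alice/status/123') == '@alice nice!'
assert add_reply_mention('@alice ok', 'https://twitter.com/alice/status/123') == '@alice ok'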