def postprocess_activity(self, activity):
    """Does source-independent post-processing of an activity.

    Right now just populates the title field, and only when the activity has
    an object and doesn't already have a title.

    Args:
      activity: activity dict

    Returns:
      the activity, after passing it through util.trim_nulls()
    """
    activity = util.trim_nulls(activity)
    # maps object type to human-readable name to use in title
    TYPE_DISPLAY_NAMES = {'image': 'photo', 'product': 'gift'}
    # maps verb to human-readable verb
    DISPLAY_VERBS = {
        'like': 'likes',
        'listen': 'listened to',
        'play': 'watched',
        'read': 'read',
        'give': 'gave',
    }

    actor_name = self.actor_name(activity.get('actor'))
    obj = activity.get('object')

    if obj and not activity.get('title'):
        # .get() rather than []: an activity without a verb should fall back
        # to the defaults below instead of raising KeyError.
        verb = DISPLAY_VERBS.get(activity.get('verb'))
        obj_name = obj.get('displayName')
        if obj_name and not verb:
            # no displayable verb, so the object's own name is the best title
            activity['title'] = obj_name
        else:
            app = activity.get('generator', {}).get('displayName')
            obj_type = TYPE_DISPLAY_NAMES.get(obj.get('objectType'), 'unknown')
            name = obj_name if obj_name else 'a %s' % obj_type
            app = ' on %s' % app if app else ''
            activity['title'] = '%s %s %s%s.' % (
                actor_name, verb or 'posted', name, app)

    return util.trim_nulls(activity)
def jsonfeed_to_activities(jsonfeed):
    """Converts a JSON feed to ActivityStreams activities and actor.

    Fields that are explicitly null in the feed (eg "author": null or
    "attachments": null) are treated the same as missing fields.

    Args:
      jsonfeed: dict, JSON Feed data

    Returns:
      (activities, actor) tuple, where activities and actor are both
      ActivityStreams object dicts

    Raises:
      ValueError, if jsonfeed isn't a valid JSON Feed dict, ie if the feed
      itself, an author, an item, or an attachment isn't a dict (or compatible)
    """
    def dict_or_empty(value, name):
        """Returns value, or {} if it's None. Raises ValueError on non-dicts."""
        if value is None:
            return {}
        if not hasattr(value, 'get'):
            raise ValueError('Expected %s to be dict (or compatible), got %s' %
                             (name, value.__class__.__name__))
        return value

    if not hasattr(jsonfeed, 'get'):
        raise ValueError('Expected dict (or compatible), got %s' %
                         jsonfeed.__class__.__name__)

    author = dict_or_empty(jsonfeed.get('author'), 'author')
    actor = {
        'objectType': 'person',
        'url': author.get('url'),
        'image': [{'url': author.get('avatar')}],
        'displayName': author.get('name'),
    }

    def attachment(jf):
        """Converts a single JSON Feed attachment to an AS1 object."""
        jf = dict_or_empty(jf, 'attachment')
        url = jf.get('url')
        # only the top-level media type is used, eg 'audio' for 'audio/mpeg'
        media_type = (jf.get('mime_type') or '').split('/')[0]
        as1 = {
            'objectType': media_type,
            'title': jf.get('title'),
        }
        if media_type in ('audio', 'video'):
            as1['stream'] = {'url': url}
        else:
            as1['url'] = url
        return as1

    activities = []
    for item in jsonfeed.get('items') or []:
        if not hasattr(item, 'get'):
            raise ValueError('Expected item to be dict (or compatible), got %s' %
                             item.__class__.__name__)
        item_author = dict_or_empty(item.get('author'), 'item author')
        activities.append({'object': {
            # items with a title are articles, untitled ones are notes
            'objectType': 'article' if item.get('title') else 'note',
            'title': item.get('title'),
            'summary': item.get('summary'),
            'content': item.get('content_html') or item.get('content_text'),
            'id': str(item.get('id') or ''),
            'published': item.get('date_published'),
            'updated': item.get('date_modified'),
            'url': item.get('url'),
            'image': [{'url': item.get('image')}],
            'author': {
                'displayName': item_author.get('name'),
                'image': [{'url': item_author.get('avatar')}],
            },
            'attachments': [attachment(a)
                            for a in item.get('attachments') or []],
        }})

    return (util.trim_nulls(activities), util.trim_nulls(actor))
def postprocess_object(self, obj):
    """Does source-independent post-processing of an object.

    Fills in content for verbs that have an entry in RSVP_CONTENTS when the
    object has no content of its own, then fills in displayName if it's
    missing and there's content to derive it from. Modifies obj in place.

    Args:
      obj: object dict

    Returns:
      obj, after passing it through util.trim_nulls()
    """
    verb = obj.get('verb')
    text = obj.get('content')
    rsvp_text = RSVP_CONTENTS.get(verb)

    if rsvp_text and not text:
        text = rsvp_text
        if verb.startswith('rsvp-'):
            # rsvp-yes, rsvp-no, etc: wrap in microformats2 p-rsvp markup
            text = '<data class="p-rsvp" value="%s">%s</data>' % (
                verb.split('-')[1], rsvp_text)
        obj['content'] = text

    # nothing to derive a displayName from, or it's already set
    if not text or obj.get('displayName'):
        return util.trim_nulls(obj)

    who = self.actor_name(obj.get('author') or obj.get('actor'))
    if verb in ('like', 'share'):
        name = '%s %s' % (who, text)
    elif rsvp_text:
        if verb == 'invite':
            # for invites, the name comes from the object instead of the author
            who = self.actor_name(obj.get('object'))
        name = '%s %s' % (who, rsvp_text)
    else:
        name = util.ellipsize(text)

    obj['displayName'] = name
    return util.trim_nulls(obj)
def object_urls(obj):
    """Returns an object's unique URLs, preserving order.

    Collects the object's url field followed by the value of each entry in
    its urls field. A string argument is returned as is.

    Args:
      obj: object dict, or string

    Returns:
      obj itself if it's a string, otherwise the result of uniquify() on the
      collected, null-trimmed URLs
    """
    if isinstance(obj, basestring):
        return obj

    candidates = [obj.get('url')]
    candidates.extend(entry.get('value') for entry in obj.get('urls', []))
    return uniquify(util.trim_nulls(candidates))
def user_to_actor(self, user):
    """Converts an Instagram user to an ActivityStreams actor.

    Args:
      user: JSON object from the Instagram API

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty; only id and username if either of those is missing.
    """
    if not user:
        return {}

    user_id = user.get('id')
    username = user.get('username')
    actor = {'id': self.tag_uri(user_id or username), 'username': username}

    # without both an id and a username, this minimal actor is all we return
    if not (user_id and username):
        return actor

    actor.update({
        'objectType': 'person',
        'displayName': user.get('full_name') or username,
        'image': {'url': user.get('profile_picture')},
        # prefer the user's own web site, fall back to their profile URL
        'url': user.get('website') or self.user_url(username),
        'description': user.get('bio'),
    })
    return util.trim_nulls(actor)
def user_to_actor(self, user):
    """Converts a Twitter user to an ActivityStreams actor.

    Args:
      user: dict, a decoded JSON Twitter user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if the
      user has no screen_name.
    """
    username = user.get('screen_name')
    if not username:
        return {}

    profile_url = user.get('url')
    if not profile_url:
        profile_url = self.user_url(username)
    else:
        # the url field may have a matching entity with its expanded form
        url_entities = user.get('entities', {}).get('url', {}).get('urls', [])
        for entity in url_entities:
            expanded = entity.get('expanded_url')
            if entity['url'] == profile_url and expanded:
                profile_url = expanded

    actor = {
        'displayName': user.get('name'),
        'image': {'url': user.get('profile_image_url')},
        'id': self.tag_uri(username),
        # numeric_id is our own custom field that always has the source's
        # numeric user id, if available.
        'numeric_id': user.get('id_str'),
        'published': self.rfc2822_to_iso8601(user.get('created_at')),
        'url': profile_url,
        'location': {'displayName': user.get('location')},
        'username': username,
        'description': user.get('description'),
    }
    return util.trim_nulls(actor)
def user_to_actor(self, user):
    """Converts an Instagram user to an ActivityStreams actor.

    Args:
      user: python_instagram.models.User or dict. Dicts are converted with
        User.object_from_dictionary() first.

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty; only id and username if either of those is missing.
    """
    if not user:
        return {}

    if isinstance(user, dict):
        user = python_instagram.models.User.object_from_dictionary(user)

    user_id = getattr(user, 'id', None)
    username = getattr(user, 'username', None)
    actor = {'id': self.tag_uri(user_id or username), 'username': username}

    # without both an id and a username, this minimal actor is all we return
    if not (user_id and username):
        return actor

    # prefer the user's own web site, fall back to their profile page
    website = getattr(user, 'website', None)
    profile_url = website if website else 'http://instagram.com/' + username

    actor['objectType'] = 'person'
    actor['displayName'] = user.full_name
    actor['image'] = {'url': user.profile_picture}
    actor['url'] = profile_url
    actor['description'] = getattr(user, 'bio', None)
    return util.trim_nulls(actor)
def original_post_discovery(activity):
    """Discovers original post links and stores them as tags, in place.

    This is a variation on http://indiewebcamp.com/original-post-discovery . It
    differs in that it finds multiple candidate links instead of one, and it
    doesn't bother looking for MF2 (etc) markup because the silos don't let you
    input it.

    Candidates come from links in the content, article attachments, and
    permashortcitations. Each is appended to the object's tags as an article.

    Args:
      activity: activity dict

    Returns:
      the same activity dict that was passed in
    """
    obj = activity.get('object') or activity
    # treat explicitly null content like missing content
    content = (obj.get('content') or '').strip()

    # Permashortcitations are short references to canonical copies of a given
    # (usually syndicated) post, of the form (DOMAIN PATH). Details:
    # http://indiewebcamp.com/permashortcitation
    pscs = set(match.expand(r'http://\1/\2')
               for match in Source._PERMASHORTCITATION_RE.finditer(content))

    # .get() rather than []: attachments aren't guaranteed to have an
    # objectType, and one without it shouldn't raise KeyError.
    attachments = set(a.get('url') for a in obj.get('attachments', [])
                      if a.get('objectType') == 'article')

    urls = util.trim_nulls(util.extract_links(content) | attachments | pscs)
    obj.setdefault('tags', []).extend(
        {'objectType': 'article', 'url': u} for u in urls)
    return activity
def user_to_actor(self, user):
    """Converts a Facebook user or page to an ActivityStreams actor.

    Args:
      user: dict, a decoded JSON Facebook user or page

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty or has neither a username nor an id.
    """
    if not user:
        return {}

    fb_id = user.get('id')
    username = user.get('username')
    handle = username or fb_id
    if not handle:
        return {}

    # FB only returns the type field if you fetch the object with ?metadata=1
    # https://developers.facebook.com/docs/graph-api/using-graph-api/v2.2#introspection
    object_type = 'page' if user.get('type') == 'page' else 'person'

    actor = {
        'objectType': object_type,
        'displayName': user.get('name') or username,
        'id': self.tag_uri(handle),
        'updated': util.maybe_iso8601_to_rfc3339(user.get('updated_time')),
        'username': username,
        'description': user.get('bio') or user.get('description'),
        'summary': user.get('about'),
    }

    if util.is_int(fb_id):
        # numeric_id is our own custom field that always has the source's
        # numeric user id, if available.
        actor['numeric_id'] = fb_id
        # facebook implements this as a 302 redirect
        actor['image'] = {
            'url': 'https://graph.facebook.com/v2.2/%s/picture?type=large' % fb_id,
        }

    # extract web site links. extract_links uniquifies and preserves order
    links = util.extract_links(user.get('website'))
    if not links:
        links = util.extract_links(user.get('link')) or [self.user_url(handle)]

    actor['url'] = links[0]
    if len(links) > 1:
        actor['urls'] = [{'value': link} for link in links]

    place = user.get('location')
    if place:
        actor['location'] = {
            'id': place.get('id'),
            'displayName': place.get('name'),
        }

    return util.trim_nulls(actor)
def urlopen_batch_full(self, requests):
    """Sends a batch of multiple API calls using Facebook's batch API.

    Similar to urlopen_batch(), but the requests arg and return value are dicts
    with headers, HTTP status code, etc. Only raises urllib2.HTTPError if the
    outer batch request itself returns an HTTP error.

    https://developers.facebook.com/docs/graph-api/making-multiple-requests

    The request dicts passed in are not modified; they're copied before the
    method default and header conversion are applied.

    Args:
      requests: sequence of dict requests in Facebook's batch format, except
        that headers is a single dict, not a list of dicts.

        [{'relative_url': 'me/feed',
          'headers': {'ETag': 'xyz', ...},
         },
         ...
        ]

    Returns:
      sequence of dict responses in Facebook's batch format, except that body
      is JSON-decoded if possible, and headers is a single dict, not a list of
      dicts.

        [{'code': 200,
          'headers': {'ETag': 'xyz', ...},
          'body': {...},
         },
         ...
        ]
    """
    batch = []
    for req in requests:
        # shallow copy so that the caller's dicts keep their dict-style
        # headers and can be reused for another call
        req = dict(req)
        req.setdefault('method', 'GET')
        if 'headers' in req:
            # Facebook's batch format wants a list of name/value pairs
            req['headers'] = [{'name': n, 'value': v}
                              for n, v in req['headers'].items()]
        batch.append(req)

    # NOTE(review): the JSON isn't URL-encoded here; presumably self.urlopen()
    # or relative_url values make that safe - confirm.
    data = 'batch=' + json.dumps(util.trim_nulls(batch),
                                 separators=(',', ':'))  # no whitespace
    resps = self.urlopen('', data=data)

    for resp in resps:
        if 'headers' in resp:
            resp['headers'] = {h['name']: h['value'] for h in resp['headers']}

        body = resp.get('body')
        if body:
            try:
                resp['body'] = json.loads(body)
            except (ValueError, TypeError):
                # deliberately best effort: leave non-JSON bodies as is
                pass

    return resps
def get_comment(self, comment_id, activity_id=None, activity_author_id=None):
    """Returns an ActivityStreams comment object.

    Fetches the media object for activity_id and looks through its comments
    for the one with the given id.

    Args:
      comment_id: string comment id
      activity_id: string activity id, optional
      activity_author_id: string activity author id. Ignored.

    Returns:
      the result of comment_to_object() for the first matching comment, or
      None if no comment matches
    """
    media = util.trim_nulls(self.urlopen(API_MEDIA_URL % activity_id) or {})
    comments = media.get('comments', {}).get('data', [])

    found = next((c for c in comments if c.get('id') == comment_id), None)
    if found is None:
        return None
    return self.comment_to_object(found, activity_id, media.get('link'))
def is_public(obj):
    """Returns True if the object is public, False if private, None if unknown.

    ...according to the Audience Targeting extension
    https://developers.google.com/+/api/latest/activities/list#collection

    Expects values generated by this library: objectType group, alias @public
    or @private. The to field is read from obj itself or, failing that, from
    its inner object.

    Also, important point: this defaults to true, ie public. Bridgy depends on
    that and prunes the to field from stored activities in Response objects (in
    bridgy/util.prune_activity()). If the default here ever changes, be sure to
    update Bridgy's code.

    Args:
      obj: object or activity dict

    Returns:
      True, False, or None
    """
    to = obj.get('to') or obj.get('object', {}).get('to') or []
    aliases = util.trim_nulls([target.get('alias') for target in to])
    object_types = util.trim_nulls([target.get('objectType') for target in to])

    if '@public' in aliases:
        return True
    if 'unknown' in object_types:
        return None
    # any other alias means a restricted audience; no aliases means public
    return not aliases
def user_to_actor(self, user):
    """Converts a Twitter user to an ActivityStreams actor.

    Args:
      user: dict, a decoded JSON Twitter user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if the
      user has no screen_name.
    """
    username = user.get('screen_name')
    if not username:
        return {}

    # collect expanded links from the url field first, then the description
    entities = user.get('entities', {})
    expanded = []
    for field in ('url', 'description'):
        for entity in entities.get(field, {}).get('urls', []):
            expanded.append(entity.get('expanded_url'))
    urls = util.trim_nulls(expanded)

    if urls:
        url = urls[0]
    else:
        url = user.get('url') or self.user_url(username)

    # NOTE(review): PROFILE_PICTURE_URL is defined outside this block. If it's
    # a non-empty template, the formatted string is always truthy and the two
    # profile_image_url fallbacks never apply - confirm that's intended.
    image = (PROFILE_PICTURE_URL % username
             or user.get('profile_image_url_https')
             or user.get('profile_image_url'))
    if image:
        # remove _normal for a ~256x256 avatar rather than ~48x48
        image = image.replace('_normal.', '.', 1)

    actor = {
        'objectType': 'person',
        'displayName': user.get('name') or username,
        'image': {'url': image},
        'id': self.tag_uri(username),
        # numeric_id is our own custom field that always has the source's
        # numeric user id, if available.
        'numeric_id': user.get('id_str'),
        'published': self.rfc2822_to_iso8601(user.get('created_at')),
        'url': url,
        'urls': [{'value': u} for u in urls],
        'location': {'displayName': user.get('location')},
        'username': username,
        'description': user.get('description'),
    }
    return util.trim_nulls(actor)
def get_actor(self, user_id=None):
    """Returns a user as a JSON ActivityStreams actor dict.

    Args:
      user_id: string id or username. Defaults to 'self', ie the current user.

    Returns:
      the result of user_to_actor() on the null-trimmed API response

    Raises:
      InstagramAPIError
    """
    target = 'self' if user_id is None else user_id
    resp = self.urlopen(API_USER_URL % target) or {}
    return self.user_to_actor(util.trim_nulls(resp))
def postprocess_activity(self, activity):
    """Does source-independent post-processing of an activity.

    Right now just populates the title field, and only when the activity has
    an object and doesn't already have a title. The title is either the
    object's displayName (when the verb has no display form), or a sentence
    like "ACTOR VERB NAME on APP." (when the verb has a display form and the
    object has a name or a known type). Otherwise no title is set.

    Args:
      activity: activity dict

    Returns:
      the activity, after passing it through util.trim_nulls()
    """
    activity = util.trim_nulls(activity)
    # maps object type to human-readable name to use in title
    TYPE_DISPLAY_NAMES = {"image": "photo", "product": "gift"}
    # maps verb to human-readable verb
    DISPLAY_VERBS = {
        "give": "gave",
        "like": "likes",
        "listen": "listened to",
        "play": "watched",
        "read": "read",
        "share": "shared",
    }

    actor_name = self.actor_name(activity.get("actor"))
    obj = activity.get("object")

    if obj and not activity.get("title"):
        # .get() rather than []: an activity without a verb should be handled
        # like one with an unknown verb instead of raising KeyError.
        verb = DISPLAY_VERBS.get(activity.get("verb"))
        obj_name = obj.get("displayName")
        obj_type = TYPE_DISPLAY_NAMES.get(obj.get("objectType"))

        if obj_name and not verb:
            activity["title"] = obj_name
        elif verb and (obj_name or obj_type):
            app = activity.get("generator", {}).get("displayName")
            # this branch guarantees verb is set, and that obj_type is set
            # whenever obj_name isn't, so neither needs a fallback here
            name = obj_name if obj_name else "a %s" % obj_type
            app = " on %s" % app if app else ""
            activity["title"] = "%s %s %s%s." % (actor_name, verb, name, app)

    return util.trim_nulls(activity)
def postprocess_object(obj):
    """Does source-independent post-processing of an object, in place.

    Populates location.position based on latitude and longitude, if both are
    present and position isn't already set.

    Args:
      obj: object dict

    Returns:
      obj, after passing it through util.trim_nulls()
    """
    loc = obj.get('location')
    if loc:
        lat = loc.get('latitude')
        lon = loc.get('longitude')
        # check for missing values explicitly instead of by truthiness: 0 is a
        # valid coordinate (the equator, the prime meridian)
        has_coords = lat not in (None, '') and lon not in (None, '')
        if has_coords and not loc.get('position'):
            # ISO 6709 location string. details: http://en.wikipedia.org/wiki/ISO_6709
            loc['position'] = '%0+10.6f%0+11.6f/' % (lat, lon)

    return util.trim_nulls(obj)
def postprocess_object(self, obj):
    """Does source-independent post-processing of an object, in place.

    * populates location.position based on latitude and longitude, if both
      are present and position isn't already set

    Args:
      obj: object dict

    Returns:
      obj, after passing it through util.trim_nulls()
    """
    loc = obj.get("location")
    if loc:
        lat = loc.get("latitude")
        lon = loc.get("longitude")
        # check for missing values explicitly instead of by truthiness: 0 is a
        # valid coordinate (the equator, the prime meridian)
        has_coords = lat not in (None, "") and lon not in (None, "")
        if has_coords and not loc.get("position"):
            # ISO 6709 location string. details: http://en.wikipedia.org/wiki/ISO_6709
            loc["position"] = "%+f%+f/" % (lat, lon)

    return util.trim_nulls(obj)
def fetch_mentions(self, username, min_id=None):
    """Fetches a user's @-mentions.

    Tries to only include explicit mentions, not mentions automatically created
    by @-replying. See the get_activities() docstring for details.

    A search result is dropped if it was posted by the user themselves, if it's
    a retweet, or if it's a reply to a tweet that was written by the user or
    that already mentioned them.

    Args:
      username: string
      min_id: only return activities with ids greater than this

    Returns:
      list of dicts, the entries from the search response's 'statuses' field
      that are considered mentions. NOTE(review): the previous docstring said
      these are activity dicts, but no conversion happens in this method -
      confirm what callers expect.
    """
    # get mentions
    url = API_SEARCH_URL % {
        'q': urllib.quote_plus('@' + username),
        'count': 100,
    }
    if min_id is not None:
        url = util.add_query_params(url, {'since_id': min_id})
    candidates = self.urlopen(url)['statuses']

    # fetch in-reply-to tweets (if any). skip the lookup request entirely when
    # none of the candidates is a reply, instead of sending an empty id list.
    in_reply_to_ids = util.trim_nulls(
        [c.get('in_reply_to_status_id_str') for c in candidates])
    origs = {}
    if in_reply_to_ids:
        origs = {o.get('id_str'): o for o in
                 self.urlopen(API_LOOKUP_URL % ','.join(in_reply_to_ids))}

    # filter out tweets that we don't consider mentions
    mentions = []
    for c in candidates:
        if (c.get('user', {}).get('screen_name') == username or
                c.get('retweeted_status')):
            continue

        reply_to = origs.get(c.get('in_reply_to_status_id_str'))
        if not reply_to:
            mentions.append(c)
            continue

        # replies only count if the original tweet neither came from the user
        # nor mentioned them, ie the @-mention was added deliberately
        reply_to_user = reply_to.get('user', {}).get('screen_name')
        mentioned = [u.get('screen_name') for u in
                     reply_to.get('entities', {}).get('user_mentions', [])]
        if username != reply_to_user and username not in mentioned:
            mentions.append(c)

    return mentions
def get_actor(self, user_id=None):
    """Returns a user as a JSON ActivityStreams actor dict.

    When scraping, the actor comes from the 'actor' field of
    get_activities_response(); otherwise it's fetched from the API.

    Args:
      user_id: string id or username. Defaults to 'self', ie the current user.
        Required when scraping.

    Raises:
      InstagramAPIError
    """
    if user_id is None:
        assert self.scrape is False, 'get_actor() requires user_id when scraping'
        user_id = 'self'

    if not self.scrape:
        api_resp = self.urlopen(API_USER_URL % user_id) or {}
        return self.user_to_actor(util.trim_nulls(api_resp))

    scraped = self.get_activities_response(group_id=source.SELF,
                                           user_id=user_id)
    return scraped.get('actor')
def user_to_actor(self, user):
    """Converts a Facebook user to an ActivityStreams actor.

    Args:
      user: dict, a decoded JSON Facebook user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty or has neither a username nor an id.
    """
    if not user:
        return {}

    fb_id = user.get('id')
    username = user.get('username')
    handle = username or fb_id
    if not handle:
        return {}

    # prefer the user's own web site, then their link, then their profile
    profile_url = user.get('website')
    if not profile_url:
        profile_url = user.get('link')
    if not profile_url:
        profile_url = 'http://facebook.com/' + handle

    actor = {
        'displayName': user.get('name'),
        # facebook implements this as a 302 redirect
        'image': {
            'url': 'http://graph.facebook.com/%s/picture?type=large' % handle,
        },
        'id': self.tag_uri(handle),
        # numeric_id is our own custom field that always has the source's
        # numeric user id, if available.
        'numeric_id': fb_id,
        'updated': util.maybe_iso8601_to_rfc3339(user.get('updated_time')),
        'url': profile_url,
        'username': username,
        'description': user.get('bio'),
    }

    place = user.get('location')
    if place:
        actor['location'] = {
            'id': place.get('id'),
            'displayName': place.get('name'),
        }

    return util.trim_nulls(actor)
def user_to_actor(self, resp):
    """Convert a Flickr user dict into an ActivityStreams actor.

    Also fetches the user's Flickr profile page, if the response has a
    profileurl, and uses its rel-me links to populate the url and urls fields.
    If that fetch fails with a URLError, the actor is returned without them.

    Args:
      resp: dict, response with the user's data in its 'person' field

    Returns:
      an ActivityStreams actor dict, with nulls trimmed
    """
    person = resp.get('person', {})
    username = person.get('username', {}).get('_content')
    obj = util.trim_nulls({
        'objectType': 'person',
        'displayName': person.get('realname', {}).get('_content') or username,
        'image': {
            'url': self.get_user_image(person.get('iconfarm'),
                                       person.get('iconserver'),
                                       person.get('nsid')),
        },
        'id': self.tag_uri(username),
        # numeric_id is our own custom field that always has the source's
        # numeric user id, if available.
        'numeric_id': person.get('nsid'),
        'location': {
            'displayName': person.get('location', {}).get('_content'),
        },
        'username': username,
        'description': person.get('description', {}).get('_content'),
    })

    # fetch profile page to get url(s)
    profile_url = person.get('profileurl', {}).get('_content')
    if profile_url:
        try:
            logging.debug('fetching flickr profile page %s', profile_url)
            page = urllib2.urlopen(
                profile_url, timeout=appengine_config.HTTP_TIMEOUT)
            try:
                profile_json = mf2py.parse(doc=page, url=profile_url)
            finally:
                # don't leak the connection, even if parsing fails
                page.close()

            # personal site is likely the first non-flickr url
            urls = profile_json.get('rels', {}).get('me', [])
            obj['urls'] = [{'value': u} for u in urls]
            obj['url'] = next(
                (u for u in urls if not u.startswith('https://www.flickr.com/')),
                None)
        except urllib2.URLError:
            # best effort: the actor is still useful without its urls
            logging.warning('could not fetch user homepage %s', profile_url)

    # url may be None and urls empty at this point, so trim again. previously
    # this function fell off the end and returned None.
    return util.trim_nulls(obj)
def user_to_actor(self, resp):
    """Convert a Flickr user dict into an ActivityStreams actor.

    Also fetches the user's Flickr profile page, if the response has a
    profileurl, and uses its rel-me links to populate the url field and, when
    there's more than one, the urls field. If that fetch fails with a
    URLError, the actor is returned without them.

    Args:
      resp: dict, response with the user's data in its 'person' field

    Returns:
      the result of postprocess_object() on the actor dict
    """
    person = resp.get('person', {})

    def content(field):
        """Returns the _content value of one of person's nested fields."""
        return person.get(field, {}).get('_content')

    username = content('username')
    actor = util.trim_nulls({
        'objectType': 'person',
        'displayName': content('realname') or username,
        'image': {
            'url': self.get_user_image(person.get('iconfarm'),
                                       person.get('iconserver'),
                                       person.get('nsid')),
        },
        'id': self.tag_uri(username),
        # numeric_id is our own custom field that always has the source's
        # numeric user id, if available.
        'numeric_id': person.get('nsid'),
        'location': {'displayName': content('location')},
        'username': username,
        'description': content('description'),
    })

    # fetch profile page to get url(s)
    profile_url = content('profileurl')
    if not profile_url:
        return self.postprocess_object(actor)

    try:
        page = util.urlopen(profile_url)
        parsed = mf2py.parse(doc=page, url=profile_url, img_with_alt=True)
        me_links = parsed.get('rels', {}).get('me', [])
        if me_links:
            actor['url'] = me_links[0]
        if len(me_links) > 1:
            actor['urls'] = [{'value': link} for link in me_links]
    except urllib_error.URLError:
        logging.warning('could not fetch user homepage %s', profile_url)

    return self.postprocess_object(actor)
def user_to_actor(self, user):
    """Converts an Instagram user to an ActivityStreams actor.

    The url (and urls, if more than one) fields come from links extracted from
    the user's website and bio, falling back to their profile URL.

    Args:
      user: JSON object from the Instagram API

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty; only id and username if either of those is missing.
    """
    if not user:
        return {}

    user_id = user.get('id')
    username = user.get('username')
    actor = {'id': self.tag_uri(user_id or username), 'username': username}

    # without both an id and a username, this minimal actor is all we return
    if not (user_id and username):
        return actor

    # links from the website field come first, then links from the bio
    links = []
    for field in ('website', 'bio'):
        links = links + util.extract_links(user.get(field))

    if not links:
        actor['url'] = self.user_url(username)
    else:
        actor['url'] = links[0]
        if len(links) > 1:
            actor['urls'] = [{'value': link} for link in links]

    actor['objectType'] = 'person'
    actor['displayName'] = user.get('full_name') or username
    actor['image'] = {'url': user.get('profile_picture')}
    actor['description'] = user.get('bio')
    return util.trim_nulls(actor)
def user_to_actor(self, user):
    """Converts an Instagram user to an ActivityStreams actor.

    Args:
      user: python_instagram.models.User or dict. Dicts are converted with
        User.object_from_dictionary() first.

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty; only id and username if either of those is missing.
    """
    if not user:
        return {}

    if isinstance(user, dict):
        user = python_instagram.models.User.object_from_dictionary(user)

    user_id = getattr(user, 'id', None)
    username = getattr(user, 'username', None)
    actor = {'id': self.tag_uri(user_id or username), 'username': username}

    # without both an id and a username, this minimal actor is all we return
    if not (user_id and username):
        return actor

    # prefer the user's own web site, fall back to their profile URL
    website = getattr(user, 'website', None)
    profile_url = website if website else self.user_url(username)

    actor['objectType'] = 'person'
    actor['displayName'] = user.full_name
    actor['image'] = {'url': user.profile_picture}
    actor['url'] = profile_url
    actor['description'] = getattr(user, 'bio', None)
    return util.trim_nulls(actor)
def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None,
                           home_page_url=None):
    """Converts ActivityStreams activities to a JSON feed.

    Activities whose object is a person are skipped.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: the URL of the JSON Feed, if any. Included in the feed_url field.
      home_page_url: string, the home page URL

    Returns:
      dict, JSON Feed data, ready to be JSON-encoded

    Raises:
      TypeError, if activities isn't iterable, or is a dict or string
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, basestring)):
        raise TypeError('activities may not be a dict or string')

    def first_image_url(thing):
        """Returns the URL of thing's first image, if any."""
        return util.get_first(thing, 'image', {}).get('url')

    def display_name(thing):
        """Returns thing's displayName, falling back to its username."""
        return thing.get('displayName') or thing.get('username')

    def to_attachment(att):
        """Returns a JSON Feed attachment for att, or None if unsupported."""
        # the URL comes from the stream, then the image, then att itself
        holder = (util.get_first(att, 'stream') or
                  util.get_first(att, 'image') or
                  att)
        url = holder.get('url')
        mime = mimetypes.guess_type(url)[0] if url else None
        supported = (att.get('objectType') in ATTACHMENT_TYPES or
                     mime and mime.split('/')[0] in ATTACHMENT_TYPES)
        if not supported:
            return None
        return {
            'url': url or '',
            'mime_type': mime,
            'title': att.get('title'),
        }

    actor = actor or {}

    items = []
    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        author = obj.get('author', {})
        content = microformats2.render_content(
            obj, include_location=True, render_attachments=True)
        name = obj.get('title') or obj.get('displayName')
        if not mf2util.is_name_a_title(name, content):
            name = None

        item = {
            'id': obj.get('id') or obj.get('url'),
            'url': obj.get('url'),
            'image': first_image_url(obj),
            'title': name,
            'summary': obj.get('summary'),
            'content_html': content,
            'date_published': obj.get('published'),
            'date_modified': obj.get('updated'),
            'author': {
                'name': display_name(author),
                'url': author.get('url'),
                'avatar': first_image_url(author),
            },
            'attachments': [],
        }

        for att in obj.get('attachments', []):
            converted = to_attachment(att)
            if converted is not None:
                item['attachments'].append(converted)

        if not content:
            # empty on purpose; trim_nulls below is told to keep this field
            item['content_text'] = ''

        items.append(item)

    feed = {
        'version': 'https://jsonfeed.org/version/1',
        'title': title or display_name(actor) or 'JSON Feed',
        'feed_url': feed_url,
        'home_page_url': home_page_url or actor.get('url'),
        'author': {
            'name': display_name(actor),
            'url': actor.get('url'),
            'avatar': first_image_url(actor),
        },
        'items': items,
    }
    return util.trim_nulls(feed, ignore='content_text')
def template_vars(self, domain=None, url=None):
    """Generates WebFinger data for a domain.

    Looks for a representative h-card on url, then url's home page, then the
    domain's home page, and builds the WebFinger subject, aliases, and links
    from it. Calls error() (defined outside this method) if the domain's last
    component is in NON_TLDS or if no representative h-card is found.

    Args:
      domain: str. NOTE(review): defaults to None but is dereferenced right
        away, so callers presumably always pass it - confirm.
      url: str, optional. First page to look for the h-card on.

    Returns:
      dict, WebFinger data, with nulls trimmed
    """
    logging.debug(f'Headers: {list(request.headers.items())}')

    if domain.split('.')[-1] in NON_TLDS:
        error(f"{domain} doesn't look like a domain", status=404)

    # find representative h-card. try url, then url's home page, then domain
    candidates = [f'http://{domain}/']
    if url:
        candidates = [url, urllib.parse.urljoin(url, '/')] + candidates

    for candidate in candidates:
        resp = common.requests_get(candidate)
        parsed = util.parse_html(resp)
        mf2 = util.parse_mf2(parsed, url=resp.url)
        # logging.debug(f'Parsed mf2 for {resp.url}: {json_dumps(mf2, indent=2)}')
        hcard = mf2util.representative_hcard(mf2, resp.url)
        if hcard:
            logging.info(
                f'Representative h-card: {json_dumps(hcard, indent=2)}')
            break
    else:
        # only reached when no candidate had a representative h-card
        error(
            "didn't find a representative h-card "
            "(http://microformats.org/wiki/representative-hcard-parsing) "
            f"on {resp.url}")

    logging.info(f'Generating WebFinger data for {domain}')
    key = models.MagicKey.get_or_create(domain)
    props = hcard.get('properties', {})
    urls = util.dedupe_urls(props.get('url', []) + [resp.url])
    canonical_url = urls[0]

    # default username is the domain itself, unless the h-card has an acct:
    # URL on this same domain
    acct = f'{domain}@{domain}'
    for candidate_url in urls:
        if candidate_url.startswith('acct:'):
            urluser, urldomain = util.parse_acct_uri(candidate_url)
            if urldomain == domain:
                acct = f'{urluser}@{domain}'
                logging.info(f'Found custom username: acct:{acct}')
                break

    # discover atom feed, if any
    atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
    if atom and atom['href']:
        atom = urllib.parse.urljoin(resp.url, atom['href'])
    else:
        atom = 'https://granary.io/url?' + urllib.parse.urlencode({
            'input': 'html',
            'output': 'atom',
            'url': resp.url,
            'hub': resp.url,
        })

    # discover PuSH, if any. start from the default and stop at the first hub
    # link, so that other Link header entries can't overwrite a discovered hub.
    hub = 'https://bridgy-fed.superfeedr.com/'
    for link in resp.headers.get('Link', '').split(','):
        match = common.LINK_HEADER_RE.match(link)
        if match and match.group(2) == 'hub':
            hub = match.group(1)
            break

    profile_links = [{
        'rel': 'http://webfinger.net/rel/profile-page',
        'type': 'text/html',
        'href': profile,
    } for profile in urls if profile.startswith("http")]

    avatar_links = [{
        'rel': 'http://webfinger.net/rel/avatar',
        'href': get_text(photo),
    } for photo in props.get('photo', [])]

    # generate webfinger content
    data = util.trim_nulls({
        'subject': 'acct:' + acct,
        'aliases': urls,
        'magic_keys': [{'value': key.href()}],
        'links': profile_links + avatar_links + [
            {
                'rel': 'canonical_uri',
                'type': 'text/html',
                'href': canonical_url,
            },
            # ActivityPub
            {
                'rel': 'self',
                'type': common.CONTENT_TYPE_AS2,
                # WARNING: in python 2 sometimes request.host_url lost port,
                # http://localhost:8080 would become just http://localhost. no
                # clue how or why. pay attention here if that happens again.
                'href': f'{request.host_url}{domain}',
            },
            {
                'rel': 'inbox',
                'type': common.CONTENT_TYPE_AS2,
                'href': f'{request.host_url}{domain}/inbox',
            },
            # OStatus
            {
                'rel': 'http://schemas.google.com/g/2010#updates-from',
                'type': common.CONTENT_TYPE_ATOM,
                'href': atom,
            },
            {
                'rel': 'hub',
                'href': hub,
            },
            {
                'rel': 'magic-public-key',
                'href': key.href(),
            },
            {
                'rel': 'salmon',
                'href': f'{request.host_url}{domain}/salmon',
            },
        ],
    })
    logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
    return data
def postprocess_as2(self, activity, target=None, key=None):
    """Prepare an AS2 object to be served or sent via ActivityPub.

    Modifies activity in place. Person objects are returned right after their
    actor post-processing; everything else also gets inReplyTo, object, id,
    url, attachment, and cc normalized, and Articles and Notes are wrapped in
    a Create activity.

    Args:
      activity: dict, AS2 object or activity
      target: dict, AS2 object, optional. The target of activity's inReplyTo or
        Like/Announce/etc object, if any.
      key: MagicKey, optional. populated into publicKey field if provided.

    Returns:
      dict, the processed AS2 object or activity. Nulls are trimmed, except
      for Person objects, which are returned as is.
    """
    as2_type = activity.get('type')

    # actor objects
    if as2_type == 'Person':
        self.postprocess_as2_actor(activity)
        # key is optional, so only fill in publicKey when we have one
        if key is not None and not activity.get('publicKey'):
            # underspecified, inferred from this issue and Mastodon's implementation:
            # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
            # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
            activity['publicKey'] = {
                'id': activity.get('preferredUsername'),
                'publicKeyPem': key.public_pem().decode(),
            }
        return activity

    for actor in (util.get_list(activity, 'attributedTo') +
                  util.get_list(activity, 'actor')):
        self.postprocess_as2_actor(actor)

    # inReplyTo: singly valued, prefer id over url
    target_id = target.get('id') if target else None
    in_reply_to = activity.get('inReplyTo')
    if in_reply_to:
        if target_id:
            activity['inReplyTo'] = target_id
        elif isinstance(in_reply_to, list):
            if len(in_reply_to) > 1:
                logging.warning(
                    "AS2 doesn't support multiple inReplyTo URLs! "
                    'Only using the first: %s' % in_reply_to[0])
            activity['inReplyTo'] = in_reply_to[0]

        # Mastodon evidently requires a Mention tag for replies to generate a
        # notification to the original post's author. not required for likes,
        # reposts, etc. details:
        # https://github.com/snarfed/bridgy-fed/issues/34
        if target:
            for to in (util.get_list(target, 'attributedTo') +
                       util.get_list(target, 'actor')):
                if isinstance(to, dict):
                    to = to.get('url') or to.get('id')
                if to:
                    activity.setdefault('tag', []).append({
                        'type': 'Mention',
                        'href': to,
                    })

    # activity objects (for Like, Announce, etc): prefer id over url
    obj = activity.get('object')
    if obj:
        if isinstance(obj, dict) and not obj.get('id'):
            obj['id'] = target_id or obj.get('url')
        elif target_id and obj != target_id:
            activity['object'] = target_id

    # id is required for most things. default to url if it's not set.
    if not activity.get('id'):
        activity['id'] = activity.get('url')

    # TODO: find a better way to check this, sometimes or always?
    # removed for now since it fires on posts without u-id or u-url, eg
    # https://chrisbeckstrom.com/2018/12/27/32551/
    # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

    activity['id'] = self.redirect_wrap(activity.get('id'))
    activity['url'] = self.redirect_wrap(activity.get('url'))

    # copy image(s) into attachment(s). may be Mastodon-specific.
    # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
    #
    # use util.get_list() so that a single image dict is appended as one
    # attachment; extending with the raw dict would append its keys instead.
    # (presumably get_list also returns [] for a missing field, as the
    # concatenations above rely on - confirm.)
    obj_or_activity = obj if isinstance(obj, dict) else activity
    obj_or_activity.setdefault('attachment', []).extend(
        util.get_list(obj_or_activity, 'image'))

    # cc public and target's author(s) and recipients
    # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
    # https://w3c.github.io/activitypub/#delivery
    if as2_type in as2.TYPE_TO_VERB or as2_type in ('Article', 'Note'):
        recips = [AS2_PUBLIC_AUDIENCE]
        if target:
            recips += itertools.chain(*(util.get_list(target, field) for field in
                                        ('actor', 'attributedTo', 'to', 'cc')))
        activity['cc'] = util.dedupe_urls(util.get_url(recip) or recip.get('id')
                                          for recip in recips)

    # wrap articles and notes in a Create activity
    if as2_type in ('Article', 'Note'):
        activity = {
            '@context': as2.CONTEXT,
            'type': 'Create',
            'object': activity,
        }

    return util.trim_nulls(activity)
def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                            activity_id=None, start_index=0, count=0,
                            etag=None, min_id=None, cache=None,
                            fetch_replies=False, fetch_likes=False,
                            fetch_shares=False, fetch_events=False,
                            fetch_mentions=False, search_query=None,
                            scrape=False, cookie=None, **kwargs):
  """Fetches posts and converts them to ActivityStreams activities.

  See method docstring in source.py for details. app_id is ignored.
  Supports min_id, but not ETag, since Instagram doesn't support it.

  http://instagram.com/developer/endpoints/users/#get_users_feed
  http://instagram.com/developer/endpoints/users/#get_users_media_recent

  Likes are always included, regardless of the fetch_likes kwarg. They come
  bundled in the 'likes' field of the API Media object:
  http://instagram.com/developer/endpoints/media/#

  Mentions are never fetched or included because the API doesn't support
  searching for them.
  https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

  Shares are never fetched or included since there is no share feature.

  Instagram only supports search over hashtags, so if search_query is set, it
  must begin with #.

  May populate a custom 'ig_like_count' property in media objects. (Currently
  only when scraping.)

  Args:
    scrape: if True, scrapes HTML from instagram.com instead of using the API.
      Populates the user's actor object in the 'actor' response field.
      Useful for apps that haven't yet been approved in the new permissions
      approval process. Currently only supports group_id=SELF. Also supports
      passing a shortcode as activity_id as well as the internal API id.
      http://developers.instagram.com/post/133424514006/instagram-platform-update
    cookie: string, only used if scrape=True
    **: see :meth:`Source.get_activities_response`

  Returns:
    When scraping, whatever self._scrape returns. See the NOTE(review) at the
    end of the body about the API path.

  Raises:
    InstagramAPIError
    NotImplementedError: when scraping with an unsupported combination of
      activity_id, user_id, group_id, and cookie
    ValueError: if search_query is set but doesn't begin with #
  """
  if scrape or self.scrape:
    if not (activity_id or
            (group_id == source.SELF and user_id) or
            (group_id == source.FRIENDS and cookie)):
      raise NotImplementedError(
        'Scraping only supports activity_id, user_id and group_id=@self, '
        'or cookie and group_id=@friends.')
    return self._scrape(user_id=user_id, activity_id=activity_id,
                        cookie=cookie,
                        fetch_extras=fetch_replies or fetch_likes,
                        cache=cache)

  if user_id is None:
    user_id = 'self'
  if group_id is None:
    group_id = source.FRIENDS

  if search_query:
    if search_query.startswith('#'):
      search_query = search_query[1:]
    else:
      raise ValueError(
        'Instagram only supports search over hashtags, so search_query must '
        'begin with the # character.')

  # TODO: paging
  # query params for the API calls. kept separate from **kwargs, which this
  # method accepts but doesn't use.
  params = {}
  if min_id is not None:
    params['min_id'] = min_id

  activities = []
  try:
    media_url = (
      API_MEDIA_URL % activity_id if activity_id else
      API_USER_MEDIA_URL % user_id if group_id == source.SELF else
      API_MEDIA_POPULAR_URL if group_id == source.ALL else
      API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
      API_USER_FEED_URL if group_id == source.FRIENDS else
      None)
    assert media_url
    media = self.urlopen(util.add_query_params(media_url, params))
    if media:
      if activity_id:
        # a single-media lookup returns one object, not a list
        media = [media]
      activities += [self.media_to_activity(m)
                     for m in util.trim_nulls(media)]

    if group_id == source.SELF and fetch_likes:
      # add the user's own likes
      liked = self.urlopen(
        util.add_query_params(API_USER_LIKES_URL % user_id, params))
      if liked:
        user = self.urlopen(API_USER_URL % user_id)
        activities += [self.like_to_object(user, like['id'], like['link'])
                       for like in liked]

  except urllib2.HTTPError as e:
    _, body = util.interpret_http_exception(e)
    # instagram api should give us back a json block describing the
    # error. but if it's an error for some other reason, it probably won't
    # be properly formatted json.
    try:
      body_obj = json.loads(body) if body else {}
    except ValueError:
      body_obj = {}

    if body_obj.get('meta', {}).get('error_type') == 'APINotFoundError':
      logging.exception(body_obj.get('meta', {}).get('error_message'))
    else:
      # deliberately `raise e`, not a bare `raise`: on Python 2 a bare raise
      # here could re-raise the ValueError handled just above instead.
      raise e

  # NOTE(review): nothing visible here returns `activities` on the API path,
  # so it falls through and returns None. Confirm what the intended return
  # value is.
def object_urls(obj):
  """Collects an object's distinct URLs, keeping their original order.

  The top-level 'url' field comes first, followed by the 'value' of each
  entry in 'urls'. Missing values are dropped via util.trim_nulls before
  duplicates are removed via util.uniquify.

  Args:
    obj: dict, ActivityStreams object
  """
  primary = [obj.get('url')]
  extras = [entry.get('value') for entry in obj.get('urls', [])]
  candidates = util.trim_nulls(primary + extras)
  return util.uniquify(candidates)
def json_to_object(mf2, actor=None):
  """Builds an ActivityStreams object from a microformats2 JSON item.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.

  Returns:
    dict, ActivityStreams object. Empty if mf2 is falsy or not a dict.
  """
  if not (mf2 and isinstance(mf2, dict)):
    return {}

  # shallow copy so that adding a default 'properties' key doesn't touch the
  # caller's dict
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)

  rsvp = prop.get('rsvp')
  rsvp_verb = None
  if rsvp:
    rsvp_verb = 'rsvp-%s' % rsvp

  if prop.get('author'):
    author = json_to_object(prop['author'])
  else:
    author = actor

  # maps mf2 type to ActivityStreams objectType and optional verb.
  mf2_type_to_as_type = {
    'rsvp': ('activity', rsvp_verb),
    'invite': ('activity', 'invite'),
    'repost': ('activity', 'share'),
    'like': ('activity', 'like'),
    'reply': ('comment', None),
    'person': ('person', None),
    'location': ('place', None),
    'note': ('note', None),
    'article': ('article', None),
  }

  item_types = mf2.get('type') or []
  if 'h-geo' not in item_types and 'p-location' not in item_types:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    stripped = copy.deepcopy(mf2)
    stripped.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(stripped)
  else:
    mf2_type = 'location'

  as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

  def absolute_urls(name):
    """Returns [{'url': ...}] for each URL in property `name` with a netloc."""
    found = []
    for url in get_string_urls(props.get(name, [])):
      # filter out relative and invalid URLs (mf2py gives absolute urls)
      if urlparse.urlparse(url).netloc:
        found.append({'url': url})
    return found

  def to_tag(cat):
    """Converts one 'category' value: plain strings become hashtags."""
    if isinstance(cat, basestring):
      return {'objectType': 'hashtag', 'displayName': cat}
    return json_to_object(cat)

  raw_urls = props.get('url')
  urls = get_string_urls(raw_urls) if raw_urls else raw_urls

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': ([{'value': u} for u in urls]
             if urls and len(urls) > 1 else None),
    'image': absolute_urls('photo'),
    'stream': absolute_urls('video'),
    'location': json_to_object(prop.get('location')),
    'replies': {
      'items': [json_to_object(c) for c in props.get('comment', [])],
    },
    'tags': [to_tag(cat) for cat in props.get('category', [])],
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location
  # properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  loc = interpreted.get('location') if interpreted else None
  if loc:
    place = obj['location']
    place['objectType'] = 'place'
    lat = loc.get('latitude')
    lng = loc.get('longitude')
    if lat and lng:
      try:
        place['latitude'] = float(lat)
        place['longitude'] = float(lng)
        # TODO fill in 'position', maybe using Source.postprocess_object?
      except ValueError:
        logging.warn(
          'Could not convert latitude/longitude (%s, %s) to decimal',
          lat, lng)

  if as_type != 'activity':
    obj.update({
      'inReplyTo': [{'url': url} for url in
                    get_string_urls(props.get('in-reply-to', []))],
      'author': author,
    })
    return util.trim_nulls(obj)

  objects = []
  for field in ('like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
                'invitee'):
    for target in props.get(field, []):
      if isinstance(target, dict):
        converted = json_to_object(target)
      else:
        converted = {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if converted not in objects:
        objects.append(converted)

  obj.update({
    'object': objects[0] if len(objects) == 1 else objects,
    'actor': author,
  })
  return util.trim_nulls(obj)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Does not modify obj.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string, the mf2 class that entries should be given (e.g.
      'h-cite' when parsing a reference to a foreign entry). defaults to
      'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. Empty if obj is falsy or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  # copy so that the extend() below never modifies the caller's inReplyTo
  # list; otherwise repeated calls would keep appending to the input.
  in_reply_tos = list(
    obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', [])) or [])
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': (['h-card'] if obj_type == 'person'
             else ['h-card', 'p-location'] if obj_type == 'place'
             else [entry_class]),
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': [image.get('url') for image in
                (util.get_list(obj, 'image') or
                 util.get_list(primary, 'image'))],
      'video': [obj.get('stream', primary.get('stream', {})).get('url')],
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
        'value': xml.sax.saxutils.unescape(primary.get('content', '')),
        'html': render_content(primary, include_location=False,
                               synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'latitude': primary.get('latitude'),
      'longitude': primary.get('longitude'),
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
    },
    'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                 for c in primary.get('attachments', [])
                 if c.get('objectType') in ('note', 'article')],
  }

  # hashtags and person tags
  tags = (obj.get('tags', []) or
          util.get_first(obj, 'object', {}).get('tags', []))
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      cls = 'u-category h-card'
    elif tag.get('objectType') == 'hashtag':
      cls = 'u-category'
    else:
      continue
    ret['properties']['category'].append(object_to_json(tag, entry_class=cls))

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for verb, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == verb:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = util.get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o) <= {'url', 'objectType'}
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == verb]

  if trim_nulls:
    ret = util.trim_nulls(ret)

  return ret
def html_to_activities(self, html):
  """Converts Instagram HTML to ActivityStreams activities.

  The input HTML may be from:

  * a user's feed, eg https://www.instagram.com/ while logged in
  * a user's profile, eg https://www.instagram.com/snarfed/
  * a photo or video, eg https://www.instagram.com/p/BBWCSrfFZAk/

  Args:
    html: unicode string

  Returns:
    tuple, ([ActivityStreams activities], ActivityStreams viewer actor).
    ([], None) if the window._sharedData script tag is missing or
    unterminated.
  """
  # extract JSON data blob
  script_start = '<script type="text/javascript">window._sharedData = '
  start = html.find(script_start)
  if start == -1:
    # Instagram sometimes returns 200 with incomplete HTML. often it stops at
    # the end of one of the <style> tags inside <head>. not sure why.
    logging.warning('JSON script tag not found!')
    return [], None

  # App Engine's Python 2.7.5 json module doesn't support unpaired surrogate
  # Unicode chars, so it chokes on some JSON docs. Monkey patch in simplejson
  # to fix that.
  # https://code.google.com/p/googleappengine/issues/detail?id=12823
  # http://stackoverflow.com/questions/15236742
  try:
    import simplejson
    json_module = simplejson
  except ImportError:
    json_module = json

  start += len(script_start)
  end = html.find(';</script>', start)
  if end == -1:
    # as mentioned above, Instagram sometimes returns 200 with incomplete HTML
    logging.warning('JSON script close tag not found!')
    return [], None

  data = util.trim_nulls(json_module.loads(html[start:end]))
  entry_data = data.get('entry_data', {})

  # find media
  medias = []
  profile_user = None
  for page in entry_data.get('FeedPage', []):
    medias.extend(page.get('feed', {}).get('media', {}).get('nodes', []))
  for page in entry_data.get('ProfilePage', []):
    profile_user = page.get('user', {})
    medias.extend(profile_user.get('media', {}).get('nodes', []))
  medias.extend(page.get('media') for page in entry_data.get('PostPage', []))

  activities = [self._json_media_node_to_activity(media)
                for media in util.trim_nulls(medias)]

  actor = None
  viewer = data.get('config', {}).get('viewer') or profile_user or None
  if viewer:
    # '\\/' is a literal backslash followed by a slash. spelled with an
    # escaped backslash because '\/' is not a valid escape sequence.
    profile = viewer.get('profile_pic_url')
    if profile:
      viewer['profile_picture'] = profile.replace('\\/', '/')
    website = viewer.get('external_url')
    if website:
      viewer['website'] = website.replace('\\/', '/')
    viewer.setdefault('bio', viewer.get('biography'))

    actor = self.user_to_actor(viewer)
    if viewer.get('is_private'):
      actor['to'] = [{'objectType': 'group', 'alias': '@private'}]

  return activities, actor
def _json_media_node_to_activity(self, media):
  """Converts Instagram HTML JSON media node to ActivityStreams activity.

  Modifies media in place: adds API-style fields ('link', 'user', 'images',
  'comments'/'likes' 'data', etc.) alongside the scraped ones.

  Args:
    media: dict, subset of Instagram HTML JSON representing a single photo or
      video

  Returns:
    dict, ActivityStreams activity
  """
  # preprocess to make its field names match the API's
  dims = media.get('dimensions', {})
  owner = media.get('owner', {})
  image_url = media.get('display_src') or media.get('display_url') or ''
  # '\\/' below is a literal backslash followed by a slash. spelled with an
  # escaped backslash because '\/' is not a valid escape sequence.
  media.update({
    'link': self.media_url(media.get('code') or media.get('shortcode')),
    'user': owner,
    'created_time': media.get('date'),
    'caption': {'text': media.get('caption')},
    'images': {'standard_resolution': {
      'url': image_url.replace('\\/', '/'),
      'width': dims.get('width'),
      'height': dims.get('height'),
    }},
    'users_in_photo': media.get('usertags', {}).get('nodes', []),
  })

  # combine media id and owner id into a single '[media]_[owner]' id
  media_id = media.get('id')
  owner_id = owner.get('id')
  if media_id and owner_id:
    media['id'] = '%s_%s' % (media_id, owner_id)

  comments = media.setdefault('comments', {}).setdefault('nodes', [])
  likes = media.setdefault('likes', {}).setdefault('nodes', [])
  for node in [media] + comments + likes:
    node['user']['profile_picture'] = \
      node['user'].get('profile_pic_url', '').replace('\\/', '/')

  media['comments']['data'] = comments
  for comment in media['comments']['data']:
    comment['from'] = comment['user']
    comment['created_time'] = comment['created_at']

  media['likes']['data'] = [like['user'] for like in likes]

  if media.get('is_video'):
    media.update({
      'type': 'video',
      'videos': {'standard_resolution': {
        'url': media.get('video_url', '').replace('\\/', '/'),
        'width': dims.get('width'),
        'height': dims.get('height'),
      }},
    })

  activity = self.media_to_activity(util.trim_nulls(media))
  obj = activity['object']
  obj['ig_like_count'] = media['likes'].get('count', 0)

  # multi-photo: gather the attachments of each child node
  children = media.get('edge_sidecar_to_children', {}).get('edges', [])
  if children:
    obj['attachments'] = list(itertools.chain(*(
      self._json_media_node_to_activity(
        child.get('node'))['object']['attachments']
      for child in children)))

  self.postprocess_object(obj)
  return super(Instagram, self).postprocess_activity(activity)
def json_to_object(mf2):
  """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object

  Returns:
    dict, ActivityStreams object. Empty if mf2 is falsy or not a dict, e.g. a
    property value that is a plain string instead of a nested mf2 object.
  """
  # nested properties (author, location, comment, invitee) are passed back in
  # here recursively and may be plain strings, so guard on type too.
  if not mf2 or not isinstance(mf2, dict):
    return {}

  props = mf2.get('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
  author = json_to_object(prop.get('author'))

  # content may be a {'value': ..., 'html': ...} dict or a plain string
  content = prop.get('content', {})
  if isinstance(content, dict):
    content_text = content.get('value') or content.get('html')
  else:
    content_text = content

  # maps mf2 type to ActivityStreams objectType and optional verb. ordered by
  # priority.
  types = mf2.get('type', [])
  types_map = [
    ('h-as-rsvp', 'activity', rsvp_verb),
    ('h-as-repost', 'activity', 'share'),
    ('h-as-like', 'activity', 'like'),
    ('p-comment', 'comment', None),
    ('h-as-reply', 'comment', None),
    ('p-location', 'place', None),
    ('h-card', 'person', None),
  ]

  # fallback if none of the above mf2 types are found. maps property (if it
  # exists) to objectType and verb. ordered by priority.
  prop_types_map = [
    ('rsvp', 'activity', rsvp_verb),
    ('invitee', 'activity', 'invite'),
    ('repost', 'activity', 'share'),
    ('repost-of', 'activity', 'share'),
    ('like', 'activity', 'like'),
    ('like-of', 'activity', 'like'),
    ('in-reply-to', 'comment', None),
  ]

  for mf2_type, as_type, as_verb in types_map:
    if mf2_type in types:
      break  # found
  else:
    for p, as_type, as_verb in prop_types_map:
      if p in props:
        break
    else:
      # default
      as_type = 'note' if 'h-as-note' in types else 'article'
      as_verb = None

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'displayName': prop.get('name'),
    'content': content_text,
    'url': prop.get('url'),
    'image': {'url': prop.get('photo')},
    'location': json_to_object(prop.get('location')),
    'replies': {
      'items': [json_to_object(c) for c in props.get('comment', [])],
    },
    # TODO: location
  }

  if as_type == 'activity':
    # dedupe target URLs, keeping first-seen order so output is deterministic
    urls = []
    for url in itertools.chain.from_iterable(
        get_string_urls(props.get(field, []))
        for field in ('like', 'like-of', 'repost', 'repost-of',
                      'in-reply-to')):
      if url not in urls:
        urls.append(url)

    objects = [{'url': url} for url in urls]
    objects += [json_to_object(i) for i in props.get('invitee', [])]
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in
                    get_string_urls(props.get('in-reply-to', []))],
      'author': author,
    })

  return util.trim_nulls(obj)
def object_to_json(obj, ctx=None, trim_nulls=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    ctx: dict, a decoded JSON ActivityStreams context. optional; treated as
      empty if omitted.
    trim_nulls: boolean, whether to remove elements with null or empty values

  Returns:
    dict, decoded microformats2 JSON. Empty if obj is falsy.
  """
  if not obj:
    return {}
  if ctx is None:
    ctx = {}

  types_map = {
    'article': ['h-entry', 'h-as-article'],
    'comment': ['h-entry', 'p-comment'],
    'like': ['h-entry', 'h-as-like'],
    'note': ['h-entry', 'h-as-note'],
    'person': ['h-card'],
    'place': ['h-card', 'p-location'],
    'share': ['h-entry', 'h-as-repost'],
    'rsvp-yes': ['h-entry', 'h-as-rsvp'],
    'rsvp-no': ['h-entry', 'h-as-rsvp'],
    'rsvp-maybe': ['h-entry', 'h-as-rsvp'],
    'invite': ['h-entry'],
  }
  obj_type = source.object_type(obj)
  types = types_map.get(obj_type, ['h-entry'])

  url = obj.get('url', '')
  content = obj.get('content', '')
  # TODO: extract snippet
  name = obj.get('displayName', obj.get('title'))
  summary = obj.get('summary')

  author = obj.get('author', obj.get('actor', {}))
  author = object_to_json(author, trim_nulls=False)
  if author:
    author['type'] = ['h-card']

  location = object_to_json(obj.get('location', {}), trim_nulls=False)
  if location:
    location['type'] = ['h-card', 'p-location']

  # + builds a new list, so the append below doesn't touch obj or ctx
  in_reply_tos = obj.get('inReplyTo', []) + ctx.get('inReplyTo', [])
  if 'h-as-rsvp' in types and 'object' in obj:
    in_reply_tos.append(obj['object'])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': types,
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': [url] + obj.get('upstreamDuplicates', []),
      'photo': [obj.get('image', {}).get('url', '')],
      'video': [obj.get('stream', {}).get('url')],
      'published': [obj.get('published', '')],
      'updated': [obj.get('updated', '')],
      'content': [{
        'value': xml.sax.saxutils.unescape(content),
        'html': render_content(obj, include_location=False),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [author],
      'location': [location],
      'comment': [object_to_json(c, trim_nulls=False)
                  for c in obj.get('replies', {}).get('items', [])],
    }
  }

  # rsvp
  if 'h-as-rsvp' in types:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False)
    # object_to_json returns {} when the invite has no object; there is no
    # 'type' list to tag in that case, so leave invitee out.
    if invitee:
      invitee['type'].append('p-invitee')
      ret['properties']['invitee'] = [invitee]

  # likes and reposts
  # http://indiewebcamp.com/like#Counterproposal
  for verb, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == verb:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = obj.get('object', [])
      if not isinstance(objs, list):
        objs = [objs]
      ret['properties'][prop] = ret['properties'][prop + '-of'] = \
        [o.get('url') for o in objs]
    else:
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False)
        for t in obj.get('tags', []) if source.object_type(t) == verb]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def object_to_json(obj, trim_nulls=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Does not modify obj.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values

  Returns:
    dict, decoded microformats2 JSON. Empty if obj is falsy.
  """
  if not obj:
    return {}

  types_map = {
    'article': ['h-entry', 'h-as-article'],
    'comment': ['h-entry', 'p-comment'],
    'like': ['h-entry', 'h-as-like'],
    'note': ['h-entry', 'h-as-note'],
    'person': ['h-card'],
    'place': ['h-card', 'p-location'],
    'share': ['h-entry', 'h-as-repost'],
    'rsvp-yes': ['h-entry', 'h-as-rsvp'],
    'rsvp-no': ['h-entry', 'h-as-rsvp'],
    'rsvp-maybe': ['h-entry', 'h-as-rsvp'],
    'invite': ['h-entry'],
  }
  obj_type = object_type(obj)
  types = types_map.get(obj_type, ['h-entry'])

  url = obj.get('url', '')
  content = obj.get('content', '')
  # TODO: extract snippet
  name = obj.get('displayName', obj.get('title', content))

  author = obj.get('author', obj.get('actor', {}))
  author = object_to_json(author, trim_nulls=False)
  if author:
    author['type'] = ['h-card']

  location = object_to_json(obj.get('location', {}), trim_nulls=False)
  if location:
    location['type'] = ['h-card', 'p-location']

  # copy so that the append below never modifies the caller's inReplyTo list;
  # otherwise repeated calls on an RSVP would keep growing the input.
  in_reply_tos = list(obj.get('inReplyTo') or [])
  if 'h-as-rsvp' in types and 'object' in obj:
    in_reply_tos.append(obj['object'])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': types,
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'url': [url],
      'photo': [obj.get('image', {}).get('url', '')],
      'published': [obj.get('published', '')],
      'updated': [obj.get('updated', '')],
      'content': [{
        'value': content,
        'html': render_content(obj),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [author],
      'location': [location],
      'comment': [object_to_json(c, trim_nulls=False)
                  for c in obj.get('replies', {}).get('items', [])],
    }
  }

  # rsvp
  if 'h-as-rsvp' in types:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False)
    # object_to_json returns {} when the invite has no object; there is no
    # 'type' list to tag in that case, so leave invitee out.
    if invitee:
      invitee['type'].append('p-invitee')
      ret['properties']['invitee'] = [invitee]

  # likes and reposts
  # http://indiewebcamp.com/like#Counterproposal
  for verb, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == verb:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = obj.get('object', [])
      if not isinstance(objs, list):
        objs = [objs]
      ret['properties'][prop] = ret['properties'][prop + '-of'] = \
        [o.get('url') for o in objs]
    else:
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False)
        for t in obj.get('tags', []) if object_type(t) == verb]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def json_to_object(mf2):
  """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object

  Returns:
    dict, ActivityStreams object. Empty if mf2 is falsy or not a dict.
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  props = mf2.get('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
  author = json_to_object(prop.get('author'))

  # maps mf2 type to ActivityStreams objectType and optional verb. ordered by
  # priority.
  types = mf2.get('type', [])
  types_map = [
    ('h-as-rsvp', 'activity', rsvp_verb),
    ('h-as-repost', 'activity', 'share'),
    ('h-as-like', 'activity', 'like'),
    ('p-comment', 'comment', None),
    ('h-as-reply', 'comment', None),
    ('p-location', 'place', None),
    ('h-card', 'person', None),
  ]

  # fallback if none of the above mf2 types are found. maps property (if it
  # exists) to objectType and verb. ordered by priority.
  prop_types_map = [
    ('rsvp', 'activity', rsvp_verb),
    ('invitee', 'activity', 'invite'),
    ('repost-of', 'activity', 'share'),
    ('like-of', 'activity', 'like'),
    ('in-reply-to', 'comment', None),
  ]

  for mf2_type, as_type, as_verb in types_map:
    if mf2_type in types:
      break  # found
  else:
    for p, as_type, as_verb in prop_types_map:
      if p in props:
        break
    else:
      # default
      as_type = 'note' if 'h-as-note' in types else 'article'
      as_verb = None

  photos = [url for url in get_string_urls(props.get('photo', []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urlparse.urlparse(url).netloc]

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': prop.get('url'),
    'image': {'url': photos[0] if photos else None},
    'location': json_to_object(prop.get('location')),
    'replies': {
      'items': [json_to_object(c) for c in props.get('comment', [])],
    },
  }

  if as_type == 'activity':
    # dedupe target URLs, keeping first-seen order so output is deterministic
    urls = []
    for url in itertools.chain.from_iterable(
        get_string_urls(props.get(field, []))
        for field in ('like', 'like-of', 'repost', 'repost-of',
                      'in-reply-to')):
      if url not in urls:
        urls.append(url)

    objects = [{'url': url} for url in urls]
    objects += [json_to_object(i) for i in props.get('invitee', [])]
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in
                    get_string_urls(props.get('in-reply-to', []))],
      'author': author,
    })

  return util.trim_nulls(obj)