def find_mention_item(self, data):
    """Returns the mf2 item that mentions (or replies to, likes, etc) the target.

    May modify the data arg, e.g. may set or replace content.html or
    content.value.

    Args:
      data: mf2 data dict

    Returns:
      mf2 item dict or None
    """
    # find target URL in source
    for item in data.get('items', []):
        props = item.setdefault('properties', {})

        # find first non-empty content element
        content = props.setdefault('content', [{}])[0]
        text = content.get('html') or content.get('value')

        # Look for an explicit interaction property whose URL (fragment
        # stripped) matches the target. NOTE: renamed from `type` to avoid
        # shadowing the builtin.
        for mention_type in ('in-reply-to', 'like', 'like-of', 'repost',
                             'repost-of'):
            urls = [urlparse.urldefrag(u)[0] for u in
                    microformats2.get_string_urls(props.get(mention_type, []))]
            if self.target_url in urls:
                break
        else:
            # No interaction property matched; fall back to a plain-text
            # mention in the content. If the target URL isn't in the text
            # either, this item doesn't mention the target at all.
            if not text or self.target_url not in text:
                continue
            mention_type = 'post'
            url = first_value(props, 'url') or self.source_url
            name = first_value(props, 'name') or first_value(props, 'summary')
            text = content['html'] = ('mentioned this in %s.' %
                                      util.pretty_link(url, text=name))

        if mention_type:
            # found the target!
            rsvp = first_value(props, 'rsvp')
            if rsvp:
                self.entity.type = 'rsvp'
                if not text:
                    content['value'] = 'RSVPed %s.' % rsvp
            else:
                # Map mf2 property names to entity types; unmapped values
                # (e.g. 'like', 'repost', 'post') pass through unchanged.
                self.entity.type = {'in-reply-to': 'comment',
                                    'like-of': 'like',
                                    'repost-of': 'repost',
                                    }.get(mention_type, mention_type)
                if not text:
                    content['value'] = {'comment': 'replied to this.',
                                        'like': 'liked this.',
                                        'repost': 'reposted this.',
                                        }[self.entity.type]
            return item

    return None
def expand_target_urls(self, activity):
    """Expand the inReplyTo or object fields of an ActivityStreams object
    by fetching the original and looking for rel=syndication URLs.

    This method modifies the dict in place.

    Args:
      activity: an ActivityStreams dict of the activity being published
    """
    for field in ('inReplyTo', 'object'):
        # microformats2.json_to_object de-dupes, no need to do it here
        objs = activity.get(field)
        if not objs:
            continue

        if isinstance(objs, dict):
            objs = [objs]
        augmented = list(objs)
        for obj in objs:
            url = obj.get('url')
            if not url:
                continue

            # get_webmention_target weeds out silos and non-HTML targets
            # that we wouldn't want to download and parse
            url, _, ok = util.get_webmention_target(url)
            if not ok:
                continue

            # fetch_mf2 raises a fuss if it can't fetch a mf2 document;
            # easier to just grab this ourselves than add a bunch of
            # special-cases to that method
            logging.debug('expand_target_urls fetching field=%s, url=%s',
                          field, url)
            try:
                resp = requests.get(url, timeout=HTTP_TIMEOUT)
                resp.raise_for_status()
                data = mf2py.Parser(url=url, doc=resp.text).to_dict()
            except AssertionError:
                raise  # for unit tests
            except Exception:
                # it's not a big deal if we can't fetch an in-reply-to url.
                # NOTE: narrowed from BaseException so KeyboardInterrupt and
                # SystemExit propagate instead of being swallowed.
                logging.warning(
                    'expand_target_urls could not fetch field=%s, url=%s',
                    field, url, exc_info=True)
                continue

            synd_urls = data.get('rels', {}).get('syndication', [])

            # look for syndication urls in the first h-entry
            queue = collections.deque(data.get('items', []))
            while queue:
                item = queue.popleft()
                item_types = set(item.get('type', []))
                # descend into h-feeds, but collect from h-entries themselves
                if 'h-feed' in item_types and 'h-entry' not in item_types:
                    queue.extend(item.get('children', []))
                    continue

                # these can be urls or h-cites
                synd_urls += microformats2.get_string_urls(
                    item.get('properties', {}).get('syndication', []))

            logging.debug(
                'expand_target_urls found rel=syndication for url=%s: %r',
                url, synd_urls)
            augmented += [{'url': u} for u in synd_urls]

        activity[field] = augmented