def send_webmentions(handler, activity, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    handler: RequestHandler
    activity: dict, AS1 activity
    response_props: passed through to the newly created Responses
  """
  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(handler, '%s activities are not supported yet.' % verb)

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    if not source:
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))
  if verb in ('like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error(handler, "Couldn't find original post URL")
  if not targets:
    error(handler, "Couldn't find target URLs (inReplyTo or object)")

  # send webmentions and store Responses
  errors = []
  for target in targets:
    if not target:
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()

    wm_source = response.proxy_url() if verb in ('like', 'share') else source
    logging.info('Sending webmention from %s to %s', wm_source, target)

    wm = send.WebmentionSend(wm_source, target)
    if wm.send(headers=HEADERS):
      logging.info('Success: %s', wm.response)
      response.status = 'complete'
    else:
      logging.warning('Failed: %s', wm.error)
      errors.append(wm.error)
      response.status = 'error'
    response.put()

  if errors:
    msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
    error(handler, msg, status=errors[0].get('http_status'))
def append_in_reply_to(before, after):
  """Appends the inReplyTos from the before object to the after object, in place.

  Args:
    before, after: dicts, ActivityStreams activities or objects
  """
  obj_b = before.get('object', before)
  obj_a = after.get('object', after)
  if obj_b and obj_a:
    reply_b = util.get_list(obj_b, 'inReplyTo')
    reply_a = util.get_list(obj_a, 'inReplyTo')
    obj_a['inReplyTo'] = util.dedupe_urls(reply_a + reply_b)
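# A minimal usage sketch for append_in_reply_to(), with made-up dicts. Assumes
# util is oauth_dropins.webutil.util, which the function above already uses;
# its dedupe_urls() accepts both URL strings and dicts with a 'url' key.
before = {'object': {'inReplyTo': [{'url': 'http://orig.example/post'}]}}
after = {'object': {'inReplyTo': [{'url': 'http://other.example/post'}]}}
append_in_reply_to(before, after)
# after['object']['inReplyTo'] now holds both targets, deduped, after's own
# entries first:
# [{'url': 'http://other.example/post'}, {'url': 'http://orig.example/post'}]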
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews a status (aka toot), reply, boost (aka reblog), or favorite.

  https://docs.joinmastodon.org/api/rest/statuses/

  Based on :meth:`Twitter._create`.

  Args:
    obj: ActivityStreams object
    preview: boolean
    include_link: string
    ignore_formatting: boolean

  Returns:
    CreationResult. If preview is True, the content will be a unicode string
    HTML snippet. If False, it will be a dict with 'id' and 'url' keys for the
    newly created object.
  """
  assert preview in (False, True)

  type = obj.get('objectType')
  verb = obj.get('verb')

  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or obj.get('inReplyTo')
  is_rsvp = (verb and verb.startswith('rsvp-')) or verb == 'invite'
  atts = obj.get('attachments', [])
  images = util.dedupe_urls(
      util.get_list(obj, 'image') +
      [a for a in atts if a.get('objectType') == 'image'])
  videos = util.dedupe_urls(
      [obj] + [a for a in atts if a.get('objectType') == 'video'],
      key='stream')
  has_media = (images or videos) and (type in ('note', 'article') or is_reply)

  # prefer displayName over content for articles
  #
  # TODO: handle activities as well as objects? ie pull out ['object'] here if
  # necessary?
  prefer_content = type == 'note' or (base_url and is_reply)
  preview_description = ''
  content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                     prefer_name=not prefer_content)

  if not content:
    if type == 'activity' and not is_rsvp:
      content = verb
    elif has_media:
      content = ''
    else:
      return source.creation_result(
          abort=False,  # keep looking for things to publish,
          error_plain='No content text found.',
          error_html='No content text found.')

  post_label = '%s %s' % (self.NAME, self.TYPE_LABELS['post'])
  if is_reply and not base_url:
    return source.creation_result(
        abort=True,
        error_plain='Could not find a %s to reply to.' % post_label,
        error_html='Could not find a %s to '
        '<a href="http://indiewebcamp.com/reply">reply to</a>. Check that '
        'your post has the right '
        '<a href="http://indiewebcamp.com/comment">in-reply-to</a> link.'
        % post_label)

  # truncate and ellipsize content if necessary
  # TODO: don't count domains in remote mentions.
  # https://docs.joinmastodon.org/usage/basics/#text
  content = self.truncate(content, obj.get('url'), include_link, type)

  # linkify user mentions
  def linkify_mention(match):
    split = match.group(1).split('@')
    username = split[0]
    instance = ('https://' + split[1]) if len(split) > 1 else self.instance
    url = urllib.parse.urljoin(instance, '/@' + username)
    return '<a href="%s">@%s</a>' % (url, username)

  preview_content = MENTION_RE.sub(linkify_mention, content)

  # linkify (defaults to twitter's behavior)
  preview_content = util.linkify(preview_content, pretty=True,
                                 skip_bare_cc_tlds=True)
  tags_url = urllib.parse.urljoin(self.instance, '/tags')
  preview_content = HASHTAG_RE.sub(r'\1<a href="%s/\2">#\2</a>' % tags_url,
                                   preview_content)

  # switch on activity type
  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
          abort=True,
          error_plain='Could not find a %s to %s.' %
          (post_label, self.TYPE_LABELS['like']),
          error_html='Could not find a %s to '
          '<a href="http://indiewebcamp.com/like">%s</a>. Check that your '
          'post has the right '
          '<a href="http://indiewebcamp.com/like">u-like-of link</a>.'
          % (post_label, self.TYPE_LABELS['like']))

    if preview:
      preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
          self.TYPE_LABELS['like'], base_url, self.TYPE_LABELS['post'],
          self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      resp = self._post(API_FAVORITE % base_id)
      resp['type'] = 'like'

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
          abort=True,
          error_plain='Could not find a %s to %s.' %
          (post_label, self.TYPE_LABELS['repost']),
          error_html='Could not find a %s to '
          '<a href="http://indiewebcamp.com/repost">%s</a>. Check that your '
          'post has the right '
          '<a href="http://indiewebcamp.com/repost">repost-of</a> link.'
          % (post_label, self.TYPE_LABELS['repost']))

    if preview:
      preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
          self.TYPE_LABELS['repost'], base_url, self.TYPE_LABELS['post'],
          self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      resp = self._post(API_REBLOG % base_id)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply or is_rsvp:  # a post
    data = {'status': content}

    if is_reply:
      preview_description += 'add a <span class="verb">%s</span> to <a href="%s">this %s</a>: %s' % (
          self.TYPE_LABELS['comment'], base_url, self.TYPE_LABELS['post'],
          self.embed_post(base_obj))
      data['in_reply_to_id'] = base_id
    else:
      preview_description += '<span class="verb">%s</span>:' % self.TYPE_LABELS['post']

    num_media = len(videos) + len(images)
    if num_media > MAX_MEDIA:
      videos = videos[:MAX_MEDIA]
      images = images[:max(MAX_MEDIA - len(videos), 0)]
      logging.warning('Found %d media! Only using the first %d: %r',
                      num_media, MAX_MEDIA, videos + images)

    if preview:
      media_previews = [
          '<video controls src="%s"><a href="%s">%s</a></video>' % (
              util.get_url(vid, key='stream'), util.get_url(vid, key='stream'),
              vid.get('displayName') or 'this video')
          for vid in videos
      ] + [
          '<img src="%s" alt="%s" />' % (util.get_url(img),
                                         img.get('displayName') or '')
          for img in images
      ]
      if media_previews:
        preview_content += '<br /><br />' + ' '.join(media_previews)
      return source.creation_result(content=preview_content,
                                    description=preview_description)
    else:
      ids = self.upload_media(videos + images)
      if ids:
        data['media_ids'] = ids
      resp = self._post(API_STATUSES, json=data)

  else:
    return source.creation_result(
        abort=False,
        error_plain='Cannot publish type=%s, verb=%s to Mastodon' % (type, verb),
        error_html='Cannot publish type=%s, verb=%s to Mastodon' % (type, verb))

  if 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
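# A hedged sketch of driving _create() through granary's public Source API,
# which wraps it in create() and preview_create(). The instance URL and access
# token are placeholders; check granary's Mastodon constructor for the exact
# signature before relying on this.
from granary.mastodon import Mastodon

masto = Mastodon('https://mastodon.example', access_token='...')
note = {'objectType': 'note', 'content': 'Hello fediverse! #indieweb'}

preview = masto.preview_create(note)  # calls _create(note, preview=True)
print(preview.description, preview.content)

result = masto.create(note)           # calls _create(note, preview=False)
print(result.content.get('url'))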
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if
      necessary, e.g. to determine authorship:
      https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    def fetch(url):
      return mf2py.parse(util.requests_get(url).text, url=url)

    author = mf2util.find_author(
        {'items': [mf2]}, hentry=mf2,
        fetch_mf2_func=fetch if fetch_mf2 else None)
    if author:
      author = {
          'objectType': 'person',
          'url': author.get('url'),
          'displayName': author.get('name'),
          'image': [{'url': author.get('photo')}],
      }

  if not author:
    author = actor

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def absolute_urls(prop):
    return [url for url in get_string_urls(props.get(prop, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urllib.parse.urlparse(url).netloc]

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
      json_to_object(quote)
      for quote in mf2.get('children', []) + props.get('quotation-of', [])
      if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  for type in 'audio', 'video':
    attachments.extend({'objectType': type, 'stream': {'url': url}}
                       for url in get_string_urls(props.get(type, [])))

  obj = {
      'id': prop.get('uid'),
      'objectType': as_type,
      'verb': as_verb,
      'published': prop.get('published', ''),
      'updated': prop.get('updated', ''),
      'startTime': prop.get('start'),
      'endTime': prop.get('end'),
      'displayName': get_text(prop.get('name')),
      'summary': get_text(prop.get('summary')),
      'content': get_html(prop.get('content')),
      'url': urls[0] if urls else None,
      'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
      'image': [{'url': url} for url in dedupe_urls(
          absolute_urls('photo') + absolute_urls('featured'))],
      'stream': [{'url': url} for url in absolute_urls('video')],
      'location': json_to_object(prop.get('location')),
      'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
      'tags': [{'objectType': 'hashtag', 'displayName': cat}
               if isinstance(cat, str) else json_to_object(cat)
               for cat in props.get('category', [])],
      'attachments': attachments,
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location
  # properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
              'latitude': float(lat),
              'longitude': float(lng),
          })
        except ValueError:
          logging.warning(
              'Could not convert latitude/longitude (%s, %s) to decimal',
              lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
            'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
            'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
        'object': objects[0] if len(objects) == 1 else objects,
        'actor': author,
    })
  else:
    obj.update({
        'inReplyTo': [{'url': url} for url in in_reply_tos],
        'author': author,
    })

  return source.Source.postprocess_object(obj)
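# A minimal sketch, assuming this json_to_object() is the one in
# granary.microformats2 and the URLs are placeholders.
from granary import microformats2

mf2 = {
    'type': ['h-entry'],
    'properties': {
        'content': [{'html': 'Hello <em>world</em>', 'value': 'Hello world'}],
        'url': ['http://example.com/post'],
    },
}
obj = microformats2.json_to_object(mf2)
# obj is roughly:
# {'objectType': 'note', 'content': 'Hello <em>world</em>',
#  'url': 'http://example.com/post'}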
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo',
                         obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
      'type': (AS_TO_MF2_TYPE.get(obj_type) or
               [entry_class] if isinstance(entry_class, basestring)
               else list(entry_class)),
      'properties': {
          'uid': [obj.get('id') or ''],
          'numeric-id': [obj.get('numeric_id') or ''],
          'name': [name],
          'nickname': [obj.get('username') or ''],
          'summary': [summary],
          'url': (list(object_urls(obj) or object_urls(primary)) +
                  obj.get('upstreamDuplicates', [])),
          'photo': dedupe_urls(get_urls(attachments, 'image', 'image') +
                               get_urls(primary, 'image')),
          'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                               get_urls(primary, 'stream')),
          'audio': get_urls(attachments, 'audio', 'stream'),
          'published': [obj.get('published', primary.get('published', ''))],
          'updated': [obj.get('updated', primary.get('updated', ''))],
          'content': [{
              'value': xml.sax.saxutils.unescape(primary.get('content', '')),
              'html': render_content(primary, include_location=False,
                                     synthesize_content=synthesize_content),
          }],
          'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
          'author': [object_to_json(
              author, trim_nulls=False, default_object_type='person')],
          'location': [object_to_json(
              primary.get('location', {}), trim_nulls=False,
              default_object_type='place')],
          'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                      for c in obj.get('replies', {}).get('items', [])],
          'start': [primary.get('startTime')],
          'end': [primary.get('endTime')],
      },
      'children': [
          object_to_json(a, trim_nulls=False,
                         entry_class=['u-quotation-of', 'h-cite'])
          for a in attachments['note'] + attachments['article']
      ],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
          object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in ('favorite', 'like'), ('like', 'like'), ('share', 'repost'):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
          # flatten contexts that are just a url
          o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
          else object_to_json(o, trim_nulls=False, entry_class='h-cite')
          for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
          object_to_json(t, trim_nulls=False, entry_class='h-cite')
          for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
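# The inverse direction, as a hedged sketch against
# granary.microformats2.object_to_json. A like whose object is just a URL gets
# flattened into a u-like-of property.
from granary import microformats2

like = {
    'objectType': 'activity',
    'verb': 'like',
    'object': {'url': 'http://example.com/original'},
}
print(microformats2.object_to_json(like))
# roughly: {'type': ['h-entry'],
#           'properties': {'like-of': ['http://example.com/original']}}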
def original_post_discovery(activity, domains=None, cache=None,
                            include_redirect_sources=True, **kwargs):
  """Discovers original post links.

  This is a variation on http://indiewebcamp.com/original-post-discovery . It
  differs in that it finds multiple candidate links instead of one, and it
  doesn't bother looking for MF2 (etc) markup because the silos don't let you
  input it. More background:
  https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

  Original post candidates come from the upstreamDuplicates, attachments, and
  tags fields, as well as links and permashortlinks/permashortcitations in the
  text content.

  Args:
    activity: activity dict
    domains: optional sequence of domains. If provided, only links to these
      domains will be considered original and stored in upstreamDuplicates.
      (Permashortcitations are exempt.)
    cache: optional, a cache object for storing resolved URL redirects. Passed
      to follow_redirects().
    include_redirect_sources: boolean, whether to include URLs that redirect
      as well as their final destination URLs
    kwargs: passed to requests.head() when following redirects

  Returns:
    ([string original post URLs], [string mention URLs]) tuple
  """
  obj = activity.get('object') or activity
  content = obj.get('content', '').strip()

  # find all candidate URLs
  tags = [t.get('url') for t in obj.get('attachments', []) + obj.get('tags', [])
          if t.get('objectType') in ('article', 'mention', None)]
  candidates = tags + util.extract_links(content) + obj.get(
      'upstreamDuplicates', [])

  # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
  # references to canonical copies of a given (usually syndicated) post, of
  # the form (DOMAIN PATH). We consider them an explicit original post link.
  candidates += [match.expand(r'http://\1/\2')
                 for match in Source._PERMASHORTCITATION_RE.finditer(content)]

  candidates = set(filter(None, (
      util.clean_url(url) for url in candidates
      # heuristic: ellipsized URLs are probably incomplete, so omit them.
      if url and not url.endswith('...') and not url.endswith(u'…'))))

  # check for redirects and add their final urls
  redirects = {}  # maps final URL to original URL for redirects
  for url in list(candidates):
    resolved = util.follow_redirects(url, cache=cache, **kwargs)
    if (resolved.url != url and
        resolved.headers.get('content-type', '').startswith('text/html')):
      redirects[resolved.url] = url
      candidates.add(resolved.url)

  # use domains to determine which URLs are original post links vs mentions
  originals = set()
  mentions = set()
  for url in util.dedupe_urls(candidates):
    if url in redirects.values():
      # this is a redirected original URL. postpone and handle it when we hit
      # its final URL so that we know the final domain.
      continue
    domain = util.domain_from_link(url)
    which = (originals if not domains or util.domain_or_parent_in(domain, domains)
             else mentions)
    which.add(url)
    redirected_from = redirects.get(url)
    if redirected_from and include_redirect_sources:
      which.add(redirected_from)

  logging.info('Original post discovery found original posts %s, mentions %s',
               originals, mentions)
  return originals, mentions
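# A hedged example, assuming this lives as granary.source.Source's static
# original_post_discovery method. Note that follow_redirects() makes network
# requests, so these placeholder URLs would need to resolve for the output to
# match exactly.
from granary.source import Source

activity = {'object': {
    'content': 'new post!',
    'upstreamDuplicates': ['http://mysite.example/original'],
    'tags': [{'objectType': 'article', 'url': 'http://other.example/mention'}],
}}
originals, mentions = Source.original_post_discovery(
    activity, domains=['mysite.example'])
# originals: {'http://mysite.example/original'}
# mentions:  {'http://other.example/mention'}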
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo',
                         obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
      'type': (AS_TO_MF2_TYPE.get(obj_type) or
               [entry_class] if isinstance(entry_class, basestring)
               else list(entry_class)),
      'properties': {
          'uid': [obj.get('id') or ''],
          'numeric-id': [obj.get('numeric_id') or ''],
          'name': [name],
          'nickname': [obj.get('username') or ''],
          'summary': [summary],
          'url': (list(object_urls(obj) or object_urls(primary)) +
                  obj.get('upstreamDuplicates', [])),
          # photo is special cased below, to handle alt
          'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                               get_urls(primary, 'stream')),
          'audio': get_urls(attachments, 'audio', 'stream'),
          'published': [obj.get('published', primary.get('published', ''))],
          'updated': [obj.get('updated', primary.get('updated', ''))],
          'content': [{
              'value': xml.sax.saxutils.unescape(primary.get('content', '')),
              'html': render_content(primary, include_location=False,
                                     synthesize_content=synthesize_content),
          }],
          'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
          'author': [object_to_json(
              author, trim_nulls=False, default_object_type='person')],
          'location': [object_to_json(
              primary.get('location', {}), trim_nulls=False,
              default_object_type='place')],
          'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                      for c in obj.get('replies', {}).get('items', [])],
          'start': [primary.get('startTime')],
          'end': [primary.get('endTime')],
      },
      'children': (
          # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
          # something is being "quoted," like in a quote tweet, so i cheat and
          # use extra knowledge here that quoted tweets are converted to note
          # attachments, but URLs in the tweet text are converted to article
          # tags.
          [object_to_json(a, trim_nulls=False,
                          entry_class=['u-quotation-of', 'h-cite'])
           for a in attachments['note'] if 'startIndex' not in a] +
          [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
           for a in attachments['article'] if 'startIndex' not in a]),
  }

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')

  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
          object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
          # flatten contexts that are just a url
          o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
          else object_to_json(o, trim_nulls=False, entry_class='h-cite')
          for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
          object_to_json(t, trim_nulls=False, entry_class='h-cite')
          for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
def original_post_discovery(
    activity, domains=None, cache=None, include_redirect_sources=True, **kwargs
):
    """Discovers original post links.

    This is a variation on http://indiewebcamp.com/original-post-discovery . It
    differs in that it finds multiple candidate links instead of one, and it
    doesn't bother looking for MF2 (etc) markup because the silos don't let you
    input it. More background:
    https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

    Original post candidates come from the upstreamDuplicates, attachments, and
    tags fields, as well as links and permashortlinks/permashortcitations in the
    text content.

    Args:
      activity: activity dict
      domains: optional sequence of domains. If provided, only links to these
        domains will be considered original and stored in upstreamDuplicates.
        (Permashortcitations are exempt.)
      cache: optional, a cache object for storing resolved URL redirects. Passed
        to follow_redirects().
      include_redirect_sources: boolean, whether to include URLs that redirect
        as well as their final destination URLs
      kwargs: passed to requests.head() when following redirects

    Returns:
      ([string original post URLs], [string mention URLs]) tuple
    """
    obj = activity.get("object") or activity
    content = obj.get("content", "").strip()

    # find all candidate URLs
    tags = [
        t.get("url")
        for t in obj.get("attachments", []) + obj.get("tags", [])
        if t.get("objectType") in ("article", "mention", None)
    ]
    candidates = tags + util.extract_links(content) + obj.get("upstreamDuplicates", [])

    # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
    # references to canonical copies of a given (usually syndicated) post, of
    # the form (DOMAIN PATH). We consider them an explicit original post link.
    candidates += [
        match.expand(r"http://\1/\2")
        for match in Source._PERMASHORTCITATION_RE.finditer(content)
    ]

    candidates = set(
        filter(
            None,
            (
                util.clean_url(url)
                for url in candidates
                # heuristic: ellipsized URLs are probably incomplete, so omit them.
                if url and not url.endswith("...") and not url.endswith("…")
            ),
        )
    )

    # check for redirects and add their final urls
    redirects = {}  # maps final URL to original URL for redirects
    for url in list(candidates):
        resolved = follow_redirects(url, cache=cache, **kwargs)
        if resolved.url != url and resolved.headers.get(
            "content-type", ""
        ).startswith("text/html"):
            redirects[resolved.url] = url
            candidates.add(resolved.url)

    # use domains to determine which URLs are original post links vs mentions
    originals = set()
    mentions = set()
    for url in util.dedupe_urls(candidates):
        if url in redirects.values():
            # this is a redirected original URL. postpone and handle it when we
            # hit its final URL so that we know the final domain.
            continue
        which = (
            originals
            if not domains or util.domain_from_link(url) in domains
            else mentions
        )
        which.add(url)
        redirected_from = redirects.get(url)
        if redirected_from and include_redirect_sources:
            which.add(redirected_from)

    logging.info(
        "Original post discovery found original posts %s, mentions %s",
        originals,
        mentions,
    )
    return originals, mentions
def postprocess_as2(activity, target=None, key=None):
  """Prepare an AS2 object to be served or sent via ActivityPub.

  Args:
    activity: dict, AS2 object or activity
    target: dict, AS2 object, optional. The target of activity's inReplyTo or
      Like/Announce/etc object, if any.
    key: :class:`models.MagicKey`, optional. populated into publicKey field
      if provided.
  """
  type = activity.get('type')

  # actor objects
  if type == 'Person':
    postprocess_as2_actor(activity)
    if not activity.get('publicKey'):
      # underspecified, inferred from this issue and Mastodon's implementation:
      # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
      # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
      activity.update({
          'publicKey': {
              'id': activity.get('preferredUsername'),
              'publicKeyPem': key.public_pem().decode(),
          },
          '@context': (util.get_list(activity, '@context') +
                       ['https://w3id.org/security/v1']),
      })
    return activity

  for actor in (util.get_list(activity, 'attributedTo') +
                util.get_list(activity, 'actor')):
    postprocess_as2_actor(actor)

  # inReplyTo: singly valued, prefer id over url
  target_id = target.get('id') if target else None
  in_reply_to = activity.get('inReplyTo')
  if in_reply_to:
    if target_id:
      activity['inReplyTo'] = target_id
    elif isinstance(in_reply_to, list):
      if len(in_reply_to) > 1:
        logging.warning("AS2 doesn't support multiple inReplyTo URLs! "
                        'Only using the first: %s' % in_reply_to[0])
      activity['inReplyTo'] = in_reply_to[0]

    # Mastodon evidently requires a Mention tag for replies to generate a
    # notification to the original post's author. not required for likes,
    # reposts, etc. details:
    # https://github.com/snarfed/bridgy-fed/issues/34
    if target:
      for to in (util.get_list(target, 'attributedTo') +
                 util.get_list(target, 'actor')):
        if isinstance(to, dict):
          to = to.get('url') or to.get('id')
        if to:
          activity.setdefault('tag', []).append({
              'type': 'Mention',
              'href': to,
          })

  # activity objects (for Like, Announce, etc): prefer id over url
  obj = activity.get('object')
  if obj:
    if isinstance(obj, dict) and not obj.get('id'):
      obj['id'] = target_id or obj.get('url')
    elif target_id and obj != target_id:
      activity['object'] = target_id

  # id is required for most things. default to url if it's not set.
  if not activity.get('id'):
    activity['id'] = activity.get('url')

  # TODO: find a better way to check this, sometimes or always?
  # removed for now since it fires on posts without u-id or u-url, eg
  # https://chrisbeckstrom.com/2018/12/27/32551/
  # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

  activity['id'] = redirect_wrap(activity.get('id'))
  activity['url'] = redirect_wrap(activity.get('url'))

  # copy image(s) into attachment(s). may be Mastodon-specific.
  # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
  obj_or_activity = obj if isinstance(obj, dict) else activity
  obj_or_activity.setdefault('attachment', []).extend(
      obj_or_activity.get('image', []))

  # cc public and target's author(s) and recipients
  # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
  # https://w3c.github.io/activitypub/#delivery
  if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
    recips = [AS2_PUBLIC_AUDIENCE]
    if target:
      recips += itertools.chain(*(util.get_list(target, field) for field in
                                  ('actor', 'attributedTo', 'to', 'cc')))
    activity['cc'] = util.dedupe_urls(util.get_url(recip) or recip.get('id')
                                      for recip in recips)

  # wrap articles and notes in a Create activity
  if type in ('Article', 'Note'):
    activity = {
        '@context': as2.CONTEXT,
        'type': 'Create',
        'id': f'{activity["id"]}#bridgy-fed-create',
        'object': activity,
    }

  return util.trim_nulls(activity)
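# Illustrative input/output for postprocess_as2(), with placeholder URLs.
# Assumes bridgy-fed's context, where redirect_wrap() rewrites ids/urls
# through the fed.brid.gy redirector; that rewriting is abbreviated to r(...)
# in the comments below.
note = {
    'type': 'Note',
    'id': 'http://example.com/reply',
    'url': 'http://example.com/reply',
    'content': 'Nice post!',
}
create = postprocess_as2(note)
# roughly:
# {'@context': as2.CONTEXT,
#  'type': 'Create',
#  'id': 'r(http://example.com/reply)#bridgy-fed-create',
#  'object': {'type': 'Note', 'id': 'r(http://example.com/reply)',
#             'cc': ['https://www.w3.org/ns/activitystreams#Public'], ...}}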
def send_webmentions(activity_wrapped, proxy=None, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    activity_wrapped: dict, AS1 activity
    proxy: boolean, whether to use the Response's proxy URL as the webmention
      source instead of the original post URL
    response_props: passed through to the newly created Responses
  """
  activity = redirect_unwrap(activity_wrapped)

  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(f'{verb} activities are not supported yet.')

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    if not source or verb in ('create', 'post', 'update'):
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))

  tags = util.get_list(activity_wrapped, 'tags')
  obj_wrapped = activity_wrapped.get('object')
  if isinstance(obj_wrapped, dict):
    tags.extend(util.get_list(obj_wrapped, 'tags'))
  for tag in tags:
    if tag.get('objectType') == 'mention':
      url = tag.get('url')
      if url and url.startswith(request.host_url):
        targets.append(redirect_unwrap(url))

  if verb in ('follow', 'like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error("Couldn't find original post URL")
  if not targets:
    error("Couldn't find any target URLs in inReplyTo, object, or mention tags")

  # send webmentions and store Responses
  errors = []  # stores (code, body) tuples
  for target in targets:
    if util.domain_from_link(target) == util.domain_from_link(source):
      logging.info(f'Skipping same-domain webmention from {source} to {target}')
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()
    wm_source = (response.proxy_url()
                 if verb in ('follow', 'like', 'share') or proxy
                 else source)
    logging.info(f'Sending webmention from {wm_source} to {target}')

    try:
      endpoint = webmention.discover(target, headers=HEADERS).endpoint
      if endpoint:
        webmention.send(endpoint, wm_source, target, headers=HEADERS)
        response.status = 'complete'
        logging.info('Success!')
      else:
        response.status = 'ignored'
        logging.info('Ignoring.')
    except BaseException as e:
      errors.append(util.interpret_http_exception(e))
    response.put()

  if errors:
    msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
    error(msg, status=int(errors[0][0] or 502))
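# The kind of unwrapped AS1 activity this version expects, sketched with
# placeholder URLs. A reply like this would yield one webmention with
# source=http://a.example/reply and target=http://b.example/post, sent to
# whatever endpoint webmention.discover() finds on the target.
reply = {
    'objectType': 'activity',
    'verb': 'post',
    'object': {
        'url': 'http://a.example/reply',
        'inReplyTo': [{'url': 'http://b.example/post'}],
    },
}
# send_webmentions(reply)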
def template_vars(self, domain, url=None):
  assert domain

  if domain.split('.')[-1] in NON_TLDS:
    common.error(self, "%s doesn't look like a domain" % domain, status=404)

  # find representative h-card. try url, then url's home page, then domain
  urls = ['http://%s/' % domain]
  if url:
    urls = [url, urlparse.urljoin(url, '/')] + urls

  for candidate in urls:
    resp = common.requests_get(candidate)
    parsed = common.beautifulsoup_parse(resp.content,
                                        from_encoding=resp.encoding)
    mf2 = mf2py.parse(parsed, url=resp.url, img_with_alt=True)
    # logging.debug('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))
    hcard = mf2util.representative_hcard(mf2, resp.url)
    if hcard:
      logging.info('Representative h-card: %s', json.dumps(hcard, indent=2))
      break
  else:
    common.error(self, """\
Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s""" % resp.url)

  logging.info('Generating WebFinger data for %s', domain)
  key = models.MagicKey.get_or_create(domain)
  props = hcard.get('properties', {})
  urls = util.dedupe_urls(props.get('url', []) + [resp.url])
  canonical_url = urls[0]

  acct = '%s@%s' % (domain, domain)
  for url in urls:
    if url.startswith('acct:'):
      urluser, urldomain = util.parse_acct_uri(url)
      if urldomain == domain:
        acct = '%s@%s' % (urluser, domain)
        logging.info('Found custom username: acct:%s', acct)
        break

  # discover atom feed, if any
  atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
  if atom and atom['href']:
    atom = urlparse.urljoin(resp.url, atom['href'])
  else:
    atom = 'https://granary.io/url?' + urllib.urlencode({
        'input': 'html',
        'output': 'atom',
        'url': resp.url,
        'hub': resp.url,
    })

  # discover PuSH, if any. default to superfeedr; use the first hub link
  # header, if any.
  hub = 'https://bridgy-fed.superfeedr.com/'
  for link in resp.headers.get('Link', '').split(','):
    match = common.LINK_HEADER_RE.match(link)
    if match and match.group(2) == 'hub':
      hub = match.group(1)
      break

  # generate webfinger content
  data = util.trim_nulls({
      'subject': 'acct:' + acct,
      'aliases': urls,
      'magic_keys': [{'value': key.href()}],
      'links': sum(([{
          'rel': 'http://webfinger.net/rel/profile-page',
          'type': 'text/html',
          'href': url,
      }] for url in urls if url.startswith('http')), []) + [{
          'rel': 'http://webfinger.net/rel/avatar',
          'href': url,
      } for url in props.get('photo', [])] + [{
          'rel': 'canonical_uri',
          'type': 'text/html',
          'href': canonical_url,
      },

      # ActivityPub
      {
          'rel': 'self',
          'type': 'application/activity+json',
          # use HOST_URL instead of e.g. request.host_url because it
          # sometimes lost port, e.g. http://localhost:8080 would become
          # just http://localhost. no clue how or why.
          'href': '%s/%s' % (appengine_config.HOST_URL, domain),
      }, {
          'rel': 'inbox',
          'type': 'application/activity+json',
          'href': '%s/%s/inbox' % (appengine_config.HOST_URL, domain),
      },

      # OStatus
      {
          'rel': 'http://schemas.google.com/g/2010#updates-from',
          'type': common.CONTENT_TYPE_ATOM,
          'href': atom,
      }, {
          'rel': 'hub',
          'href': hub,
      }, {
          'rel': 'magic-public-key',
          'href': key.href(),
      }, {
          'rel': 'salmon',
          'href': '%s/%s/salmon' % (appengine_config.HOST_URL, domain),
      }]
  })
  logging.info('Returning WebFinger data: %s', json.dumps(data, indent=2))
  return data
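# How a client sees the result, assuming this handler backs bridgy-fed's
# /.well-known/webfinger endpoint and the data dict above is serialized as
# the JRD response. The domain is a placeholder.
import requests

resp = requests.get('https://fed.brid.gy/.well-known/webfinger',
                    params={'resource': 'acct:mysite.example@mysite.example'})
jrd = resp.json()
inbox = [link['href'] for link in jrd['links'] if link['rel'] == 'inbox'][0]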
def postprocess_as2(activity, target=None, key=None):
  """Prepare an AS2 object to be served or sent via ActivityPub.

  Args:
    activity: dict, AS2 object or activity
    target: dict, AS2 object, optional. The target of activity's inReplyTo or
      Like/Announce/etc object, if any.
    key: MagicKey, optional. populated into publicKey field if provided.
  """
  type = activity.get('type')

  # actor objects
  if type == 'Person':
    postprocess_as2_actor(activity)
    if not activity.get('publicKey'):
      # underspecified, inferred from this issue and Mastodon's implementation:
      # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
      # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
      activity['publicKey'] = {
          'publicKeyPem': key.public_pem(),
      }
    return activity

  for actor in (util.get_list(activity, 'attributedTo') +
                util.get_list(activity, 'actor')):
    postprocess_as2_actor(actor)

  # inReplyTo: singly valued, prefer id over url
  target_id = target.get('id') if target else None
  in_reply_to = activity.get('inReplyTo')
  if in_reply_to:
    if target_id:
      activity['inReplyTo'] = target_id
    elif isinstance(in_reply_to, list):
      if len(in_reply_to) > 1:
        logging.warning("AS2 doesn't support multiple inReplyTo URLs! "
                        'Only using the first: %s' % in_reply_to[0])
      activity['inReplyTo'] = in_reply_to[0]

  # activity objects (for Like, Announce, etc): prefer id over url
  obj = activity.get('object', {})
  if obj:
    if isinstance(obj, dict) and not obj.get('id'):
      obj['id'] = target_id or obj.get('url')
    elif target_id and obj != target_id:
      activity['object'] = target_id

  # id is required for most things. default to url if it's not set.
  if not activity.get('id'):
    activity['id'] = activity.get('url')
  assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

  # cc public and target's author(s) and recipients
  # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
  # https://w3c.github.io/activitypub/#delivery
  if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
    recips = [AS2_PUBLIC_AUDIENCE]
    if target:
      recips += itertools.chain(*(util.get_list(target, field) for field in
                                  ('actor', 'attributedTo', 'to', 'cc')))
    activity['cc'] = util.dedupe_urls(util.get_url(recip) for recip in recips)

  # wrap articles and notes in a Create activity
  if type in ('Article', 'Note'):
    activity = {
        '@context': as2.CONTEXT,
        'type': 'Create',
        'object': activity,
    }

  return util.trim_nulls(activity)
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo') or []
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = context.get('inReplyTo') or []

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then
  # first audio
  stream = {}
  for candidate in [obj] + attachments['video'] + attachments['audio']:
    for stream in get_list(candidate, 'stream'):
      if stream:
        break

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      logging.warning('Ignoring duration %r; expected int, got %s',
                      duration, duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
      'type': (AS_TO_MF2_TYPE.get(obj_type) or
               [entry_class] if isinstance(entry_class, str)
               else list(entry_class)),
      'properties': {
          'uid': [obj.get('id') or ''],
          'numeric-id': [obj.get('numeric_id') or ''],
          'name': [name],
          'nickname': [obj.get('username') or ''],
          'summary': [summary],
          'url': (list(object_urls(obj) or object_urls(primary)) +
                  obj.get('upstreamDuplicates', [])),
          # photo is special cased below, to handle alt
          'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                               get_urls(primary, 'stream')),
          'audio': get_urls(attachments, 'audio', 'stream'),
          'duration': [duration],
          'size': sizes,
          'published': [obj.get('published', primary.get('published', ''))],
          'updated': [obj.get('updated', primary.get('updated', ''))],
          'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
          'author': [object_to_json(
              author, trim_nulls=False, default_object_type='person')],
          'location': [object_to_json(
              primary.get('location', {}), trim_nulls=False,
              default_object_type='place')],
          'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                      for c in obj.get('replies', {}).get('items', [])],
          'start': [primary.get('startTime')],
          'end': [primary.get('endTime')],
      },
      'children': (
          # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
          # something is being "quoted," like in a quote tweet, so i cheat and
          # use extra knowledge here that quoted tweets are converted to note
          # attachments, but URLs in the tweet text are converted to article
          # tags.
          [object_to_json(a, trim_nulls=False,
                          entry_class=['u-quotation-of', 'h-cite'])
           for a in attachments['note'] if 'startIndex' not in a] +
          [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
           for a in attachments['article'] if 'startIndex' not in a]),
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML
  # tags, otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name}
                                          if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')

  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
          object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like
      # has multiple targets, e.g. a like of a post with original post URLs in
      # it, which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
          # flatten contexts that are just a url
          o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
          else object_to_json(o, trim_nulls=False, entry_class='h-cite')
          for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
          object_to_json(t, trim_nulls=False, entry_class='h-cite')
          for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
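# Sketch of the duration/size handling above, with hypothetical values: ints
# from the object's stream come out as stringified mf2 properties.
episode = {
    'objectType': 'audio',
    'stream': {'url': 'http://example.com/ep1.mp3',
               'duration': 1800, 'size': 12345678},
}
mf2 = object_to_json(episode)
# mf2['properties']['duration'] == ['1800']
# mf2['properties']['size'] == ['12345678']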
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
  """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

  Args:
    handler: RequestHandler
    activity_wrapped: dict, AS1 activity
    proxy: boolean, whether to use the Response's proxy URL as the webmention
      source instead of the original post URL
    response_props: passed through to the newly created Responses
  """
  activity = common.redirect_unwrap(activity_wrapped)

  verb = activity.get('verb')
  if verb and verb not in SUPPORTED_VERBS:
    error(handler, '%s activities are not supported yet.' % verb)

  # extract source and targets
  source = activity.get('url') or activity.get('id')
  obj = activity.get('object')
  obj_url = util.get_url(obj)

  targets = util.get_list(activity, 'inReplyTo')
  if isinstance(obj, dict):
    if not source or verb in ('create', 'post', 'update'):
      source = obj_url or obj.get('id')
    targets.extend(util.get_list(obj, 'inReplyTo'))

  tags = util.get_list(activity_wrapped, 'tags')
  obj_wrapped = activity_wrapped.get('object')
  if isinstance(obj_wrapped, dict):
    tags.extend(util.get_list(obj_wrapped, 'tags'))
  for tag in tags:
    if tag.get('objectType') == 'mention':
      url = tag.get('url')
      if url and url.startswith(appengine_config.HOST_URL):
        targets.append(common.redirect_unwrap(url))

  if verb in ('follow', 'like', 'share'):
    targets.append(obj_url)

  targets = util.dedupe_urls(util.get_url(t) for t in targets)
  if not source:
    error(handler, "Couldn't find original post URL")
  if not targets:
    error(handler,
          "Couldn't find any target URLs in inReplyTo, object, or mention tags")

  # send webmentions and store Responses
  errors = []
  for target in targets:
    if util.domain_from_link(target) == util.domain_from_link(source):
      logging.info('Skipping same-domain webmention from %s to %s',
                   source, target)
      continue

    response = Response(source=source, target=target, direction='in',
                        **response_props)
    response.put()

    wm_source = (response.proxy_url()
                 if verb in ('follow', 'like', 'share') or proxy
                 else source)
    logging.info('Sending webmention from %s to %s', wm_source, target)

    wm = send.WebmentionSend(wm_source, target)
    if wm.send(headers=HEADERS):
      logging.info('Success: %s', wm.response)
      response.status = 'complete'
    else:
      logging.warning('Failed: %s', wm.error)
      errors.append(wm.error)
      response.status = 'error'
    response.put()

  if errors:
    msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
    error(handler, msg, status=errors[0].get('http_status'))
def json_to_object(mf2, actor=None):
  """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
  author = json_to_object(prop['author']) if prop.get('author') else actor

  # maps mf2 type to ActivityStreams objectType and optional verb.
  mf2_type_to_as_type = {
      'rsvp': ('activity', rsvp_verb),
      'invite': ('activity', 'invite'),
      'repost': ('activity', 'share'),
      'like': ('activity', 'like'),
      'reply': ('comment', None),
      'person': ('person', None),
      'location': ('place', None),
      'note': ('note', None),
      'article': ('article', None),
  }

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

  def absolute_urls(prop):
    return [url for url in get_string_urls(props.get(prop, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urlparse.urlparse(url).netloc]

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
      json_to_object(quote)
      for quote in mf2.get('children', []) + props.get('quotation-of', [])
      if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  for type in 'audio', 'video':
    attachments.extend({'objectType': type, 'url': url}
                       for url in get_string_urls(props.get(type, [])))

  obj = {
      'id': prop.get('uid'),
      'objectType': as_type,
      'verb': as_verb,
      'published': prop.get('published', ''),
      'updated': prop.get('updated', ''),
      'displayName': get_text(prop.get('name')),
      'summary': get_text(prop.get('summary')),
      'content': get_html(prop.get('content')),
      'url': urls[0] if urls else None,
      'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
      'image': [{'url': url} for url in util.dedupe_urls(
          absolute_urls('photo') + absolute_urls('featured'))],
      'stream': [{'url': url} for url in absolute_urls('video')],
      'location': json_to_object(prop.get('location')),
      'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
      'tags': [{'objectType': 'hashtag', 'displayName': cat}
               if isinstance(cat, basestring) else json_to_object(cat)
               for cat in props.get('category', [])],
      'attachments': attachments,
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location
  # properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location']['latitude'] = float(lat)
          obj['location']['longitude'] = float(lng)
          # TODO fill in 'position', maybe using Source.postprocess_object?
        except ValueError:
          logging.warning(
              'Could not convert latitude/longitude (%s, %s) to decimal',
              lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
            'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
            'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
        'object': objects[0] if len(objects) == 1 else objects,
        'actor': author,
    })
  else:
    obj.update({
        'inReplyTo': [{'url': url}
                      for url in get_string_urls(props.get('in-reply-to', []))],
        'author': author,
    })

  return util.trim_nulls(obj)
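# Hedged sketch: an RSVP h-entry becomes an AS1 activity with an rsvp-* verb,
# and its in-reply-to URL becomes the activity's object. Placeholder URL.
rsvp = {
    'type': ['h-entry'],
    'properties': {
        'rsvp': ['yes'],
        'in-reply-to': ['http://example.com/event'],
    },
}
print(json_to_object(rsvp))
# roughly: {'objectType': 'activity', 'verb': 'rsvp-yes',
#           'object': {'url': 'http://example.com/event'}}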
def template_vars(self, domain=None, url=None):
  logging.debug(f'Headers: {list(request.headers.items())}')
  assert domain

  if domain.split('.')[-1] in NON_TLDS:
    error(f"{domain} doesn't look like a domain", status=404)

  # find representative h-card. try url, then url's home page, then domain
  urls = [f'http://{domain}/']
  if url:
    urls = [url, urllib.parse.urljoin(url, '/')] + urls

  for candidate in urls:
    resp = common.requests_get(candidate)
    parsed = util.parse_html(resp)
    mf2 = util.parse_mf2(parsed, url=resp.url)
    # logging.debug(f'Parsed mf2 for {resp.url}: {json_dumps(mf2, indent=2)}')
    hcard = mf2util.representative_hcard(mf2, resp.url)
    if hcard:
      logging.info(f'Representative h-card: {json_dumps(hcard, indent=2)}')
      break
  else:
    error(f"didn't find a representative h-card "
          f"(http://microformats.org/wiki/representative-hcard-parsing) "
          f"on {resp.url}")

  logging.info(f'Generating WebFinger data for {domain}')
  key = models.MagicKey.get_or_create(domain)
  props = hcard.get('properties', {})
  urls = util.dedupe_urls(props.get('url', []) + [resp.url])
  canonical_url = urls[0]

  acct = f'{domain}@{domain}'
  for url in urls:
    if url.startswith('acct:'):
      urluser, urldomain = util.parse_acct_uri(url)
      if urldomain == domain:
        acct = f'{urluser}@{domain}'
        logging.info(f'Found custom username: acct:{acct}')
        break

  # discover atom feed, if any
  atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
  if atom and atom['href']:
    atom = urllib.parse.urljoin(resp.url, atom['href'])
  else:
    atom = 'https://granary.io/url?' + urllib.parse.urlencode({
        'input': 'html',
        'output': 'atom',
        'url': resp.url,
        'hub': resp.url,
    })

  # discover PuSH, if any. default to superfeedr; use the first hub link
  # header, if any.
  hub = 'https://bridgy-fed.superfeedr.com/'
  for link in resp.headers.get('Link', '').split(','):
    match = common.LINK_HEADER_RE.match(link)
    if match and match.group(2) == 'hub':
      hub = match.group(1)
      break

  # generate webfinger content
  data = util.trim_nulls({
      'subject': 'acct:' + acct,
      'aliases': urls,
      'magic_keys': [{'value': key.href()}],
      'links': sum(([{
          'rel': 'http://webfinger.net/rel/profile-page',
          'type': 'text/html',
          'href': url,
      }] for url in urls if url.startswith('http')), []) + [{
          'rel': 'http://webfinger.net/rel/avatar',
          'href': get_text(url),
      } for url in props.get('photo', [])] + [{
          'rel': 'canonical_uri',
          'type': 'text/html',
          'href': canonical_url,
      },

      # ActivityPub
      {
          'rel': 'self',
          'type': common.CONTENT_TYPE_AS2,
          # WARNING: in python 2 sometimes request.host_url lost port,
          # http://localhost:8080 would become just http://localhost. no
          # clue how or why. pay attention here if that happens again.
          'href': f'{request.host_url}{domain}',
      }, {
          'rel': 'inbox',
          'type': common.CONTENT_TYPE_AS2,
          'href': f'{request.host_url}{domain}/inbox',
      },

      # OStatus
      {
          'rel': 'http://schemas.google.com/g/2010#updates-from',
          'type': common.CONTENT_TYPE_ATOM,
          'href': atom,
      }, {
          'rel': 'hub',
          'href': hub,
      }, {
          'rel': 'magic-public-key',
          'href': key.href(),
      }, {
          'rel': 'salmon',
          'href': f'{request.host_url}{domain}/salmon',
      }]
  })
  logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
  return data