Code example #1
    def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
        """Fetches a URL and extracts its mf2 data.

    Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
    on errors.

    Args:
      url: string
      require_mf2: boolean, whether to return an error if no mf2 is found
      raise_errors: boolean, whether to let error exceptions propagate up or
        handle them

    Returns:
      (:class:`requests.Response`, mf2 data dict) on success, None on failure
    """
        try:
            resp = util.requests_get(url)
            resp.raise_for_status()
        except BaseException as e:
            if raise_errors:
                raise
            util.interpret_http_exception(e)  # log exception
            return self.error('Could not fetch source URL %s' % url)

        if self.entity:
            self.entity.html = resp.text

        # parse microformats
        soup = util.parse_html(resp)
        mf2 = util.parse_mf2(soup, resp.url)

        # special case tumblr's markup: div#content > div.post > div.copy
        # convert to mf2 and re-parse
        if not mf2.get('items'):
            contents = soup.find_all(id='content')
            if contents:
                post = contents[0].find_next(class_='post')
                if post:
                    post['class'] = 'h-entry'
                    copy = post.find_next(class_='copy')
                    if copy:
                        copy['class'] = 'e-content'
                    photo = post.find_next(class_='photo-wrapper')
                    if photo:
                        img = photo.find_next('img')
                        if img:
                            img['class'] = 'u-photo'
                    # TODO: i should be able to pass post or contents[0] to mf2py instead
                    # here, but it returns no items. mf2py bug?
                    doc = str(post)
                    mf2 = util.parse_mf2(doc, resp.url)

        logging.debug('Parsed microformats2: %s', json_dumps(mf2, indent=2))
        items = mf2.get('items', [])
        if require_mf2 and (not items or not items[0]):
            return self.error('No microformats2 data found in ' + resp.url,
                              data=mf2,
                              html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (resp.url, util.pretty_link(resp.url)))

        return resp, mf2
Code example #2
    def post(self, source_short_name):
        logging.info('Params: %s', self.request.params.items())
        # strip fragments from source and target url
        self.source_url = urllib.parse.urldefrag(
            util.get_required_param(self, 'source'))[0]
        self.target_url = urllib.parse.urldefrag(
            util.get_required_param(self, 'target'))[0]

        # follow target url through any redirects, strip utm_* query params
        resp = util.follow_redirects(self.target_url)
        redirected_target_urls = [r.url for r in resp.history]
        self.target_url = util.clean_url(resp.url)

        # parse and validate target URL
        domain = util.domain_from_link(self.target_url)
        if not domain:
            return self.error('Could not parse target URL %s' %
                              self.target_url)

        # look up source by domain
        source_cls = models.sources[source_short_name]
        domain = domain.lower()
        self.source = (source_cls.query().filter(
            source_cls.domains == domain).filter(
                source_cls.features == 'webmention').filter(
                    source_cls.status == 'enabled').get())
        if not self.source:
            # check for a rel-canonical link. Blogger uses these when it serves a post
            # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs
            # epeus.blogspot.com.
            # https://github.com/snarfed/bridgy/issues/805
            mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
            if not mf2:
                # fetch_mf2() already wrote the error response
                return
            domains = util.dedupe_urls(
                util.domain_from_link(url)
                for url in mf2[1]['rels'].get('canonical', []))
            if domains:
                self.source = (source_cls.query().filter(
                    source_cls.domains.IN(domains)).filter(
                        source_cls.features == 'webmention').filter(
                            source_cls.status == 'enabled').get())

        if not self.source:
            return self.error(
                'Could not find %s account for %s. Is it registered with Bridgy?'
                % (source_cls.GR_CLASS.NAME, domain))

        # check that the target URL path is supported
        target_path = urllib.parse.urlparse(self.target_url).path
        if target_path in ('', '/'):
            return self.error(
                'Home page webmentions are not currently supported.',
                status=202)
        for pattern in self.source.PATH_BLACKLIST:
            if pattern.match(target_path):
                return self.error(
                    '%s webmentions are not supported for URL path: %s' %
                    (self.source.GR_CLASS.NAME, target_path),
                    status=202)

        # create BlogWebmention entity
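        # the entity id is the source and target URLs joined by a space, so a
        # repeat webmention for the same pair reuses the existing entity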
        id = '%s %s' % (self.source_url, self.target_url)
        self.entity = BlogWebmention.get_or_insert(
            id,
            source=self.source.key,
            redirected_target_urls=redirected_target_urls)
        if self.entity.status == 'complete':
            # TODO: response message saying update isn't supported
            self.response.write(self.entity.published)
            return
        logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe())

        # fetch source page
        fetched = self.fetch_mf2(self.source_url)
        if not fetched:
            return
        resp, mf2 = fetched

        item = self.find_mention_item(mf2.get('items', []))
        if not item:
            return self.error(
                'Could not find target URL %s in source page %s' %
                (self.target_url, resp.url),
                data=mf2,
                log_exception=False)

        # default author to target domain
        author_name = domain
        author_url = 'http://%s/' % domain

        # extract author name and URL from h-card, if any
        props = item['properties']
        author = first_value(props, 'author')
        if author:
            if isinstance(author, str):
                author_name = author
            else:
                author_props = author.get('properties', {})
                author_name = first_value(author_props, 'name')
                author_url = first_value(author_props, 'url')

        # if present, u-url overrides source url
        u_url = first_value(props, 'url')
        if u_url:
            self.entity.u_url = u_url

        # generate content
        # find_mention_item() guaranteed this is here
        content = props['content'][0]
        text = (content.get('html') or content.get('value')).strip()
        source_url = self.entity.source_url()
        text += ' <br /> <a href="%s">via %s</a>' % (
            source_url, util.domain_from_link(source_url))

        # write comment
        try:
            self.entity.published = self.source.create_comment(
                self.target_url, author_name, author_url, text)
        except Exception as e:
            code, body = util.interpret_http_exception(e)
            msg = 'Error: %s %s; %s' % (code, e, body)
            if code == '401':
                logging.warning('Disabling source due to: %s' % e,
                                exc_info=True)
                self.source.status = 'disabled'
                self.source.put()
                return self.error(msg,
                                  status=code,
                                  report=self.source.is_beta_user())
            elif code == '404':
                # post is gone
                return self.error(msg, status=code, report=False)
            elif util.is_connection_failure(e) or (code
                                                   and int(code) // 100 == 5):
                return self.error(msg,
                                  status=util.ERROR_HTTP_RETURN_CODE,
                                  report=False)
            elif code or body:
                return self.error(msg, status=code, report=True)
            else:
                raise

        # write results to datastore
        self.entity.status = 'complete'
        self.entity.put()
        self.response.write(json_dumps(self.entity.published))
Code example #3
    def receive(self, email):
        addr = self.request.path.split('/')[-1]
        message_id = email.original.get('message-id').strip('<>')
        sender = getattr(email, 'sender', None)
        to = getattr(email, 'to', None)
        cc = getattr(email, 'cc', None)
        subject = getattr(email, 'subject', None)
        logging.info('Received %s from %s to %s (%s) cc %s: %s', message_id,
                     sender, to, addr, cc, subject)

        user = addr.split('@')[0]
        source = FacebookEmailAccount.query(
            FacebookEmailAccount.email_user == user).get()
        logging.info('Source for %s is %s', user, source)

        util.email_me(subject='New email from %s: %s' % (sender, subject),
                      body='Source: %s' %
                      (source.bridgy_url(self) if source else None))

        htmls = list(body.decode() for _, body in email.bodies('text/html'))
        fbe = FacebookEmail.get_or_insert(
            message_id, source=source.key if source else None, htmls=htmls)
        logging.info('FacebookEmail created %s: %s', fbe.created,
                     fbe.key.urlsafe())

        if not source:
            self.response.status_code = 404
            self.response.write(
                'No Facebook email user found with address %s' % addr)
            return

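        # for ... else: the else branch below runs only if the loop finished
        # without a break, i.e. none of the HTML bodies parsed into an object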
        for html in htmls:
            obj = gr_facebook.Facebook().email_to_object(html)
            if obj:
                break
        else:
            self.response.status_code = 400
            self.response.write('No HTML body could be parsed')
            return
        logging.info('Converted to AS1: %s', json_dumps(obj, indent=2))

        base_obj = source.gr_source.base_object(obj)
        # note that this ignores the id query param (the post's user id) and uses
        # the source object's user id instead.
        base_obj['url'] = source.canonicalize_url(base_obj['url'])
        # also note that base_obj['id'] is not a tag URI, it's the raw Facebook post
        # id, eg '104790764108207'. we don't use it from activities_json much,
        # though, just in PropagateResponse.source_url(), which handles this fine.

        original_post_discovery.refetch(source)
        targets, mentions = original_post_discovery.discover(source,
                                                             base_obj,
                                                             fetch_hfeed=False)
        logging.info('Got targets %s mentions %s', targets, mentions)

        resp = Response(id=obj['id'],
                        source=source.key,
                        type=Response.get_type(obj),
                        response_json=json_dumps(obj),
                        activities_json=[json_dumps(base_obj)],
                        unsent=targets)
        resp.get_or_save(source, restart=True)

        fbe.response = resp.key
        fbe.put()
Code example #4
 def store_activity(self):
     activity = copy.deepcopy(MBASIC_ACTIVITIES[0])
     activity['actor']['url'] = 'http://snarfed.org/'
     return Activity(id='tag:facebook.com,2013:123',
                     source=self.source.key,
                     activity_json=json_dumps(activity)).put()
Code example #5
File: test_mastodon.py Project: sheyril/bridgy
    def test_is_private(self):
        self.assertFalse(self.m.is_private())

        self.auth_entity.user_json = json_dumps({'locked': True})
        self.auth_entity.put()
        self.assertTrue(self.m.is_private())
Code example #6
 def store_activity(self):
     activity = copy.deepcopy(HTML_PHOTO_ACTIVITY)
     activity['actor']['url'] = 'http://snarfed.org/'
     return Activity(id='tag:instagram.com,2013:123_456',
                     source=self.source.key,
                     activity_json=json_dumps(activity)).put()
Code example #7
File: handlers.py Project: sheyril/bridgy
    def get(self, type, source_short_name, string_id, *ids):
        source_cls = models.sources.get(source_short_name)
        if not source_cls:
            self.abort(
                400, "Source type '%s' not found. Known sources: %s" %
                (source_short_name, list(models.sources.keys())))

        self.source = source_cls.get_by_id(string_id)
        if not self.source:
            self.abort(
                400, 'Source %s %s not found' % (source_short_name, string_id))
        elif (self.source.status == 'disabled'
              or 'listen' not in self.source.features):
            self.abort(
                400, 'Source %s is disabled for backfeed' %
                self.source.bridgy_path())

        format = self.request.get('format', 'html')
        if format not in ('html', 'json'):
            self.abort(400,
                       'Invalid format %s, expected html or json' % format)

        for id in ids:
            if not self.VALID_ID.match(id):
                self.abort(404, 'Invalid id %s' % id)

        try:
            obj = self.get_item(*ids)
        except models.DisableSource as e:
            self.abort(
                401,
                "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!"
            )
        except ValueError as e:
            self.abort(400, '%s error:\n%s' % (self.source.GR_CLASS.NAME, e))
        except Exception as e:
            # pass through all API HTTP errors if we can identify them
            code, body = util.interpret_http_exception(e)
            if code:
                self.response.status_int = int(code)
                self.response.headers['Content-Type'] = 'text/plain'
                self.response.write('%s error:\n%s' %
                                    (self.source.GR_CLASS.NAME, body))
                return
            else:
                raise

        if not obj:
            self.abort(
                404, 'Not found: %s:%s %s %s' %
                (source_short_name, string_id, type, ids))

        if self.source.is_blocked(obj):
            self.abort(410, 'That user is currently blocked')

        # use https for profile pictures so we don't cause SSL mixed mode errors
        # when serving over https.
        author = obj.get('author', {})
        image = author.get('image', {})
        url = image.get('url')
        if url:
            image['url'] = util.update_scheme(url, self)

        mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

        # try to include the author's silo profile url
        author = first_props(mf2_json.get('properties', {})).get('author', {})
        author_uid = first_props(author.get('properties', {})).get('uid', '')
        if author_uid:
            parsed = util.parse_tag_uri(author_uid)
            if parsed:
                urls = author.get('properties', {}).setdefault('url', [])
                try:
                    silo_url = self.source.gr_source.user_url(parsed[1])
                    if silo_url not in microformats2.get_string_urls(urls):
                        urls.append(silo_url)
                except NotImplementedError:  # from gr_source.user_url()
                    pass

        # write the response!
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        if format == 'html':
            self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
            url = obj.get('url', '')
            self.response.out.write(
                TEMPLATE.substitute({
                    'refresh': ('<meta http-equiv="refresh" content="0;url=%s">' % url
                                if url else ''),
                    'url': url,
                    'body': microformats2.json_to_html(mf2_json),
                    'title': obj.get('title') or obj.get('content') or 'Bridgy Response',
                }))
        elif format == 'json':
            self.response.headers[
                'Content-Type'] = 'application/json; charset=utf-8'
            self.response.out.write(json_dumps(mf2_json, indent=2))
Code example #8
    def test_registration_with_user_url(self):
        """Run through an authorization back and forth with a custom user url
    provided to the auth mechanism
    """
        encoded_state = urllib.parse.quote_plus(
            json_dumps(
                {
                    'callback': 'http://withknown.com/bridgy_callback',
                    'feature': 'listen',
                    'operation': 'add',
                    'user_url': 'https://kylewm.com',
                },
                sort_keys=True))

        application = webapp2.WSGIApplication([
            ('/fakesource/start', testutil.FakeStartHandler),
            ('/fakesource/add', testutil.FakeAddHandler),
        ])

        self.expect_webmention_requests_get(
            'https://kylewm.com/',
            response='<html><link rel="webmention" href="/webmention"></html>')

        self.mox.ReplayAll()

        resp = application.get_response(
            '/fakesource/start',
            method='POST',
            text=urllib.parse.urlencode({
                'feature': 'listen',
                'callback': 'http://withknown.com/bridgy_callback',
                'user_url': 'https://kylewm.com',
            }))

        expected_auth_url = 'http://fake/auth/url?' + urllib.parse.urlencode({
            'redirect_uri':
            'http://localhost/fakesource/add?state=' + encoded_state,
        })

        self.assert_equals(302, resp.status_code)
        self.assert_equals(expected_auth_url, resp.headers['location'])

        resp = application.get_response(
            '/fakesource/add?state=' + encoded_state +
            '&oauth_token=fake-token&oauth_token_secret=fake-secret')

        self.assert_equals(302, resp.status_code)
        self.assert_equals(
            'http://withknown.com/bridgy_callback?' + urllib.parse.urlencode([
                ('result', 'success'),
                ('user', 'http://localhost/fake/0123456789'),
                ('key', ndb.Key('FakeSource',
                                '0123456789').urlsafe().decode()),
            ]), resp.headers['location'])
        self.assertEqual(
            'logins="/fake/0123456789?Fake+User"; expires="2001-12-31 00:00:00"; Path=/',
            resp.headers['Set-Cookie'])

        source = FakeSource.get_by_id('0123456789')
        self.assertTrue(source)
        self.assert_equals('Fake User', source.name)
        self.assert_equals(['listen'], source.features)
        self.assert_equals(['https://kylewm.com/', 'http://fakeuser.com/'],
                           source.domain_urls)
        self.assert_equals(['kylewm.com', 'fakeuser.com'], source.domains)
Code example #9
 def output(self, obj):
     self.response.headers['Content-Type'] = JSON_CONTENT_TYPE
     self.response.write(json_dumps(obj, indent=2))
Code example #10
File: api.py Project: sbarale/granary
    def write_response(self,
                       response,
                       actor=None,
                       url=None,
                       title=None,
                       hfeed=None):
        """Converts ActivityStreams activities and writes them out.

    Args:
      response: response dict with values based on OpenSocial ActivityStreams
        REST API, as returned by Source.get_activities_response()
      actor: optional ActivityStreams actor dict for current user. Only used
        for Atom and JSON Feed output.
      url: the input URL
      title: string, used in feed output (Atom, JSON Feed, RSS)
      hfeed: dict, parsed mf2 h-feed, if available
    """
        format = self.request.get('format') or self.request.get(
            'output') or 'json'
        if format not in FORMATS:
            raise exc.HTTPBadRequest('Invalid format: %s, expected one of %r' %
                                     (format, FORMATS))

        if 'plaintext' in self.request.params:
            # override content type
            self.response.headers['Content-Type'] = 'text/plain'
        else:
            content_type = FORMATS.get(format)
            if content_type:
                self.response.headers['Content-Type'] = content_type

        if self.request.method == 'HEAD':
            return

        activities = response['items']
        try:
            if format in ('as1', 'json', 'activitystreams'):
                self.response.out.write(json_dumps(response, indent=2))
            elif format == 'as2':
                response.update({
                    'items': [as2.from_as1(a) for a in activities],
                    'totalItems': response.pop('totalResults', None),
                    'updated': response.pop('updatedSince', None),
                    'filtered': None,
                    'sorted': None,
                })
                self.response.out.write(
                    json_dumps(util.trim_nulls(response), indent=2))
            elif format == 'atom':
                hub = self.request.get('hub')
                reader = self.request.get('reader', 'true').lower()
                if reader not in ('true', 'false'):
                    self.abort(400,
                               'reader param must be either true or false')
                if not actor and hfeed:
                    actor = microformats2.json_to_object({
                        'properties': hfeed.get('properties', {}),
                    })
                self.response.out.write(
                    atom.activities_to_atom(activities,
                                            actor,
                                            host_url=url
                                            or self.request.host_url + '/',
                                            request_url=self.request.url,
                                            xml_base=util.base_url(url),
                                            title=title,
                                            rels={'hub': hub} if hub else None,
                                            reader=(reader == 'true')))
                self.response.headers.add(
                    'Link',
                    '<%s>; rel="self"' % util.quote_path(self.request.url))
                if hub:
                    self.response.headers.add(
                        'Link', '<%s>; rel="hub"' % util.quote_path(hub))
            elif format == 'rss':
                if not title:
                    title = 'Feed for %s' % url
                self.response.out.write(
                    rss.from_activities(activities,
                                        actor,
                                        title=title,
                                        feed_url=self.request.url,
                                        hfeed=hfeed,
                                        home_page_url=util.base_url(url)))
            elif format in ('as1-xml', 'xml'):
                self.response.out.write(XML_TEMPLATE % util.to_xml(response))
            elif format == 'html':
                self.response.out.write(
                    microformats2.activities_to_html(activities))
            elif format in ('mf2-json', 'json-mf2'):
                items = [microformats2.activity_to_json(a) for a in activities]
                self.response.out.write(json_dumps({'items': items}, indent=2))
            elif format == 'jsonfeed':
                try:
                    jf = jsonfeed.activities_to_jsonfeed(
                        activities,
                        actor=actor,
                        title=title,
                        feed_url=self.request.url)
                except TypeError as e:
                    raise exc.HTTPBadRequest('Unsupported input data: %s' % e)
                self.response.out.write(json_dumps(jf, indent=2))
        except ValueError as e:
            logging.warning('converting to output format failed',
                            stack_info=True)
            self.abort(400, 'Could not convert to %s: %s' % (format, str(e)))
Code example #11
 def test_new_no_primary_blog(self):
   self.auth_entity.user_json = json_dumps({'user': {'blogs': [{'url': 'foo'}]}})
   with app.test_request_context():
     self.assertIsNone(Tumblr.new(auth_entity=self.auth_entity))
     self.assertIn('Tumblr blog not found', get_flashed_messages()[0])
Code example #12
File: webmention.py Project: mdheller/bridgy-fed
    def _try_salmon(self, resp):
        """
        Args:
          resp: Response
        """
        # fetch target HTML page, extract Atom rel-alternate link
        if not self.target_resp:
            self.target_resp = common.requests_get(resp.target())

        parsed = util.parse_html(self.target_resp)
        atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
        if not atom_url or not atom_url.get('href'):
            self.error('Target post %s has no Atom link' % resp.target(), status=400)

        # fetch Atom target post, extract and inject id into source object
        base_url = ''
        base = parsed.find('base')
        if base and base.get('href'):
            base_url = base['href']
        atom_link = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
        atom_url = urllib.parse.urljoin(
            resp.target(), urllib.parse.urljoin(base_url, atom_link['href']))

        feed = common.requests_get(atom_url).text
        parsed = feedparser.parse(feed)
        logging.info('Parsed: %s', json_dumps(parsed, indent=2))
        entry = parsed.entries[0]
        target_id = entry.id
        in_reply_to = self.source_obj.get('inReplyTo')
        source_obj_obj = self.source_obj.get('object')
        if in_reply_to:
            in_reply_to[0]['id'] = target_id
        elif isinstance(source_obj_obj, dict):
            source_obj_obj['id'] = target_id

        # Mastodon (and maybe others?) require a rel-mentioned link to the
        # original post's author to make it show up as a reply:
        #   app/services/process_interaction_service.rb
        # ...so add them as a tag, which atom renders as a rel-mention link.
        authors = entry.get('authors', None)
        if authors:
            url = entry.authors[0].get('href')
            if url:
                self.source_obj.setdefault('tags', []).append({'url': url})

        # extract and discover salmon endpoint
        logging.info('Discovering Salmon endpoint in %s', atom_url)
        endpoint = django_salmon.discover_salmon_endpoint(feed)

        if not endpoint:
            # try webfinger
            parsed = urllib.parse.urlparse(resp.target())
            # TODO: test missing email
            author = entry.get('author_detail', {})
            email = author.get('email') or '@'.join(
                (author.get('name', ''), parsed.netloc))
            try:
                # TODO: always https?
                profile = common.requests_get(
                    '%s://%s/.well-known/webfinger?resource=acct:%s' %
                    (parsed.scheme, parsed.netloc, email), verify=False)
                endpoint = django_salmon.get_salmon_replies_link(profile.json())
            except requests.HTTPError as e:
                pass

        if not endpoint:
            self.error('No salmon endpoint found!', status=400)
        logging.info('Discovered Salmon endpoint %s', endpoint)

        # construct reply Atom object
        self.source_url = resp.source()
        activity = self.source_obj
        if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT:
            activity = {'object': self.source_obj}
        entry = atom.activity_to_atom(activity, xml_base=self.source_url)
        logging.info('Converted %s to Atom:\n%s', self.source_url, entry)

        # sign reply and wrap in magic envelope
        domain = urllib.parse.urlparse(self.source_url).netloc
        key = MagicKey.get_or_create(domain)
        logging.info('Using key for %s: %s', domain, key)
        magic_envelope = magicsigs.magic_envelope(
            entry, common.CONTENT_TYPE_ATOM, key).decode()

        logging.info('Sending Salmon slap to %s', endpoint)
        common.requests_post(
            endpoint, data=common.XML_UTF8 + magic_envelope,
            headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
        return True
Code example #13
File: webmention.py Project: mdheller/bridgy-fed
    def _activitypub_targets(self):
        """
        Returns: list of (Response, string inbox URL)
        """
        # if there's an in-reply-to, like-of, or repost-of, that's the target.
        # otherwise, it's all followers' inboxes.
        target = self._single_target()

        if not target:
            # interpret this as a Create or Update, deliver it to followers
            inboxes = []
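            # Follower key ids appear to start with the followed domain plus a
            # space, so this key range matches every follower of source_domain
            # (the upper bound is the domain plus '!', the character after space)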
            for follower in Follower.query().filter(
                Follower.key > Key('Follower', self.source_domain + ' '),
                Follower.key < Key('Follower', self.source_domain + chr(ord(' ') + 1))):
                if follower.status != 'inactive' and follower.last_follow:
                    actor = json_loads(follower.last_follow).get('actor')
                    if actor and isinstance(actor, dict):
                        inboxes.append(actor.get('endpoints', {}).get('sharedInbox') or
                                       actor.get('publicInbox') or
                                       actor.get('inbox'))
            return [(Response.get_or_create(
                        source=self.source_url, target=inbox, direction='out',
                        protocol='activitypub', source_mf2=json_dumps(self.source_mf2)),
                     inbox)
                    for inbox in inboxes if inbox]

        # fetch target page as AS2 object
        try:
            self.target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            self.target_resp = getattr(e, 'response', None)
            if (self.target_resp and self.target_resp.status_code // 100 == 2 and
                common.content_type(self.target_resp).startswith('text/html')):
                # TODO: pass e.response to try_salmon()'s target_resp
                return False  # make post() try Salmon
            else:
                raise
        target_url = self.target_resp.url or target

        resp = Response.get_or_create(
            source=self.source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json_dumps(self.source_mf2))

        # find target's inbox
        target_obj = self.target_resp.json()
        resp.target_as2 = json_dumps(target_obj)
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = (util.get_first(target_obj, 'actor') or
                     util.get_first(target_obj, 'attributedTo'))
            if isinstance(actor, dict):
                inbox_url = actor.get('inbox')
                actor = actor.get('url') or actor.get('id')
            if not inbox_url and not actor:
                self.error('Target object has no actor or attributedTo with URL or id.')
            elif not isinstance(actor, str):
                self.error('Target actor or attributedTo has unexpected url or id object: %r' % actor)

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # self.error('Target actor has no inbox')
            return []

        inbox_url = urllib.parse.urljoin(target_url, inbox_url)
        return [(resp, inbox_url)]
Code example #14
File: test_app.py Project: muhammedsansal/bridgy
    def test_retry(self):
        self.assertEqual([], self.taskqueue_stub.GetTasks('propagate'))

        source = self.sources[0]
        source.domain_urls = ['http://orig']
        source.last_hfeed_refetch = last_hfeed_refetch = \
            testutil.NOW - datetime.timedelta(minutes=1)
        source.put()

        resp = self.responses[0]
        resp.status = 'complete'
        resp.unsent = ['http://unsent']
        resp.sent = ['http://sent']
        resp.error = ['http://error']
        resp.failed = ['http://failed']
        resp.skipped = ['https://skipped']

        # SyndicatedPost with new target URLs
        resp.activities_json = [
            json_dumps({'object': {
                'url': 'https://fa.ke/1'
            }}),
            json_dumps({
                'url': 'https://fa.ke/2',
                'object': {
                    'unused': 'ok'
                }
            }),
            json_dumps({'url': 'https://fa.ke/3'}),
        ]
        resp.put()
        SyndicatedPost.insert(source, 'https://fa.ke/1', 'https://orig/1')
        SyndicatedPost.insert(source, 'https://fa.ke/2', 'http://orig/2')
        SyndicatedPost.insert(source, 'https://fa.ke/3', 'http://orig/3')

        # cached webmention endpoint
        memcache.set('W https skipped /', 'asdf')

        key = resp.key.urlsafe()
        response = app.application.get_response(
            '/retry',
            method='POST',
            body=native_str(urllib.parse.urlencode({'key': key})))
        self.assertEquals(302, response.status_int)
        self.assertEquals(source.bridgy_url(self.handler),
                          response.headers['Location'].split('#')[0])
        params = testutil.get_task_params(
            self.taskqueue_stub.GetTasks('propagate')[0])
        self.assertEqual(key, params['response_key'])

        # status and URLs should be refreshed
        got = resp.key.get()
        self.assertEqual('new', got.status)
        self.assertItemsEqual([
            'http://unsent/', 'http://sent/', 'https://skipped/',
            'http://error/', 'http://failed/', 'https://orig/1',
            'http://orig/2', 'http://orig/3'
        ], got.unsent)
        for field in got.sent, got.skipped, got.error, got.failed:
            self.assertEqual([], field)

        # webmention endpoints for URL domains should be refreshed
        self.assertIsNone(memcache.get('W https skipped /'))

        # shouldn't have refetched h-feed
        self.assertEqual(last_hfeed_refetch,
                         source.key.get().last_hfeed_refetch)
Code example #15
    def setUp(self):
        super(ModelsTest, self).setUp()

        # sources
        auth_entities = [
            FakeAuthEntity(key=ndb.Key('FakeAuthEntity', '01122334455'),
                           user_json=json_dumps({
                               'id': '0123456789',
                               'name': 'Fake User',
                               'url': 'http://fakeuser.com/',
                           })),
            FakeAuthEntity(key=ndb.Key('FakeAuthEntity', '0022446688'),
                           user_json=json_dumps({
                               'id': '0022446688',
                               'name': 'Another Fake',
                               'url': 'http://anotherfake.com/',
                           }))
        ]
        for entity in auth_entities:
            entity.put()

        self.sources = [
            FakeSource.new(None, auth_entity=auth_entities[0]),
            FakeSource.new(None, auth_entity=auth_entities[1])
        ]
        for entity in self.sources:
            entity.features = ['listen']
            entity.put()

        self.actor = FakeGrSource.actor = {
            'objectType': 'person',
            'id': 'tag:fa.ke,2013:212038',
            'username': '******',
            'displayName': 'Ryan B',
            'url': 'https://snarfed.org/',
            'image': {
                'url': 'http://pic.ture/url'
            },
        }

        # activities
        self.activities = FakeGrSource.activities = [{
            'id': 'tag:source.com,2013:%s' % id,
            'url': 'http://fa.ke/post/url',
            'object': {
                'objectType': 'note',
                'id': 'tag:source.com,2013:%s' % id,
                'url': 'http://fa.ke/post/url',
                'content': 'foo http://target1/post/url bar',
                'to': [{
                    'objectType': 'group',
                    'alias': '@public'
                }],
                'replies': {
                    'items': [{
                        'objectType': 'comment',
                        'id': 'tag:source.com,2013:1_2_%s' % id,
                        'url': 'http://fa.ke/comment/url',
                        'content': 'foo bar',
                    }],
                    'totalItems': 1,
                },
                'tags': [{
                    'objectType': 'activity',
                    'verb': 'like',
                    'id': 'tag:source.com,2013:%s_liked_by_alice' % id,
                    'object': {
                        'url': 'http://example.com/abc'
                    },
                    'author': {
                        'id': 'tag:source.com,2013:alice',
                        'url': 'http://example.com/alice',
                    },
                }, {
                    'id': 'tag:source.com,2013:%s_reposted_by_bob' % id,
                    'objectType': 'activity',
                    'verb': 'share',
                    'object': {
                        'url': 'http://example.com/def'
                    },
                    'author': {
                        'url': 'http://example.com/bob'
                    },
                }, {
                    'id': 'tag:source.com,2013:%s_scissors_by_bob' % id,
                    'objectType': 'activity',
                    'verb': 'react',
                    'content': '✁',
                    'object': {
                        'url': 'http://example.com/def'
                    },
                    'author': {
                        'url': 'http://example.com/bob'
                    },
                }],
            },
        } for id in ('a', 'b', 'c')]

        # responses
        self.responses = []
        created = datetime.datetime.utcnow() - datetime.timedelta(days=10)

        for activity in self.activities:
            obj = activity['object']
            pruned_activity = {
                'id': activity['id'],
                'url': 'http://fa.ke/post/url',
                'object': {
                    'content': 'foo http://target1/post/url bar',
                }
            }

            comment = obj['replies']['items'][0]
            self.responses.append(
                Response(id=comment['id'],
                         activities_json=[json_dumps(pruned_activity)],
                         response_json=json_dumps(comment),
                         type='comment',
                         source=self.sources[0].key,
                         unsent=['http://target1/post/url'],
                         created=created))

            created += datetime.timedelta(hours=1)

            like = obj['tags'][0]
            self.responses.append(
                Response(id=like['id'],
                         activities_json=[json_dumps(pruned_activity)],
                         response_json=json_dumps(like),
                         type='like',
                         source=self.sources[0].key,
                         unsent=['http://target1/post/url'],
                         created=created))

            created += datetime.timedelta(hours=1)

            share = obj['tags'][1]
            self.responses.append(
                Response(id=share['id'],
                         activities_json=[json_dumps(pruned_activity)],
                         response_json=json_dumps(share),
                         type='repost',
                         source=self.sources[0].key,
                         unsent=['http://target1/post/url'],
                         created=created))

            created += datetime.timedelta(hours=1)

            reaction = obj['tags'][2]
            self.responses.append(
                Response(id=reaction['id'],
                         activities_json=[json_dumps(pruned_activity)],
                         response_json=json_dumps(reaction),
                         type='react',
                         source=self.sources[0].key,
                         unsent=['http://target1/post/url'],
                         created=created))

            created += datetime.timedelta(hours=1)

        # publishes
        self.publishes = [
            Publish(
                parent=PublishedPage(id='https://post').key,
                source=self.sources[0].key,
                status='complete',
                published={'url': 'http://fa.ke/syndpost'},
            )
        ]

        # blogposts
        self.blogposts = [
            BlogPost(
                id='https://post',
                source=self.sources[0].key,
                status='complete',
                feed_item={'title': 'a post'},
                sent=['http://a/link'],
            )
        ]
Code example #16
File: test_app.py Project: siakaramalegos/bridgy
  def test_social_user_page_mf2(self):
    """Check the custom mf2 we render on social user pages."""
    self.sources[0].features = ['listen', 'publish']
    self.sources[0].put()

    # test invite with missing object and content
    resp = json_loads(self.responses[8].response_json)
    resp['verb'] = 'invite'
    resp.pop('object', None)
    resp.pop('content', None)
    self.responses[8].response_json = json_dumps(resp)

    # test that invites render the invitee, not the inviter
    # https://github.com/snarfed/bridgy/issues/754
    self.responses[9].type = 'rsvp'
    self.responses[9].response_json = json_dumps({
      'id': 'tag:fa.ke,2013:111',
      'objectType': 'activity',
      'verb': 'invite',
      'url': 'http://fa.ke/event',
      'actor': {
        'displayName': 'Mrs. Host',
        'url': 'http://fa.ke/host',
      },
      'object': {
        'objectType': 'person',
        'displayName': 'Ms. Guest',
        'url': 'http://fa.ke/guest',
      },
    })

    for entity in self.responses + self.publishes + self.blogposts:
      entity.put()

    user_url = self.sources[0].bridgy_path()
    response = app.application.get_response(user_url)
    self.assertEquals(200, response.status_int)

    parsed = util.parse_mf2(response.body, user_url)
    hcard = parsed.get('items', [])[0]
    self.assertEquals(['h-card'], hcard['type'])
    self.assertEquals(
      ['Fake User'], hcard['properties'].get('name'))
    self.assertEquals(
      ['http://fa.ke/profile/url'], hcard['properties'].get('url'))
    self.assertEquals(
      ['enabled'], hcard['properties'].get('bridgy-account-status'))
    self.assertEquals(
      ['enabled'], hcard['properties'].get('bridgy-listen-status'))
    self.assertEquals(
      ['enabled'], hcard['properties'].get('bridgy-publish-status'))

    expected_resps = self.responses[:10]
    for item, resp in zip(hcard['children'], expected_resps):
      self.assertIn('h-bridgy-response', item['type'])
      props = item['properties']
      self.assertEquals([resp.status], props['bridgy-status'])
      self.assertEquals([json_loads(resp.activities_json[0])['url']],
                        props['bridgy-original-source'])
      self.assertEquals(resp.unsent, props['bridgy-target'])

    # check invite
    html = response.body.decode('utf-8')
    self.assertIn('Ms. Guest is invited.', html)
    self.assertNotIn('Mrs. Host is invited.', html)

    publish = hcard['children'][len(expected_resps)]
    self.assertIn('h-bridgy-publish', publish['type'])
    props = publish['properties']
    self.assertEquals([self.publishes[0].key.parent().id()], props['url'])
    self.assertEquals([self.publishes[0].status], props['bridgy-status'])
Code example #17
    HTML_VIDEO_ACTIVITY_FULL,
    HTML_VIDEO_EXTRA_COMMENT_OBJ,
    HTML_VIDEO_PAGE,
    HTML_VIEWER_CONFIG,
    LIKE_OBJS,
)
from oauth_dropins.webutil.util import HTTP_TIMEOUT, json_dumps, json_loads

import app
from instagram import Instagram
from models import Activity, Domain
from .testutil import ModelsTest

HTML_VIDEO_WITH_VIEWER = copy.deepcopy(HTML_VIDEO_PAGE)
HTML_VIDEO_WITH_VIEWER['config'] = HTML_VIEWER_CONFIG
HTML_VIDEO_COMPLETE = HTML_HEADER + json_dumps(
    HTML_VIDEO_WITH_VIEWER) + HTML_FOOTER


class InstagramTest(ModelsTest):
    def setUp(self):
        super(InstagramTest, self).setUp()
        self.source = Instagram.new(self.handler, actor=self.actor)
        self.domain = Domain(id='snarfed.org', tokens=['towkin']).put()
        self.auth = f'token=towkin&key={self.source.key.urlsafe().decode()}'

    def get_response(self, path_query, auth=True, **kwargs):
        if auth and '?' not in path_query:
            path_query += f'?{self.auth}'
        return app.application.get_response(f'/instagram/browser/{path_query}',
                                            method='POST',
                                            **kwargs)
Code example #18
    def send_webmentions(self, activity_wrapped, proxy=None, **response_props):
        """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.
        Args:
          activity_wrapped: dict, AS1 activity
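          proxy: boolean, whether to always send the webmention from this
            Response's proxy URL instead of the original source URL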
          response_props: passed through to the newly created Responses
        """
        activity = self.redirect_unwrap(activity_wrapped)

        verb = activity.get('verb')
        if verb and verb not in SUPPORTED_VERBS:
            self.error('%s activities are not supported yet.' % verb)

        # extract source and targets
        source = activity.get('url') or activity.get('id')
        obj = activity.get('object')
        obj_url = util.get_url(obj)

        targets = util.get_list(activity, 'inReplyTo')
        if isinstance(obj, dict):
            if not source or verb in ('create', 'post', 'update'):
                source = obj_url or obj.get('id')
            targets.extend(util.get_list(obj, 'inReplyTo'))

        tags = util.get_list(activity_wrapped, 'tags')
        obj_wrapped = activity_wrapped.get('object')
        if isinstance(obj_wrapped, dict):
            tags.extend(util.get_list(obj_wrapped, 'tags'))
        for tag in tags:
            if tag.get('objectType') == 'mention':
                url = tag.get('url')
                if url and url.startswith(self.request.host_url):
                    targets.append(self.redirect_unwrap(url))

        if verb in ('follow', 'like', 'share'):
            targets.append(obj_url)

        targets = util.dedupe_urls(util.get_url(t) for t in targets)
        if not source:
            self.error("Couldn't find original post URL")
        if not targets:
            self.error(
                "Couldn't find any target URLs in inReplyTo, object, or mention tags"
            )

        # send webmentions and store Responses
        errors = []
        for target in targets:
            if util.domain_from_link(target) == util.domain_from_link(source):
                logging.info('Skipping same-domain webmention from %s to %s',
                             source, target)
                continue

            response = Response(source=source,
                                target=target,
                                direction='in',
                                **response_props)
            response.put()
            wm_source = (response.proxy_url(self)
                         if verb in ('follow', 'like', 'share') or proxy
                         else source)
            logging.info('Sending webmention from %s to %s', wm_source, target)

            wm = send.WebmentionSend(wm_source, target)
            if wm.send(headers=HEADERS):
                logging.info('Success: %s', wm.response)
                response.status = 'complete'
            else:
                logging.warning('Failed: %s', wm.error)
                errors.append(wm.error)
                response.status = 'error'
            response.put()

        if errors:
            msg = 'Errors:\n' + '\n'.join(
                util.json_dumps(e, indent=2) for e in errors)
            self.error(msg, status=errors[0].get('http_status'))
Code example #19
    def backfeed(self, source, responses=None, activities=None):
        """Processes responses and activities and generates propagate tasks.

    Stores property names and values to update in source.updates.

    Args:
      source: Source
      responses: dict mapping AS response id to AS object
      activities: dict mapping AS activity id to AS object
    """
        if responses is None:
            responses = {}
        if activities is None:
            activities = {}

        # Cache to make sure we only fetch the author's h-feed(s) the
        # first time we see it
        fetched_hfeeds = set()

        # narrow down to just public activities
        public = {}
        private = {}
        for id, activity in activities.items():
            (public if source.is_activity_public(activity) else
             private)[id] = activity
        logging.info('Found %d public activities: %s', len(public),
                     public.keys())
        logging.info('Found %d private activities: %s', len(private),
                     private.keys())

        last_public_post = (source.last_public_post or util.EPOCH).isoformat()
        public_published = util.trim_nulls(
            [a.get('object', {}).get('published') for a in public.values()])
        if public_published:
            max_published = max(public_published)
            if max_published > last_public_post:
                last_public_post = max_published
                source.updates['last_public_post'] = \
                  util.as_utc(util.parse_iso8601(max_published))

        source.updates['recent_private_posts'] = \
          len([a for a in private.values()
               if a.get('object', {}).get('published', util.EPOCH_ISO) > last_public_post])

        #
        # Step 2: extract responses, store their activities in response['activities']
        #
        # WARNING: this creates circular references in link posts found by search
        # queries in step 1, since they are their own activity. We use
        # prune_activity() and prune_response() in step 4 to remove these before
        # serializing to JSON.
        #
        for id, activity in public.items():
            obj = activity.get('object') or activity

            # handle user mentions
            user_id = source.user_tag_id()
            if (obj.get('author', {}).get('id') != user_id
                    and activity.get('verb') != 'share'):
                for tag in obj.get('tags', []):
                    urls = tag.get('urls')
                    if (tag.get('objectType') == 'person'
                            and tag.get('id') == user_id and urls):
                        activity['originals'], activity['mentions'] = \
                          original_post_discovery.discover(
                            source, activity, fetch_hfeed=True,
                            include_redirect_sources=False,
                            already_fetched_hfeeds=fetched_hfeeds)
                        activity['mentions'].update(
                            u.get('value') for u in urls)
                        responses[id] = activity
                        break

            # handle quote mentions
            for att in obj.get('attachments', []):
                if (att.get('objectType') in ('note', 'article') and att.get(
                        'author', {}).get('id') == source.user_tag_id()):
                    # now that we've confirmed that one exists, OPD will dig
                    # into the actual attachments
                    if 'originals' not in activity or 'mentions' not in activity:
                        activity['originals'], activity['mentions'] = \
                          original_post_discovery.discover(
                            source, activity, fetch_hfeed=True,
                            include_redirect_sources=False,
                            already_fetched_hfeeds=fetched_hfeeds)
                    responses[id] = activity
                    break

            # extract replies, likes, reactions, reposts, and rsvps
            replies = obj.get('replies', {}).get('items', [])
            tags = obj.get('tags', [])
            likes = [t for t in tags if Response.get_type(t) == 'like']
            reactions = [t for t in tags if Response.get_type(t) == 'react']
            reposts = [t for t in tags if Response.get_type(t) == 'repost']
            rsvps = Source.get_rsvps_from_event(obj)

            # coalesce responses. drop any without ids
            for resp in replies + likes + reactions + reposts + rsvps:
                id = resp.get('id')
                if not id:
                    logging.error('Skipping response without id: %s',
                                  json_dumps(resp, indent=2))
                    continue

                if source.is_blocked(resp):
                    logging.info(
                        'Skipping response by blocked user: %s',
                        json_dumps(resp.get('author') or resp.get('actor'),
                                   indent=2))
                    continue

                resp.setdefault('activities', []).append(activity)

                # when we find two responses with the same id, the earlier one may have
                # come from a link post or user mention, and this one is probably better
                # since it probably came from the user's activity, so prefer this one.
                # background: https://github.com/snarfed/bridgy/issues/533
                existing = responses.get(id)
                if existing:
                    if source.gr_source.activity_changed(resp,
                                                         existing,
                                                         log=True):
                        logging.warning(
                            'Got two different versions of same response!\n%s\n%s',
                            existing, resp)
                    resp['activities'].extend(existing.get('activities', []))

                responses[id] = resp

        #
        # Step 3: filter out responses we've already seen
        #
        # seen responses (JSON objects) for each source are stored in its entity.
        unchanged_responses = []
        if source.seen_responses_cache_json:
            for seen in json_loads(source.seen_responses_cache_json):
                id = seen['id']
                resp = responses.get(id)
                if resp and not source.gr_source.activity_changed(
                        seen, resp, log=True):
                    unchanged_responses.append(seen)
                    del responses[id]

        #
        # Step 4: store new responses and enqueue propagate tasks
        #
        pruned_responses = []
        source.blocked_ids = None

        for id, resp in responses.items():
            resp_type = Response.get_type(resp)
            activities = resp.pop('activities', [])
            if not activities and resp_type == 'post':
                activities = [resp]
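            # too_long collects target URLs that exceed the datastore string
            # limit; urls_to_activity maps each target URL to the index of the
            # activity it was discovered from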
            too_long = set()
            urls_to_activity = {}
            for i, activity in enumerate(activities):
                # we'll usually have multiple responses for the same activity, and the
                # objects in resp['activities'] are shared, so cache each activity's
                # discovered webmention targets inside its object.
                if 'originals' not in activity or 'mentions' not in activity:
                    activity['originals'], activity['mentions'] = \
                      original_post_discovery.discover(
                        source, activity, fetch_hfeed=True,
                        include_redirect_sources=False,
                        already_fetched_hfeeds=fetched_hfeeds)

                targets = original_post_discovery.targets_for_response(
                    resp,
                    originals=activity['originals'],
                    mentions=activity['mentions'])
                if targets:
                    logging.info('%s has %d webmention target(s): %s',
                                 activity.get('url'), len(targets),
                                 ' '.join(targets))
                    # new response to propagate! load block list if we haven't already
                    if source.blocked_ids is None:
                        source.load_blocklist()

                for t in targets:
                    if len(t) <= _MAX_STRING_LENGTH:
                        urls_to_activity[t] = i
                    else:
                        logging.info(
                            'Giving up on target URL over %s chars! %s',
                            _MAX_STRING_LENGTH, t)
                        too_long.add(t[:_MAX_STRING_LENGTH - 4] + '...')

            # store/update response entity. the prune_*() calls are important to
            # remove circular references in link responses, which are their own
            # activities. details in the step 2 comment above.
            pruned_response = util.prune_response(resp)
            pruned_responses.append(pruned_response)
            resp_entity = Response(id=id,
                                   source=source.key,
                                   activities_json=[
                                       json_dumps(
                                           util.prune_activity(a, source))
                                       for a in activities
                                   ],
                                   response_json=json_dumps(pruned_response),
                                   type=resp_type,
                                   unsent=list(urls_to_activity.keys()),
                                   failed=list(too_long),
                                   original_posts=resp.get('originals', []))
            if urls_to_activity and len(activities) > 1:
                resp_entity.urls_to_activity = json_dumps(urls_to_activity)
            resp_entity.get_or_save(source,
                                    restart=self.RESTART_EXISTING_TASKS)

        # update cache
        if pruned_responses:
            source.updates['seen_responses_cache_json'] = json_dumps(
                pruned_responses + unchanged_responses)
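
A minimal standalone sketch of the unsent/failed split above (the names and the length limit here are assumptions for illustration, not the project's real constants): targets short enough to store become keys of urls_to_activity, anything longer is truncated and recorded as failed.

_MAX_STRING_LENGTH = 500  # assumed limit for this sketch; the real value comes from the datastore layer

targets = ['http://example.com/post', 'http://example.com/' + 'x' * 600]
urls_to_activity, too_long = {}, set()
for target in targets:
    if len(target) <= _MAX_STRING_LENGTH:
        urls_to_activity[target] = 0  # index of the activity this target belongs to
    else:
        too_long.add(target[:_MAX_STRING_LENGTH - 4] + '...')

# urls_to_activity's keys become Response.unsent; too_long becomes Response.failed.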
Code example #20
File: publish.py Project: snarfed/bridgy
    def attempt_single_item(self, item):
        """Attempts to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
        self.maybe_inject_silo_content(item)
        obj = microformats2.json_to_object(item)

        ignore_formatting = self.ignore_formatting(item)
        if ignore_formatting:
            prop = microformats2.first_props(item.get('properties', {}))
            content = microformats2.get_text(prop.get('content'))
            if content:
                obj['content'] = content.strip()

        # which original post URL to include? in order of preference:
        # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
        # 2. original user-provided URL if it redirected
        # 3. u-url if available
        # 4. actual final fetched URL
        if self.shortlink:
            obj['url'] = self.shortlink
        elif self.source_url() != self.fetched.url:
            obj['url'] = self.source_url()
        elif 'url' not in obj:
            obj['url'] = self.fetched.url
        logging.debug(
            f'Converted to ActivityStreams object: {json_dumps(obj, indent=2)}'
        )

        # posts and comments need content
        obj_type = obj.get('objectType')
        if (obj_type in ('note', 'article', 'comment')
                and (not obj.get('content') and not obj.get('summary')
                     and not obj.get('displayName'))):
            return gr_source.creation_result(
                abort=False,
                error_plain=f'Could not find content in {self.fetched.url}',
                error_html=
                f'Could not find <a href="http://microformats.org/">content</a> in {self.fetched.url}'
            )

        self.preprocess(obj)

        include_link = self.include_link(item)

        if not self.authorize():
            return gr_source.creation_result(abort=True)

        if self.PREVIEW:
            result = self.source.gr_source.preview_create(
                obj,
                include_link=include_link,
                ignore_formatting=ignore_formatting)
            previewed = result.content or result.description
            if self.entity.type == 'preview':
                self.entity.published = previewed
            if not previewed:
                return result  # there was an error
            return self._render_preview(result, include_link=include_link)

        else:
            result = self.source.gr_source.create(
                obj,
                include_link=include_link,
                ignore_formatting=ignore_formatting)
            self.entity.published = result.content
            if not result.content:
                return result  # there was an error
            if 'url' not in self.entity.published:
                self.entity.published['url'] = obj.get('url')
            self.entity.type = self.entity.published.get(
                'type') or models.get_type(obj)

            ret = json_dumps(self.entity.published, indent=2)
            logging.info(f'Returning {ret}')
            return gr_source.creation_result(ret)
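
As a standalone illustration of the URL-preference logic above (the function name is hypothetical, not part of the project):

def pick_original_url(shortlink, source_url, fetched_url, obj):
    """Order of preference: rel-shortlink, the user-supplied URL if it
    redirected, an existing u-url on the object, then the final fetched URL."""
    if shortlink:
        return shortlink
    if source_url != fetched_url:
        return source_url
    return obj.get('url', fetched_url)

assert pick_original_url(None, 'http://a/', 'http://a/', {'url': 'http://u/'}) == 'http://u/'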
Code example #21
File: webfinger.py Project: mdheller/bridgy-fed
    def template_vars(self, domain, url=None):
        assert domain

        if domain.split('.')[-1] in NON_TLDS:
            self.error("%s doesn't look like a domain" % domain, status=404)

        # find representative h-card. try url, then url's home page, then domain
        urls = ['http://%s/' % domain]
        if url:
            urls = [url, urllib.parse.urljoin(url, '/')] + urls

        for candidate in urls:
            resp = common.requests_get(candidate)
            parsed = util.parse_html(resp)
            mf2 = util.parse_mf2(parsed, url=resp.url)
            # logging.debug('Parsed mf2 for %s: %s', resp.url, json_dumps(mf2, indent=2))
            hcard = mf2util.representative_hcard(mf2, resp.url)
            if hcard:
                logging.info('Representative h-card: %s',
                             json_dumps(hcard, indent=2))
                break
        else:
            self.error("""\
Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s"""
                       % resp.url)

        logging.info('Generating WebFinger data for %s', domain)
        key = models.MagicKey.get_or_create(domain)
        props = hcard.get('properties', {})
        urls = util.dedupe_urls(props.get('url', []) + [resp.url])
        canonical_url = urls[0]

        acct = '%s@%s' % (domain, domain)
        for url in urls:
            if url.startswith('acct:'):
                urluser, urldomain = util.parse_acct_uri(url)
                if urldomain == domain:
                    acct = '%s@%s' % (urluser, domain)
                    logging.info('Found custom username: acct:%s', acct)
                    break

        # discover atom feed, if any
        atom = parsed.find('link',
                           rel='alternate',
                           type=common.CONTENT_TYPE_ATOM)
        if atom and atom['href']:
            atom = urllib.parse.urljoin(resp.url, atom['href'])
        else:
            atom = 'https://granary.io/url?' + urllib.parse.urlencode(
                {
                    'input': 'html',
                    'output': 'atom',
                    'url': resp.url,
                    'hub': resp.url,
                })

        # discover PuSH hub, if any; otherwise fall back to the default Superfeedr hub
        hub = 'https://bridgy-fed.superfeedr.com/'
        for link in resp.headers.get('Link', '').split(','):
            match = common.LINK_HEADER_RE.match(link)
            if match and match.group(2) == 'hub':
                hub = match.group(1)
                break

        # generate webfinger content
        data = util.trim_nulls({
            'subject':
            'acct:' + acct,
            'aliases':
            urls,
            'magic_keys': [{
                'value': key.href()
            }],
            'links':
            sum(([{
                'rel': 'http://webfinger.net/rel/profile-page',
                'type': 'text/html',
                'href': url,
            }] for url in urls if url.startswith("http")), []) + [{
                'rel': 'http://webfinger.net/rel/avatar',
                'href': url,
            } for url in props.get('photo', [])] + [
                {
                    'rel': 'canonical_uri',
                    'type': 'text/html',
                    'href': canonical_url,
                },

                # ActivityPub
                {
                    'rel': 'self',
                    'type': common.CONTENT_TYPE_AS2,
                    # WARNING: in python 2 sometimes request.host_url lost port,
                    # http://localhost:8080 would become just http://localhost. no
                    # clue how or why. pay attention here if that happens again.
                    'href': '%s/%s' % (self.request.host_url, domain),
                },
                {
                    'rel': 'inbox',
                    'type': common.CONTENT_TYPE_AS2,
                    'href': '%s/%s/inbox' % (self.request.host_url, domain),
                },

                # OStatus
                {
                    'rel': 'http://schemas.google.com/g/2010#updates-from',
                    'type': common.CONTENT_TYPE_ATOM,
                    'href': atom,
                },
                {
                    'rel': 'hub',
                    'href': hub,
                },
                {
                    'rel': 'magic-public-key',
                    'href': key.href(),
                },
                {
                    'rel': 'salmon',
                    'href': '%s/%s/salmon' % (self.request.host_url, domain),
                }
            ]
        })
        logging.info('Returning WebFinger data: %s', json_dumps(data,
                                                                indent=2))
        return data
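
For context, a hedged sketch of how a client could fetch the WebFinger document this handler produces (standard RFC 7033 endpoint; the account here is made up):

import requests

resp = requests.get('https://fed.brid.gy/.well-known/webfinger',
                    params={'resource': 'acct:example.com@example.com'},
                    timeout=30)
resp.raise_for_status()
data = resp.json()
# data['links'] carries the profile-page, ActivityPub self/inbox, atom, hub,
# magic-public-key, and salmon entries assembled in template_vars() above.
ap_self = next(link['href'] for link in data['links'] if link.get('rel') == 'self')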
Code example #22
    def post(self, domain):
        logging.info('Got: %s', self.request.body)

        # parse and validate AS2 activity
        try:
            activity = json_loads(self.request.body)
            assert activity
        except (TypeError, ValueError, AssertionError):
            self.error("Couldn't parse body as JSON", exc_info=True)

        obj = activity.get('object') or {}
        if isinstance(obj, str):
            obj = {'id': obj}

        type = activity.get('type')
        if type == 'Accept':  # eg in response to a Follow
            return  # noop
        if type == 'Create':
            type = obj.get('type')
        elif type not in SUPPORTED_TYPES:
            self.error('Sorry, %s activities are not supported yet.' % type,
                       status=501)

        # TODO: verify signature if there is one

        if type == 'Undo' and obj.get('type') == 'Follow':
            # skip actor fetch below; we don't need it to undo a follow
            return self.undo_follow(self.redirect_unwrap(activity))
        elif type == 'Delete':
            id = obj.get('id')

            # !!! temporarily disabled actually deleting Followers below because
            # mastodon.social sends Deletes for every Bridgy Fed account, all at
            # basically the same time, and we have many Follower objects, so we
            # have to do this table scan for each one, so the requests take a
            # long time and end up spawning extra App Engine instances that we
            # get billed for. and the Delete requests are almost never for
            # followers we have. TODO: revisit this and do it right.

            # if isinstance(id, str):
            #     # assume this is an actor
            #     # https://github.com/snarfed/bridgy-fed/issues/63
            #     for key in Follower.query().iter(keys_only=True):
            #         if key.id().split(' ')[-1] == id:
            #             key.delete()
            return

        # fetch actor if necessary so we have name, profile photo, etc
        for elem in obj, activity:
            actor = elem.get('actor')
            if actor and isinstance(actor, str):
                elem['actor'] = common.get_as2(actor).json()

        activity_unwrapped = self.redirect_unwrap(activity)
        if type == 'Follow':
            return self.accept_follow(activity, activity_unwrapped)

        # send webmentions to each target
        as1 = as2.to_as1(activity)
        self.send_webmentions(as1, proxy=True, protocol='activitypub',
                              source_as2=json_dumps(activity_unwrapped))
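
A minimal sketch of the kind of AS2 payload this inbox handler accepts (all values hypothetical); the actor is a bare URL, so the handler fetches it with common.get_as2() before converting the activity to AS1:

activity = {
    '@context': 'https://www.w3.org/ns/activitystreams',
    'id': 'https://mastodon.example/activities/123',
    'type': 'Create',
    'actor': 'https://mastodon.example/users/alice',
    'object': {
        'id': 'https://mastodon.example/notes/456',
        'type': 'Note',
        'content': 'Hello IndieWeb!',
        'inReplyTo': 'https://example.com/original-post',
    },
}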
Code example #23
    def expect_get_publications(self, pubs):
        # https://github.com/Medium/medium-api-docs/#user-content-listing-the-users-publications
        self.expect_requests_get('users/abcdef01234/publications',
                                 json_dumps(pubs))
        self.mox.ReplayAll()
Code example #24
File: test_webmention.py Project: snarfed/bridgy-fed
    def test_activitypub_create_post(self, mock_get, mock_post):
        mock_get.side_effect = [self.create, self.actor]
        mock_post.return_value = requests_response('abc xyz')

        Follower.get_or_create('orig', 'https://mastodon/aaa')
        Follower.get_or_create('orig',
                               'https://mastodon/bbb',
                               last_follow=json_dumps({
                                   'actor': {
                                       'publicInbox': 'https://public/inbox',
                                       'inbox': 'https://unused',
                                   }
                               }))
        Follower.get_or_create('orig',
                               'https://mastodon/ccc',
                               last_follow=json_dumps({
                                   'actor': {
                                       'endpoints': {
                                           'sharedInbox':
                                           'https://shared/inbox',
                                       },
                                   }
                               }))
        Follower.get_or_create('orig',
                               'https://mastodon/ddd',
                               last_follow=json_dumps(
                                   {'actor': {
                                       'inbox': 'https://inbox',
                                   }}))
        Follower.get_or_create('orig',
                               'https://mastodon/eee',
                               status='inactive',
                               last_follow=json_dumps(
                                   {'actor': {
                                       'inbox': 'https://unused/2',
                                   }}))
        Follower.get_or_create(
            'orig',
            'https://mastodon/fff',
            last_follow=json_dumps({
                'actor': {
                    # dupe of ddd's inbox; should be de-duped
                    'inbox': 'https://inbox',
                }
            }))

        got = self.client.post('/webmention',
                               data={
                                   'source': 'http://orig/post',
                                   'target': 'https://fed.brid.gy/',
                               })
        self.assertEqual(200, got.status_code)

        mock_get.assert_has_calls((self.req('http://orig/post'), ))

        inboxes = ('https://inbox', 'https://public/inbox',
                   'https://shared/inbox')
        self.assertEqual(len(inboxes), len(mock_post.call_args_list))
        for call, inbox in zip(mock_post.call_args_list, inboxes):
            self.assertEqual((inbox, ), call[0])
            self.assertEqual(self.create_as2, json_loads(call[1]['data']))

        for inbox in inboxes:
            resp = Response.get_by_id('http://orig/post %s' % inbox)
            self.assertEqual('out', resp.direction, inbox)
            self.assertEqual('activitypub', resp.protocol, inbox)
            self.assertEqual('complete', resp.status, inbox)
            self.assertEqual(self.create_mf2, json_loads(resp.source_mf2),
                             inbox)
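
The inbox selection and de-duplication this test exercises can be sketched roughly as below (the helper is hypothetical, and the relative priority of publicInbox vs. sharedInbox is an assumption the test doesn't pin down): pick a follower's publicInbox, else endpoints.sharedInbox, else inbox, and collapse duplicate URLs into a single delivery.

def target_inboxes(last_follows):
    """last_follows: parsed last_follow dicts for the active followers only."""
    inboxes = set()
    for follow in last_follows:
        actor = follow.get('actor', {})
        inbox = (actor.get('publicInbox')
                 or actor.get('endpoints', {}).get('sharedInbox')
                 or actor.get('inbox'))
        if inbox:
            inboxes.add(inbox)
    return inboxes  # the ddd/fff duplicate of https://inbox collapses here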
Code example #25
File: test_browser.py Project: sheyril/bridgy
    def test_get_activities_response_activity_id(self):
        Activity(id='tag:fa.ke,2013:123',
                 activity_json=json_dumps({'foo': 'bar'})).put()

        resp = self.source.get_activities_response(activity_id='123')
        self.assertEqual([{'foo': 'bar'}], resp['items'])
Code example #26
File: test_twitter.py Project: swamim/bridgy
    def test_search_for_links(self):
        """https://github.com/snarfed/bridgy/issues/565"""
        self.tw.domain_urls = [
            'http://foo/', 'http://bar/baz', 'https://t.co/xyz'
        ]
        self.tw.put()

        results = [
            {
                'id_str': '0',  # no link
                'text': 'x foo/ y /bar/baz z',
            },
            {
                'id_str': '1',  # yes, ignore http vs https for bar/baz
                'text': 'no link here',
                'entities': {
                    'urls': [
                        {
                            'expanded_url': 'http://bar'
                        },
                        {
                            'expanded_url': 'https://bar/baz'
                        },
                    ]
                },
            },
            {
                'id_str': '2',  # no, retweet
                'text': 'a http://bar/baz ok',
                'retweeted_status': {
                    'id_str': '456',
                    'text': 'a http://bar/baz ok',
                },
            },
            {
                'id_str': '3',  # no, link domain is blacklisted
                'text': 'x https://t.co/xyz/abc z',
            },
            {
                'id_str': '4',  # no link
                'text': 'x http://bar/baz z',
            },
            {
                'id_str': '5',  # yes
                'text': 'no link here',
                'entities': {
                    'urls': [{
                        'expanded_url': 'http://foo/x?y'
                    }]
                },
            },
            {
                'id_str': '6',  # yes
                'text': 'a link http://bar/baz here',
                'entities': {
                    'urls': [{
                        'expanded_url': 'http://foo/'
                    }, {
                        'expanded_url': 'http://other'
                    }]
                },
            }
        ]
        self.expect_urlopen(
            API_BASE + API_SEARCH % {
                'q': urllib.parse.quote_plus('bar/baz OR foo'),
                'count': 50
            }, json_dumps({'statuses': results}))

        self.mox.ReplayAll()
        self.assert_equals([
            'tag:twitter.com,2013:1', 'tag:twitter.com,2013:5',
            'tag:twitter.com,2013:6'
        ], [a['id'] for a in self.tw.search_for_links()])
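
The expected search query is just the OR of the source's usable domain paths, URL-encoded with quote_plus; a quick standalone check of that encoding (t.co is left out of the query because the test treats link-shortener domains as ineligible):

import urllib.parse

query = urllib.parse.quote_plus('bar/baz OR foo')
# '/' becomes %2F and spaces become '+'
assert query == 'bar%2Fbaz+OR+foo'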
Code example #27
  def poll(self, source):
    """Actually runs the poll.

    Stores property names and values to update in source.updates.
    """
    if source.last_activities_etag or source.last_activity_id:
      logging.debug('Using ETag %s, last activity id %s',
                    source.last_activities_etag, source.last_activity_id)

    #
    # Step 1: fetch activities:
    # * posts by the user
    # * search all posts for the user's domain URLs to find links
    #
    cache = util.CacheDict()
    if source.last_activities_cache_json:
      cache.update(json_loads(source.last_activities_cache_json))

    # search for links first so that the user's activities and responses
    # override them if they overlap
    links = source.search_for_links()

    # this user's own activities (and user mentions)
    resp = source.get_activities_response(
      fetch_replies=True, fetch_likes=True, fetch_shares=True,
      fetch_mentions=True, count=50, etag=source.last_activities_etag,
      min_id=source.last_activity_id, cache=cache)
    etag = resp.get('etag')  # used later
    user_activities = resp.get('items', [])

    # these map ids to AS objects
    responses = {a['id']: a for a in links}
    activities = {a['id']: a for a in links + user_activities}

    # extract silo activity ids, update last_activity_id
    silo_activity_ids = set()
    last_activity_id = source.last_activity_id
    for id, activity in activities.items():
      # maybe replace stored last activity id
      parsed = util.parse_tag_uri(id)
      if parsed:
        id = parsed[1]
      silo_activity_ids.add(id)
      try:
        # try numeric comparison first
        greater = int(id) > int(last_activity_id)
      except (TypeError, ValueError):
        greater = id > last_activity_id
      if greater:
        last_activity_id = id

    if last_activity_id and last_activity_id != source.last_activity_id:
      source.updates['last_activity_id'] = last_activity_id

    # trim cache to just the returned activity ids, so that it doesn't grow
    # without bound. (WARNING: depends on get_activities_response()'s cache key
    # format, e.g. 'PREFIX ACTIVITY_ID'!)
    source.updates['last_activities_cache_json'] = json_dumps(
      {k: v for k, v in cache.items() if k.split()[-1] in silo_activity_ids})

    self.backfeed(source, responses, activities=activities)

    source.updates.update({'last_polled': source.last_poll_attempt,
                           'poll_status': 'ok'})
    if etag and etag != source.last_activities_etag:
      source.updates['last_activities_etag'] = etag

    #
    # Possibly refetch updated syndication urls.
    #
    # if the author has added syndication urls since the first time
    # original_post_discovery ran, we'll miss them. this cleanup task will
    # periodically check for updated urls. only kicks in if the author has
    # *ever* published a rel=syndication url
    if source.should_refetch():
      logging.info('refetching h-feed for source %s', source.label())
      relationships = original_post_discovery.refetch(source)

      now = util.now_fn()
      source.updates['last_hfeed_refetch'] = now

      if relationships:
        logging.info('refetch h-feed found new rel=syndication relationships: %s',
                     relationships)
        try:
          self.repropagate_old_responses(source, relationships)
        except BaseException as e:
          if ('BadRequestError' in str(e.__class__) or
              'Timeout' in str(e.__class__) or
              util.is_connection_failure(e)):
            logging.info('Timeout while repropagating responses.', exc_info=True)
          else:
            raise
    else:
      logging.info(
          'skipping refetch h-feed. last-syndication-url %s, last-refetch %s',
          source.last_syndication_url, source.last_hfeed_refetch)
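
A small standalone illustration (names made up) of the numeric-first comparison used above to advance last_activity_id:

def newer(last_id, candidate):
    try:
        # numeric comparison when both ids parse as integers
        greater = int(candidate) > int(last_id)
    except (TypeError, ValueError):
        # otherwise fall back to plain string comparison
        greater = candidate > last_id
    return candidate if greater else last_id

assert newer('9', '10') == '10'       # '10' would lose a lexicographic comparison
assert newer('abc', 'abd') == 'abd'   # non-numeric ids compare as strings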
Code example #28
File: test_app.py Project: swamim/bridgy
    def test_retry(self):
        source = self.sources[0]
        source.domain_urls = ['http://orig']
        source.last_hfeed_refetch = last_hfeed_refetch = \
            testutil.NOW - datetime.timedelta(minutes=1)
        source.put()

        resp = self.responses[0]
        resp.status = 'complete'
        resp.unsent = ['http://unsent']
        resp.sent = ['http://sent']
        resp.error = ['http://error']
        resp.failed = ['http://failed']
        resp.skipped = ['https://skipped']

        # SyndicatedPost with new target URLs
        resp.activities_json = [
            json_dumps({'object': {
                'url': 'https://fa.ke/1'
            }}),
            json_dumps({
                'url': 'https://fa.ke/2',
                'object': {
                    'unused': 'ok'
                }
            }),
            json_dumps({'url': 'https://fa.ke/3'}),
        ]
        resp.put()
        SyndicatedPost.insert(source, 'https://fa.ke/1', 'https://orig/1')
        SyndicatedPost.insert(source, 'https://fa.ke/2', 'http://orig/2')
        SyndicatedPost.insert(source, 'https://fa.ke/3', 'http://orig/3')

        key = resp.key.urlsafe().decode()
        self.expect_task('propagate', response_key=key)
        self.mox.ReplayAll()

        # cached webmention endpoint
        util.webmention_endpoint_cache['W https skipped /'] = 'asdf'

        response = app.application.get_response('/retry',
                                                method='POST',
                                                text=urlencode({'key': key}))
        self.assertEqual(302, response.status_int)
        self.assertEqual(source.bridgy_url(self.handler),
                         response.headers['Location'].split('#')[0])

        # status and URLs should be refreshed
        got = resp.key.get()
        self.assertEqual('new', got.status)
        self.assertCountEqual([
            'http://unsent/', 'http://sent/', 'https://skipped/',
            'http://error/', 'http://failed/', 'https://orig/1',
            'http://orig/2', 'http://orig/3'
        ], got.unsent)
        for field in got.sent, got.skipped, got.error, got.failed:
            self.assertEqual([], field)

        # webmention endpoints for URL domains should be refreshed
        self.assertNotIn('W https skipped /', util.webmention_endpoint_cache)

        # shouldn't have refetched h-feed
        self.assertEqual(last_hfeed_refetch,
                         source.key.get().last_hfeed_refetch)