Example #1
0
    def get(self):
        """Redirect a form-style request to the equivalent REST API path.

        Reads ``site`` (required), ``user_id``, ``group_id`` and, depending on
        the group, ``list``, ``search_query`` or ``activity_id`` from the
        request, then redirects to ``/site/user/group/@app/activity_id`` with
        the original query params passed through.
        """
        request = self.request
        site = util.get_required_param(self, 'site')
        user = request.get('user_id') or source.ME
        group = request.get('group_id') or source.ALL
        if group == '@list':
            # the actual list name arrives in its own (required) param
            group = util.get_required_param(self, 'list')

        # search_query is only read for searches; activity_id is read for
        # every other group except the blocks group.
        searching = group == source.SEARCH
        search_query = request.get('search_query', '') if searching else ''
        activity_id = ''
        if not searching and group != source.BLOCKS:
            activity_id = request.get('activity_id', '')

        # pass query params through, then apply our overrides on top
        query = dict(request.params.items())
        query['plaintext'] = 'true'
        query['cache'] = 'false'
        query['search_query'] = search_query

        segments = [urllib.parse.quote_plus(piece, safe='@')
                    for piece in (site, user, group, '@app', activity_id)]
        target = '/' + '/'.join(segments) + '?' + urllib.parse.urlencode(query)
        return self.redirect(target)
Example #2
0
    def get(self):
        """Render a stored response as HTML that also redirects browsers.

        Looks up the Response stored under the id ``'<source> <target>'``
        (both are required query params), converts whichever of its stored
        mf2 / AS2 / Atom payloads is present to AS1, and writes it out as
        microformats2 HTML with a meta refresh pointing at ``source``.

        Aborts with HTTP 404 if there is no stored response or it has no data.
        """
        # stdlib; imported locally so this method does not depend on the
        # file's import block.
        from xml.sax.saxutils import escape

        source = util.get_required_param(self, 'source')
        target = util.get_required_param(self, 'target')

        response_id = '%s %s' % (source, target)
        resp = Response.get_by_id(response_id)
        if not resp:
            self.abort(404, 'No stored response for %s' % response_id)

        if resp.source_mf2:
            as1 = microformats2.json_to_object(json.loads(resp.source_mf2))
        elif resp.source_as2:
            as1 = as2.to_as1(json.loads(resp.source_as2))
        elif resp.source_atom:
            as1 = atom.atom_to_activity(resp.source_atom)
        else:
            self.abort(404, 'Stored response for %s has no data' % response_id)

        # add HTML meta redirect to source page. should trigger for end users in
        # browsers but not for webmention receivers (hopefully).
        html = microformats2.activities_to_html([as1])
        utf8 = '<meta charset="utf-8">'
        # source comes straight from the query string, so escape it before
        # placing it inside a double-quoted HTML attribute.
        safe_source = escape(source, {'"': '&quot;'})
        refresh = '<meta http-equiv="refresh" content="0;url=%s">' % safe_source
        html = html.replace(utf8, utf8 + '\n' + refresh)

        self.response.write(html)
Example #3
0
  def get(self):
    """Fetch a URL and convert its contents to an ActivityStreams response.

    Query params:
      input: required; one of 'activitystreams', 'html', 'json-mf2'.
      url: required; the document to fetch and convert.

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input_format = util.get_required_param(self, 'input')
    if input_format not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input_format, expected_inputs))

    # fetch url
    url = util.get_required_param(self, 'url')
    logging.info('Fetching %s', url)
    resp = urllib2.urlopen(url, timeout=appengine_config.HTTP_TIMEOUT)
    try:
      final_url = resp.geturl()
      if url != final_url:
        logging.info('Redirected to %s', final_url)
      body = resp.read()
    finally:
      # always release the connection, even if reading fails
      resp.close()

    # decode data
    if input_format == 'activitystreams':
      activities = json.loads(body)
    elif input_format == 'html':
      activities = microformats2.html_to_activities(body, final_url)
    else:  # 'json-mf2'; guaranteed by the validation above
      activities = [microformats2.json_to_object(item)
                    for item in json.loads(body).get('items', [])]

    self.write_response(source.Source.make_activities_base_response(activities))
Example #4
0
    def get(self):
        """Fetch a URL and convert its contents to an ActivityStreams response.

        Query params:
          input: required; one of 'activitystreams', 'html', 'json-mf2'.
          url: required; the document to fetch and convert.

        Raises:
          exc.HTTPBadRequest: if input is not one of the expected values.
        """
        expected_inputs = ('activitystreams', 'html', 'json-mf2')
        input_format = util.get_required_param(self, 'input')
        if input_format not in expected_inputs:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input_format, expected_inputs))

        # fetch url
        url = util.get_required_param(self, 'url')
        logging.info('Fetching %s', url)
        resp = urllib2.urlopen(url, timeout=appengine_config.HTTP_TIMEOUT)
        try:
            final_url = resp.geturl()
            if url != final_url:
                logging.info('Redirected to %s', final_url)
            body = resp.read()
        finally:
            # always release the connection, even if reading fails
            resp.close()

        # decode data
        if input_format == 'activitystreams':
            activities = json.loads(body)
        elif input_format == 'html':
            activities = microformats2.html_to_activities(body, final_url)
        else:  # 'json-mf2'; guaranteed by the validation above
            activities = [
                microformats2.json_to_object(item)
                for item in json.loads(body).get('items', [])
            ]

        self.write_response(
            source.Source.make_activities_base_response(activities))
Example #5
0
  def get(self):
    """Fetch a URL, optionally via memcache, and convert it to ActivityStreams.

    Query params:
      input: required; one of 'activitystreams', 'html', 'json-mf2'.
      url: required; the document to fetch and convert.
      cache: optional; 'false' (any case) bypasses memcache entirely.

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    fmt = util.get_required_param(self, 'input')
    if fmt not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (fmt, expected_inputs))
    url = util.get_required_param(self, 'url')

    # check if request is cached. the key uses the originally requested url.
    use_cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    cached = None
    if use_cache:
      cached = memcache.get(cache_key)

    if not cached:
      # fetch url
      try:
        resp = util.urlopen(url)
      except (ValueError, httplib.InvalidURL) as e:
        # other exceptions are handled by webutil.handlers.handle_exception(),
        # which uses interpret_http_exception(), etc.
        self.abort(400, str(e))

      final_url = resp.geturl()
      if url != final_url:
        url = final_url
        logging.info('Redirected to %s', url)
      body = resp.read()

      if use_cache:
        logging.info('Caching response in %r', cache_key)
        memcache.set(cache_key, {'url': url, 'body': body}, URL_CACHE_TIME)
    else:
      logging.info('Serving cached response %r', cache_key)
      url = cached['url']
      body = cached['body']

    # decode data. only the html and json-mf2 inputs have mf2 to inspect.
    mf2 = None
    if fmt == 'html':
      mf2 = mf2py.parse(doc=body, url=url)
    elif fmt == 'json-mf2':
      mf2 = json.loads(body)
      mf2.setdefault('rels', {})  # mf2util expects rels

    actor = title = None
    if mf2:
      def fetch_mf2(url):
        return mf2py.parse(url=url)

      actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2)
      title = mf2util.interpret_feed(mf2, url).get('name')

    if fmt == 'activitystreams':
      activities = json.loads(body)
    elif fmt == 'html':
      activities = microformats2.html_to_activities(body, url, actor)
    elif fmt == 'json-mf2':
      activities = [microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])]

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response, url=url, actor=actor, title=title)
Example #6
0
  def get(self):
    """Serve a Twitter timeline, or a Twitter list, as an Atom feed.

    Requires the access_token_key and access_token_secret query params. If a
    list param of the form username/list is given, that list is served;
    otherwise the authenticated user's own activities are.
    """
    self.response.headers['Content-Type'] = 'application/atom+xml'
    client = twitter.Twitter(util.get_required_param(self, 'access_token_key'),
                             util.get_required_param(self, 'access_token_secret'))

    list_str = self.request.get('list')
    if not list_str:
      actor = client.get_actor()
      activities = client.get_activities(count=50)
    else:
      if list_str == 'tonysss13/financial':
        raise exc.HTTPTooManyRequests("Please reduce your feed reader's polling rate.")

      # this pattern is duplicated in index.html.
      # also note that list names allow more characters than usernames, but the
      # allowed characters aren't explicitly documented. :/ details:
      # https://groups.google.com/d/topic/twitter-development-talk/lULdIVR3B9s/discussion
      parsed = re.match(r'@?([A-Za-z0-9_]+)/([A-Za-z0-9_-]+)', list_str)
      if not parsed:
        self.abort(400, 'List must be of the form username/list (got %r)' % list_str)
      user_id, group_id = parsed.groups()
      actor = client.get_actor(user_id)
      activities = client.get_activities(user_id=user_id, group_id=group_id, count=50)

    feed_title = 'twitter-atom feed for %s' % (list_str or actor.get('username', ''))
    try:
      feed = atom.activities_to_atom(
        activities, actor, title=feed_title,
        host_url=self.request.host_url + '/',
        request_url=self.request.path_url, xml_base='https://twitter.com/')
      self.response.out.write(feed)
    except DeadlineExceededError:
      logging.warning('Hit 60s overall request deadline, returning 503.', exc_info=True)
      raise exc.HTTPServiceUnavailable()
Example #7
0
 def post(self):
   """Start the Twitter OAuth flow, forwarding form values to /oauth_callback.

   Requires the consumer_key and consumer_secret params; list is optional.
   """
   callback_params = {
       'list': self.request.get('list', '').encode('utf-8'),
       'consumer_key': util.get_required_param(self, 'consumer_key'),
       'consumer_secret': util.get_required_param(self, 'consumer_secret'),
       }
   callback_url = '/oauth_callback?%s' % urllib.urlencode(callback_params)
   start_handler_cls = oauth_twitter.StartHandler.to(callback_url)
   return start_handler_cls(self.request, self.response).post()
Example #8
0
 def post(self):
   """Start the Twitter OAuth flow, forwarding form values to /oauth_callback.

   Requires the consumer_key and consumer_secret params; list is optional.
   """
   callback_params = {
       'list': self.request.get('list', '').encode('utf-8'),
       'consumer_key': util.get_required_param(self, 'consumer_key'),
       'consumer_secret': util.get_required_param(self, 'consumer_secret'),
       }
   callback_url = '/oauth_callback?%s' % urllib.urlencode(callback_params)
   start_handler_cls = oauth_twitter.StartHandler.to(callback_url)
   return start_handler_cls(self.request, self.response).post()
Example #9
0
  def get(self):
    self.response.headers['Content-Type'] = 'application/atom+xml'

    # New style feed with user-provided app (consumer) key and secret
    if (not self.request.get('consumer_key') and
        not self.request.get('consumer_secret')):
      # Welcome back message for old feeds
      self.response.out.write("""\
<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
<generator uri="https://twitter-atom.appspot.com/" version="0.1">twitter-atom</generator>
<id>https://twitter-atom.appspot.com/</id>
<title>Twitter Atom feeds is back!</title>
<updated>2013-07-08T20:00:00</updated>
<entry>
<id>tag:twitter-atom.appspot.com,2013:2</id>
<title>Twitter Atom feeds is back!</title>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<p style="color: red; font-style: italic;"><b>Twitter Atom feeds is back! I'm experimenting with a new design that Twitter will (hopefully) be ok with. You can try it out by <a href="http://twitter-atom.appspot.com/">generating a new feed here</a>. Feel free to <a href="http://twitter.com/snarfed_org">ping me</a> if you have any questions. Welcome back!</b></p>
</div>
</content>
<published>2013-07-08T20:00:00</published>
</entry>
</feed>
""")
      return

    tw = twitter.Twitter(util.get_required_param(self, 'access_token_key'),
                         util.get_required_param(self, 'access_token_secret'))

    list_str = self.request.get('list')
    if list_str:
      if list_str == 'tonysss13/financial':
        raise exc.HTTPTooManyRequests("Please reduce your feed reader's polling rate.")

      # this pattern is duplicated in index.html.
      # also note that list names allow more characters that usernames, but the
      # allowed characters aren't explicitly documented. :/ details:
      # https://groups.google.com/d/topic/twitter-development-talk/lULdIVR3B9s/discussion
      match = re.match(r'@?([A-Za-z0-9_]+)/([A-Za-z0-9_-]+)', list_str)
      if not match:
        self.abort(400, 'List must be of the form username/list (got %r)' % list_str)
      user_id, group_id = match.groups()
      actor = tw.get_actor(user_id)
      activities = tw.get_activities(user_id=user_id, group_id=group_id, count=50)
    else:
      actor = tw.get_actor()
      activities = tw.get_activities(count=50)

    title = 'twitter-atom feed for %s' % (list_str or actor.get('username', ''))
    try:
      self.response.out.write(atom.activities_to_atom(
        activities, actor, title=title, host_url=self.request.host_url + '/',
        request_url=self.request.path_url, xml_base='https://twitter.com/'))
    except DeadlineExceededError:
      logging.warning('Hit 60s overall request deadline, returning 503.', exc_info=True)
      raise exc.HTTPServiceUnavailable()
Example #10
0
  def get(self):
    self.response.headers['Content-Type'] = 'application/atom+xml'

    # New style feed with user-provided app (consumer) key and secret
    if (not self.request.get('consumer_key') and
        not self.request.get('consumer_secret')):
      # Welcome back message for old feeds
      self.response.out.write("""\
<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
<generator uri="https://twitter-atom.appspot.com/" version="0.1">twitter-atom</generator>
<id>https://twitter-atom.appspot.com/</id>
<title>Twitter Atom feeds is back!</title>
<updated>2013-07-08T20:00:00</updated>
<entry>
<id>tag:twitter-atom.appspot.com,2013:2</id>
<title>Twitter Atom feeds is back!</title>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<p style="color: red; font-style: italic;"><b>Twitter Atom feeds is back! I'm experimenting with a new design that Twitter will (hopefully) be ok with. You can try it out by <a href="http://twitter-atom.appspot.com/">generating a new feed here</a>. Feel free to <a href="http://twitter.com/snarfed_org">ping me</a> if you have any questions. Welcome back!</b></p>
</div>
</content>
<published>2013-07-08T20:00:00</published>
</entry>
</feed>
""")
      return

    tw = twitter.Twitter(util.get_required_param(self, 'access_token_key'),
                         util.get_required_param(self, 'access_token_secret'))

    list_str = self.request.get('list')
    if list_str:
      # this pattern is duplicated in index.html.
      # also note that list names allow more characters that usernames, but the
      # allowed characters aren't explicitly documented. :/ details:
      # https://groups.google.com/d/topic/twitter-development-talk/lULdIVR3B9s/discussion
      match = re.match(r'@?([A-Za-z0-9_]+)/([A-Za-z0-9_-]+)', list_str)
      if not match:
        self.abort(400, 'List must be of the form username/list (got %r)' % list_str)
      user_id, group_id = match.groups()
      actor = tw.get_actor(user_id)
      activities = tw.get_activities(user_id=user_id, group_id=group_id, count=50)
    else:
      actor = tw.get_actor()
      activities = tw.get_activities(count=50)

    title = 'twitter-atom feed for %s' % (list_str or actor.get('username', ''))
    self.response.out.write(atom.activities_to_atom(
        activities, actor, title=title, host_url=self.request.host_url + '/',
        request_url=self.request.path_url, xml_base='https://twitter.com/'))
Example #11
0
    def post(self):
        """Handle an incoming webmention POST.

        Logs the request params, sends a best-effort notification email for
        any source whose host is not localhost, then runs try_activitypub()
        and records the outcome on self.resp (if one was set) before saving it.
        """
        logging.info('(Params: %s )', self.request.params.items())

        source = util.get_required_param(self, 'source')
        # skip the notification email when the source host is localhost
        if urlparse.urlparse(source).netloc.split(':')[0] != 'localhost':
          try:
              msg = 'Bridgy Fed: new webmention from %s' % source
              mail.send_mail(
                  sender='*****@*****.**',
                  to='*****@*****.**',
                  subject=msg, body=msg)
          except BaseException:
              # best effort: a failed email is logged and otherwise ignored.
              # NOTE(review): BaseException is very broad; presumably
              # deliberate so nothing from mail delivery can fail the
              # request - confirm.
              logging.warning('Error sending email', exc_info=True)

        # presumably try_activitypub() sets self.resp once it has a response
        # entity to track - verify against its definition.
        self.resp = None
        try:
            self.try_activitypub()
            if self.resp:
                self.resp.status = 'complete'
        except:
            # record the failure on the entity, then let the error propagate
            if self.resp:
                self.resp.status = 'error'
            raise
        finally:
            # store whichever status was set, on success and failure alike
            if self.resp:
                self.resp.put()
Example #12
0
    def post(self):
        """Handle an incoming webmention: fetch, check and convert the source.

        Fetches the required ``source`` URL, parses its microformats2, checks
        that the page links back to this host, converts its first h-entry to
        an AS1 object, and then tries ActivityPub delivery, falling back to
        Salmon when try_activitypub() returns a falsy value.
        """
        logging.info('(Params: %s )', self.request.params.items())

        # fetch source page
        source = util.get_required_param(self, 'source')
        source_resp = common.requests_get(source)
        self.source_url = source_resp.url or source
        netloc = urlparse.urlparse(self.source_url).netloc
        self.source_domain = netloc.split(':')[0]
        self.source_mf2 = mf2py.parse(
            source_resp.text, url=self.source_url, img_with_alt=True)

        # check for backlink to bridgy fed (for webmention spec and to confirm
        # source's intent to federate to mastodon). accept the host URL either
        # verbatim or fully percent-encoded.
        host_url = self.request.host_url
        has_backlink = (host_url in source_resp.text or
                        urllib.quote(host_url, safe='') in source_resp.text)
        if not has_backlink:
            common.error(self, "Couldn't find link to %s" % self.request.host_url)

        # convert source page to ActivityStreams
        entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
        if not entry:
            common.error(self, 'No microformats2 found on %s' % self.source_url)

        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        properties = entry.setdefault('properties', {})
        if not properties.get('url'):
            properties['url'] = [self.source_url]

        self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS1: %s', json.dumps(self.source_obj, indent=2))

        if not self.try_activitypub():
            self.try_salmon()
Example #13
0
    def get(self):
        """Fetch a URL and convert its contents to an ActivityStreams response.

        Query params:
          input: required; one of 'activitystreams', 'html', 'json-mf2',
            'jsonfeed'.
          url: required; the document to fetch (via self._urlopen) and convert.

        Raises:
          exc.HTTPBadRequest: if input is not one of the expected values.
        """
        expected_inputs = ('activitystreams', 'html', 'json-mf2', 'jsonfeed')
        fmt = util.get_required_param(self, 'input')
        if fmt not in expected_inputs:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (fmt, expected_inputs))
        url, body = self._urlopen(util.get_required_param(self, 'url'))

        # decode data. only the html and json-mf2 inputs have mf2 to inspect.
        mf2 = None
        if fmt == 'html':
            mf2 = mf2py.parse(doc=body, url=url)
        elif fmt == 'json-mf2':
            mf2 = json.loads(body)
            mf2.setdefault('rels', {})  # mf2util expects rels

        actor = title = None
        if mf2:

            def fetch_author_mf2(url):
                # fetch through self._urlopen, like the main document
                _final_url, fetched = self._urlopen(url)
                return mf2py.parse(doc=fetched, url=url)

            actor = microformats2.find_author(
                mf2, fetch_mf2_func=fetch_author_mf2)
            title = mf2util.interpret_feed(mf2, url).get('name')

        if fmt == 'activitystreams':
            activities = json.loads(body)
        elif fmt == 'html':
            activities = microformats2.html_to_activities(body, url, actor)
        elif fmt == 'json-mf2':
            activities = [
                microformats2.json_to_object(item, actor=actor)
                for item in mf2.get('items', [])
            ]
        elif fmt == 'jsonfeed':
            # JSON Feed conversion supplies its own actor
            activities, actor = jsonfeed.jsonfeed_to_activities(
                json.loads(body))

        base_response = source.Source.make_activities_base_response(activities)
        self.write_response(base_response, url=url, actor=actor, title=title)
Example #14
0
  def finish(self, auth_entity, state=None):
    """Render the generated feed URL once the Twitter OAuth flow completes.

    Args:
      auth_entity: the auth entity produced by the OAuth flow, or a falsy
        value if the user declined the auth prompt
      state: not used by this method

    Returns: the result of the redirect to / when the user declined;
      otherwise None, after writing the rendered template to the response.
    """
    if not auth_entity:
      logging.info('User declined Twitter auth prompt')
      return self.redirect('/')

    token_key, token_secret = auth_entity.access_token()
    atom_url = self.request.host_url + '/atom?' + urllib.urlencode({
        'consumer_key': util.get_required_param(self, 'consumer_key'),
        'consumer_secret': util.get_required_param(self, 'consumer_secret'),
        'access_token_key': token_key,
        'access_token_secret': token_secret,
        'list': self.request.get('list', '').encode('utf-8'),
        })
    # Deliberately don't log atom_url itself: its query string carries the
    # consumer secret and the access token secret.
    logging.info('generated feed URL for list %r', self.request.get('list', ''))
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(('.')), autoescape=True)
    self.response.out.write(env.get_template('templates/generated.html').render(
      {'atom_url': atom_url}))
Example #15
0
  def finish(self, auth_entity, state=None):
    """Render the generated feed URL once the Twitter OAuth flow completes.

    Args:
      auth_entity: the auth entity produced by the OAuth flow, or a falsy
        value if the user declined the auth prompt
      state: not used by this method

    Returns: the result of the redirect to / when the user declined;
      otherwise None, after writing the rendered template to the response.
    """
    if not auth_entity:
      logging.info('User declined Twitter auth prompt')
      return self.redirect('/')

    token_key, token_secret = auth_entity.access_token()
    atom_url = self.request.host_url + '/atom?' + urllib.urlencode({
        'consumer_key': util.get_required_param(self, 'consumer_key'),
        'consumer_secret': util.get_required_param(self, 'consumer_secret'),
        'access_token_key': token_key,
        'access_token_secret': token_secret,
        'list': self.request.get('list', '').encode('utf-8'),
        })
    # Deliberately don't log atom_url itself: its query string carries the
    # consumer secret and the access token secret.
    logging.info('generated feed URL for list %r', self.request.get('list', ''))
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(('.')), autoescape=True)
    self.response.out.write(env.get_template('templates/generated.html').render(
      {'atom_url': atom_url}))
Example #16
0
    def get(self):
        """Render a stored response as microformats2 HTML.

        Looks up the Response stored under the id ``'<source> <target>'``
        (both are required query params) and converts whichever of its stored
        mf2 / AS2 / Atom payloads is present to AS1 before rendering it.

        Aborts with HTTP 404 if there is no stored response or it has no data.
        """
        source = util.get_required_param(self, 'source')
        target = util.get_required_param(self, 'target')

        response_id = '%s %s' % (source, target)
        stored = Response.get_by_id(response_id)
        if not stored:
            self.abort(404, 'No stored response for %s' % response_id)

        # formats are tried in a fixed order: mf2, then AS2, then Atom
        if stored.source_mf2:
            activity = microformats2.json_to_object(
                json.loads(stored.source_mf2))
        elif stored.source_as2:
            activity = as2.to_as1(json.loads(stored.source_as2))
        elif stored.source_atom:
            activity = atom.atom_to_activity(stored.source_atom)
        else:
            self.abort(404, 'Stored response for %s has no data' % response_id)

        rendered = microformats2.activities_to_html([activity])
        self.response.write(rendered)
Example #17
0
    def template_vars(self):
        """Build template variables for the requested ``resource``.

        The domain comes from the resource parsed as an acct: URI; if that
        raises ValueError, it falls back to the resource's URL netloc, or the
        raw resource when it has none. ``url`` is only passed along when the
        resource is an http(s) URL.
        """
        resource = util.get_required_param(self, 'resource')
        try:
            _user, domain = util.parse_acct_uri(resource)
        except ValueError:
            domain = urlparse.urlparse(resource).netloc or resource

        is_http_url = resource.startswith(('http://', 'https://'))
        url = resource if is_http_url else None

        return super(WebfingerHandler, self).template_vars(domain, url=url)
Example #18
0
  def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Raises:
      exc.HTTPNotFound: if the path has more than MAX_PATH_LEN elements or
        names a site that isn't known.
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
      raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                             (MAX_PATH_LEN, len(args)))

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
      src = twitter.Twitter(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'facebook':
      src = facebook.Facebook(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
      src = flickr.Flickr(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'instagram':
      src = instagram.Instagram(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'google+':
      auth_entity = util.get_required_param(self, 'auth_entity')
      src = googleplus.GooglePlus(auth_entity=ndb.Key(urlsafe=auth_entity).get())
    else:
      src_cls = source.sources.get(site)
      if not src_cls:
        raise exc.HTTPNotFound('Unknown site %r' % site)
      src = src_cls(**self.request.params)

    # handle default path elements: a value listed in PATH_DEFAULTS for its
    # position becomes None. done once here; the atom branch below reuses it.
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # fetch actor if necessary. atom needs actor.
    actor = None
    if self.request.get('format') == 'atom':
      actor = src.get_actor(user_id) if src else {}

    # get activities and write response
    response = src.get_activities_response(*args, **self.get_kwargs())
    self.write_response(response, actor=actor)
Example #19
0
  def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Raises:
      exc.HTTPNotFound: if the path has more than MAX_PATH_LEN elements or
        names a site that isn't known.
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
      raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                             (MAX_PATH_LEN, len(args)))

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
      src = twitter.Twitter(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'facebook':
      src = facebook.Facebook(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
      src = flickr.Flickr(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'instagram':
      src = instagram.Instagram(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'google+':
      auth_entity = util.get_required_param(self, 'auth_entity')
      src = googleplus.GooglePlus(auth_entity=ndb.Key(urlsafe=auth_entity).get())
    else:
      src_cls = source.sources.get(site)
      if not src_cls:
        raise exc.HTTPNotFound('Unknown site %r' % site)
      src = src_cls(**self.request.params)

    # handle default path elements: a value listed in PATH_DEFAULTS for its
    # position becomes None. done once here; the atom branch below reuses it.
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # fetch actor if necessary. atom needs actor.
    actor = None
    if self.request.get('format') == 'atom':
      actor = src.get_actor(user_id) if src else {}

    # get activities and write response
    response = src.get_activities_response(*args, **self.get_kwargs())
    self.write_response(response, actor=actor)
Example #20
0
  def get(self):
    """Redirect a form-style request to the equivalent REST API path.

    Reads site (required), user_id, group_id and, depending on the group,
    list, search_query or activity_id from the request, then redirects to
    /site/user/group/@app/activity_id with the query params passed through.
    """
    site = util.get_required_param(self, 'site')
    user = self.request.get('user_id') or source.ME
    group = self.request.get('group_id') or source.ALL
    if group == '@list':
      group = util.get_required_param(self, 'list')

    activity_id = search_query = ''
    if group == source.SEARCH:
      search_query = self.request.get('search_query', '').encode('utf-8')
    elif group != source.BLOCKS:
      activity_id = self.request.get('activity_id', '')

    # pass query params through
    params = dict(self.request.params.items())
    params.update({
      'plaintext': 'true',
      'cache': 'false',
      'search_query': search_query,
    })

    # UTF-8 encode and quote every path segment, not just user. Interpolating
    # them raw left reserved characters unescaped in the redirect URL, and
    # mixing UTF-8 bytes with unicode segments in one % format fails on
    # non-ASCII values.
    path = '/'.join(
      urllib.quote_plus(part.encode('utf-8'), safe='@')
      for part in (site, user, group, '@app', activity_id))
    return self.redirect('/%s?%s' % (path, urllib.urlencode(params)))
Example #21
0
  def get(self):
    """Fetch a URL and convert its contents to an ActivityStreams response.

    Query params:
      input: required; one of 'activitystreams', 'html', 'json-mf2'.
      url: required; the document to fetch and convert.

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, expected_inputs))

    # fetch url
    url = util.get_required_param(self, 'url')
    resp = util.urlopen(url)
    if url != resp.geturl():
      url = resp.geturl()
      logging.info('Redirected to %s', url)
    body = resp.read()

    # decode data
    mf2 = None
    if input == 'activitystreams':
      activities = json.loads(body)
    elif input == 'html':
      activities = microformats2.html_to_activities(body, url)
      mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
      mf2 = json.loads(body)
      # mf2util expects rels. only add an empty dict when the document has
      # none, so rels that were fetched aren't thrown away.
      mf2.setdefault('rels', {})
      activities = [microformats2.json_to_object(item)
                    for item in mf2.get('items', [])]

    # author and feed title are only available when we have mf2
    author = None
    title = None
    if mf2:
      author = microformats2.find_author(mf2)
      title = mf2util.interpret_feed(mf2, url).get('name')

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=author, title=title)
Example #22
0
    def get(self):
        """Redirect a form-style request to the equivalent REST API path.

        Reads site (required), user_id, group_id and, depending on the group,
        list, search_query or activity_id from the request, then redirects to
        /site/user/group/@app/activity_id with the query params passed
        through.
        """
        site = util.get_required_param(self, 'site')
        user = self.request.get('user_id') or source.ME
        group = self.request.get('group_id') or source.ALL
        if group == '@list':
            group = util.get_required_param(self, 'list')

        activity_id = search_query = ''
        if group == source.SEARCH:
            search_query = self.request.get('search_query', '').encode('utf-8')
        elif group != source.BLOCKS:
            activity_id = self.request.get('activity_id', '')

        # pass query params through
        params = dict(self.request.params.items())
        params.update({
            'plaintext': 'true',
            'cache': 'false',
            'search_query': search_query,
        })

        # UTF-8 encode and quote every path segment, not just user.
        # Interpolating them raw left reserved characters unescaped in the
        # redirect URL, and mixing UTF-8 bytes with unicode segments in one
        # % format fails on non-ASCII values.
        path = '/'.join(
            urllib.quote_plus(part.encode('utf-8'), safe='@')
            for part in (site, user, group, '@app', activity_id))
        return self.redirect('/%s?%s' % (path, urllib.urlencode(params)))
Example #23
0
  def get(self):
    """Serve the Instagram feed for a session cookie as Atom.

    Requires the sessionid query param. Fetches the friends group by
    scraping with that cookie and writes the activities as an Atom feed.
    On failure, responds with either a one-entry Atom feed asking the user
    to regenerate the feed (401/403) or an error status and message.
    """
    cookie = 'sessionid=%s' % urllib.quote(
      util.get_required_param(self, 'sessionid').encode('utf-8'))
    # NOTE(review): this writes the user's session cookie to the logs -
    # confirm that's acceptable.
    logging.info('Fetching with Cookie: %s', cookie)

    host_url = self.request.host_url + '/'
    ig = instagram.Instagram()
    try:
      resp = ig.get_activities_response(group_id=source.FRIENDS, scrape=True,
                                        cookie=cookie)
    except Exception as e:
      # status is compared against strings and passed to int() below, so it
      # is presumably an HTTP status code string, or falsy when unknown.
      status, text = util.interpret_http_exception(e)
      if status in ('401', '403'):
        # auth failure: still answer with a feed, holding a single entry that
        # links back to this host so the user can regenerate the feed.
        self.response.headers['Content-Type'] = 'application/atom+xml'
        self.response.out.write(atom.activities_to_atom([{
          'object': {
            'url': self.request.url,
            'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
            },
          }], {}, title='instagram-atom', host_url=host_url,
          request_url=self.request.path_url))
        return
      elif status:
        # report any upstream 5xx as 502; pass other statuses through
        self.response.status = 502 if int(status) // 100 == 5 else status
      elif util.is_connection_failure(e):
        self.response.status = 504  # HTTP 504 Gateway Timeout
      else:
        logging.exception('oops!')
        self.response.status = 500

      # response.text is given unicode, so decode byte strings first
      if isinstance(text, str):
        text = text.decode('utf-8')
      self.response.text = text or u'Unknown error.'
      return

    actor = resp.get('actor')
    if actor:
      logging.info('Logged in as %s (%s)',
                   actor.get('username'), actor.get('displayName'))
    else:
      logging.warning("Couldn't determine Instagram user!")

    title = 'instagram-atom feed for %s' % ig.actor_name(actor)
    self.response.headers['Content-Type'] = 'application/atom+xml'
    self.response.out.write(atom.activities_to_atom(
      resp.get('items', []), actor, title=title, host_url=host_url,
      request_url=self.request.path_url, xml_base='https://www.instagram.com/'))
Example #24
0
  def get(self):
    """Redirect a form-style request to the equivalent REST API path.

    Reads site (required), group_id and either search_query (for searches)
    or activity_id from the request, then redirects to
    /site/@me/group/@app/activity_id, passing through only the query params
    named in API_PARAMS.
    """
    site = util.get_required_param(self, 'site')
    group = self.request.get('group_id', source.ALL)

    # exactly one of search_query and activity_id is read from the request
    searching = group == source.SEARCH
    search_query = self.request.get('search_query', '') if searching else ''
    activity_id = '' if searching else self.request.get('activity_id', '')

    params = {
      'plaintext': 'true',
      'search_query': search_query,
    }
    passthrough = {name: val for name, val in self.request.params.items()
                   if name in API_PARAMS}
    params.update(passthrough)

    query = urllib.urlencode(params)
    return self.redirect('/%s/@me/%s/@app/%s?%s' % (
      site, group, activity_id, query))
Example #25
0
    def get(self):
        """Serve the Instagram feed for a session cookie as Atom.

        Requires the sessionid query param. Fetches the friends group by
        scraping with that cookie and writes the activities as an Atom feed.
        On failure, responds with an error status and message instead.
        """
        cookie = 'sessionid=%s' % urllib.quote(
            util.get_required_param(self, 'sessionid').encode('utf-8'))
        logging.info('Fetching with Cookie: %s', cookie)

        ig = instagram.Instagram()
        try:
            resp = ig.get_activities_response(group_id=source.FRIENDS,
                                              scrape=True,
                                              cookie=cookie)
        except Exception as e:
            status, text = util.interpret_http_exception(e)
            if status:
                # Report any upstream 5xx as 502. The status may be a string,
                # so comparing it to the int 500 never matched; normalize
                # with int() before testing it.
                self.response.status = (
                    502 if int(status) // 100 == 5 else status)
            elif util.is_connection_failure(e):
                self.response.status = 504  # HTTP 504 Gateway Timeout
            else:
                logging.exception('oops!')
                self.response.status = 500

            # response.text is given unicode, so decode byte strings first
            if isinstance(text, str):
                text = text.decode('utf-8')
            self.response.text = text or u'Unknown error.'
            return

        actor = resp.get('actor')
        if actor:
            logging.info('Logged in as %s (%s)', actor.get('username'),
                         actor.get('displayName'))
        else:
            logging.warning("Couldn't determine Instagram user!")

        title = 'instagram-atom feed for %s' % ig.actor_name(actor)
        self.response.headers['Content-Type'] = 'application/atom+xml'
        self.response.out.write(
            atom.activities_to_atom(resp.get('items', []),
                                    actor,
                                    title=title,
                                    host_url=self.request.host_url + '/',
                                    request_url=self.request.path_url,
                                    xml_base='https://www.instagram.com/'))
Example #26
0
    def get(self):
        """Redirect a form-style request to the equivalent REST API path.

        Reads site (required), group_id and either search_query (for
        searches) or activity_id from the request, then redirects to
        /site/@me/group/@app/activity_id, passing through only the query
        params named in API_PARAMS.
        """
        site = util.get_required_param(self, 'site')
        group = self.request.get('group_id', source.ALL)

        # exactly one of search_query and activity_id is read from the request
        searching = group == source.SEARCH
        search_query = (self.request.get('search_query', '')
                        if searching else '')
        activity_id = ('' if searching
                       else self.request.get('activity_id', ''))

        params = {
            'plaintext': 'true',
            'search_query': search_query,
        }
        passthrough = {
            name: val
            for name, val in self.request.params.items() if name in API_PARAMS
        }
        params.update(passthrough)

        query = urllib.urlencode(params)
        return self.redirect(
            '/%s/@me/%s/@app/%s?%s' % (site, group, activity_id, query))
Example #27
0
def load_source(handler, param='source_key'):
  """Loads the source entity identified by a URL-safe key query parameter.

  Aborts the request with HTTP 400 when the key can't be decoded or when no
  entity is stored under it. A missing parameter is left to
  util.get_required_param() to reject.

  Args:
    handler: RequestHandler
    param: string, name of the query parameter holding the URL-safe key

  Returns: Source object
  """
  try:
    urlsafe_key = util.get_required_param(handler, param)
    entity = ndb.Key(urlsafe=urlsafe_key).get()
  except (TypeError, ProtocolBufferDecodeError):
    error = 'Bad value for %s' % param
    logging.warning(error, exc_info=True)
    handler.abort(400, error)

  if entity:
    return entity

  handler.abort(400, 'Source key not found')
  return entity
Example #28
0
  def get(self):
    """Redirects a demo request to the equivalent API URL.

    Reads the required ``site`` query param and the optional ``group_id``,
    ``user_id``, ``search_query`` and ``activity_id`` params, then redirects
    to ``/SITE/USER/GROUP/@app/ACTIVITY_ID``. The redirect's query string
    carries ``plaintext=true``, ``cache=false``, the search query, and every
    request param whose name is in ``API_PARAMS``.
    """
    request = self.request
    site = util.get_required_param(self, 'site')
    group = request.get('group_id') or source.ALL
    user = request.get('user_id') or source.ME

    # only one of search_query and activity_id is ever populated
    search_query = activity_id = ''
    if group == source.SEARCH:
      search_query = request.get('search_query', '').encode('utf-8')
    else:
      activity_id = request.get('activity_id', '').encode('utf-8')

    passthrough = {name: val for name, val in request.params.items()
                   if name in API_PARAMS}
    query = {
      'plaintext': 'true',
      'cache': 'false',
      'search_query': search_query,
    }
    query.update(passthrough)

    quoted_user = urllib.quote_plus(user.encode('utf-8'))
    query_string = urllib.urlencode(query)
    return self.redirect('/%s/%s/%s/@app/%s?%s' % (
      site, quoted_user, group, activity_id, query_string))
Example #29
0
  def get(self):
    """Translates demo form params into an API URL and redirects to it.

    ``site`` is required; ``group_id`` and ``user_id`` fall back to
    ``source.ALL`` and ``source.ME`` when missing or empty. The target is
    ``/SITE/USER/GROUP/@app/ACTIVITY_ID`` with a query string made of
    ``plaintext=true``, ``cache=false``, the search query, and any request
    params named in ``API_PARAMS``.
    """
    site = util.get_required_param(self, 'site')
    group = self.request.get('group_id') or source.ALL
    user = self.request.get('user_id') or source.ME

    # searches ignore activity_id; all other groups ignore search_query
    is_search = group == source.SEARCH
    search_query = (self.request.get('search_query', '').encode('utf-8')
                    if is_search else '')
    activity_id = ('' if is_search
                   else self.request.get('activity_id', '').encode('utf-8'))

    allowed = {name: val for name, val in self.request.params.items()
               if name in API_PARAMS}
    params = {
      'plaintext': 'true',
      'cache': 'false',
      'search_query': search_query,
    }
    params.update(allowed)

    path_values = (site, urllib.quote_plus(user.encode('utf-8')), group,
                   activity_id)
    return self.redirect(
      '/%s/%s/%s/@app/%s?%s' % (path_values + (urllib.urlencode(params),)))
Example #30
0
def load_source(handler, param='source_key'):
  """Loads the source entity identified by a URL-safe key query parameter.

  Aborts the request with HTTP 400 when the key can't be decoded or when no
  entity is stored under it. A missing parameter is left to
  util.get_required_param() to reject.

  Args:
    handler: RequestHandler
    param: string, name of the query parameter holding the URL-safe key

  Returns: Source object
  """
  try:
    urlsafe_key = util.get_required_param(handler, param)
    entity = ndb.Key(urlsafe=urlsafe_key).get()
  except (binascii.Error, google.protobuf.message.DecodeError):
    error = 'Bad value for %s' % param
    logging.warning(error, stack_info=True)
    handler.abort(400, error)

  if entity:
    return entity

  handler.abort(400, 'Source key not found')
  return entity
Example #31
0
    def get(self):
        """Handles an API GET.

        Request path is of the form /site/user_id/group_id/app_id/activity_id ,
        where each element except site is an optional string object id.

        Builds a source object for the requested site, fetches its activities
        (or its block list when the group element is '@blocks'), and writes
        them with write_response(). Unless the cache query param is 'false',
        responses are looked up in and stored to memcache.

        Raises:
          exc.HTTPNotFound: if the path has more than MAX_PATH_LEN elements or
            the site is unknown
          exc.HTTPBadRequest: if a tag URI id's domain doesn't match the
            source's DOMAIN
        """
        # parse path
        args = urllib.unquote(self.request.path).strip('/').split('/')
        if not args or len(args) > MAX_PATH_LEN:
            raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                                   (MAX_PATH_LEN, len(args)))

        # make source instance. the first path element selects the site;
        # credentials, where needed, come from query params.
        site = args.pop(0)
        if site == 'twitter':
            src = twitter.Twitter(access_token_key=util.get_required_param(
                self, 'access_token_key'),
                                  access_token_secret=util.get_required_param(
                                      self, 'access_token_secret'))
        elif site == 'facebook':
            src = facebook.Facebook(
                access_token=util.get_required_param(self, 'access_token'))
        elif site == 'flickr':
            src = flickr.Flickr(access_token_key=util.get_required_param(
                self, 'access_token_key'),
                                access_token_secret=util.get_required_param(
                                    self, 'access_token_secret'))
        elif site == 'instagram':
            src = instagram.Instagram(scrape=True)
        elif site == 'google+':
            auth_entity = util.get_required_param(self, 'auth_entity')
            src = googleplus.GooglePlus(auth_entity=ndb.Key(
                urlsafe=auth_entity).get())
        else:
            # any other site: look up its class and pass every request param
            # to the constructor as a keyword argument
            src_cls = source.sources.get(site)
            if not src_cls:
                raise exc.HTTPNotFound('Unknown site %r' % site)
            src = src_cls(**self.request.params)

        # decode tag URI ids
        for i, arg in enumerate(args):
            parsed = util.parse_tag_uri(arg)
            if parsed:
                domain, id = parsed
                if domain != src.DOMAIN:
                    raise exc.HTTPBadRequest(
                        'Expected domain %s in tag URI %s, found %s' %
                        (src.DOMAIN, arg, domain))
                args[i] = id

        # check if request is cached
        # NOTE(review): the cache key is built from the request path alone,
        # while credentials (e.g. access_token_key) arrive as query params
        # above. it looks like requests with different credentials or options
        # on the same path would share one cache entry - confirm.
        cache = self.request.get('cache', '').lower() != 'false'
        if cache:
            cache_key = 'R %s' % self.request.path
            cached = memcache.get(cache_key)
            if cached:
                logging.info('Serving cached response %r', cache_key)
                self.write_response(cached['response'],
                                    actor=cached['actor'],
                                    url=src.BASE_URL)
                return

        # handle default path elements: an element that appears in its
        # position's PATH_DEFAULTS entry is replaced with None
        args = [
            None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)
        ]
        user_id = args[0] if args else None

        # get activities (etc)
        try:
            if len(args) >= 2 and args[1] == '@blocks':
                response = {'items': src.get_blocklist()}
            else:
                response = src.get_activities_response(*args,
                                                       **self.get_kwargs(src))
        except (NotImplementedError, ValueError) as e:
            self.abort(400, str(e))
            # other exceptions are handled by webutil.handlers.handle_exception(),
            # which uses interpret_http_exception(), etc.

        # fetch actor if necessary
        actor = response.get('actor')
        if not actor and self.request.get('format') == 'atom':
            # atom needs actor
            # NOTE(review): args already had its defaults replaced above, so
            # this second pass looks redundant - confirm before removing.
            args = [
                None if a in defaults else a  # handle default path elements
                for a, defaults in zip(args, PATH_DEFAULTS)
            ]
            user_id = args[0] if args else None
            actor = src.get_actor(user_id) if src else {}

        self.write_response(response, actor=actor, url=src.BASE_URL)

        # cache response
        if cache:
            logging.info('Caching response in %r', cache_key)
            memcache.set(cache_key, {
                'response': response,
                'actor': actor
            }, src.RESPONSE_CACHE_TIME)
Example #32
0
    def try_activitypub(self):
        """Attempts to deliver the source post to its target via ActivityPub.

        Fetches the page named by the required source query param, converts
        its first h-entry to ActivityStreams, and fetches the target (the
        first inReplyTo or object) as AS2. If an inbox URL is found, on the
        target itself or on its actor, POSTs the signed AS2 activity there and
        relays the inbox's status code and body as this handler's response.
        Otherwise hands off to send_salmon().

        Returns: the result of send_salmon() on the fallback paths, otherwise
          None
        """
        source = util.get_required_param(self, 'source')

        # fetch source page, convert to ActivityStreams
        source_resp = common.requests_get(source)
        source_url = source_resp.url or source
        source_mf2 = mf2py.parse(source_resp.text, url=source_url)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

        # NOTE(review): if find_first_entry() can return None when the page
        # has no h-entry, entry.setdefault() below would raise - confirm.
        entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [source_url]

        source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

        # fetch target page as AS object. (target is first in-reply-to,
        # like-of, or repost-of, *not* target query param.)
        target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                              util.get_first(source_obj, 'object'))
        if not target:
            # presumably common.error() aborts the request - verify
            common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                         'found in %s' % source_url)

        try:
            target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            # the fetch succeeded (2xx) but returned HTML instead of AS2:
            # record the response and fall back to Salmon. anything else is
            # re-raised.
            # NOTE(review): assumes both exception types carry a .response
            # attribute here - confirm against common.get_as2().
            if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
                self.resp = Response.get_or_create(
                    source=source_url, target=e.response.url or target,
                    direction='out', source_mf2=json.dumps(source_mf2))
                return self.send_salmon(source_obj, target_resp=e.response)
            raise

        target_url = target_resp.url or target
        self.resp = Response.get_or_create(
            source=source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json.dumps(source_mf2))

        # find actor's inbox
        target_obj = target_resp.json()
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = target_obj.get('actor') or target_obj.get('attributedTo')
            if isinstance(actor, dict):
                # embedded actor object: use its inbox directly if present,
                # otherwise keep its url so it can be fetched below
                inbox_url = actor.get('inbox')
                actor = actor.get('url')
            if not inbox_url and not actor:
                common.error(self, 'Target object has no actor or attributedTo URL')

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return self.send_salmon(source_obj, target_resp=target_resp)

        # convert to AS2
        source_domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(source_domain)
        source_activity = common.postprocess_as2(
            as2.from_as1(source_obj), target=target_obj, key=key)

        # a stored response already marked complete is re-sent as an Update
        if self.resp.status == 'complete':
            source_activity['type'] = 'Update'

        # prepare HTTP Signature (required by Mastodon)
        # https://w3c.github.io/activitypub/#authorization-lds
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07
        # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
        acct = 'acct:%s@%s' % (source_domain, source_domain)
        auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                                 algorithm='rsa-sha256')

        # deliver source object to target actor's inbox.
        headers = {
            'Content-Type': common.CONTENT_TYPE_AS2,
            # required for HTTP Signature
            # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
            'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        }
        # the inbox URL may be relative; resolve it against the target URL
        inbox_url = urlparse.urljoin(target_url, inbox_url)
        resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                    headers=headers)
        # relay the inbox's status code and body to our caller
        self.response.status_int = resp.status_code
        if resp.status_code == 202:
            self.response.write('202 response! If this is Mastodon 1.x, their '
                                'signature verification probably failed. :(\n')
        self.response.write(resp.text)
Example #33
0
  def get(self):
    """Fetches a URL and converts its contents to an ActivityStreams response.

    Query params:
      input: required; one of 'activitystreams', 'html', 'json-mf2'
      url: required; the document to fetch
      cache: optional; 'false' skips both the memcache lookup and the store

    The fetched (or cached) body is decoded according to input, converted to
    activities, and written with write_response(). Invalid URLs are reported
    with HTTP 400 and connection failures with HTTP 504, both via abort().

    Raises:
      exc.HTTPBadRequest: if input isn't one of the expected values, or if
        the body can't be decoded as JSON for a JSON-based input
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, expected_inputs))
    url = util.get_required_param(self, 'url')

    # check if request is cached
    cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    cached = memcache.get(cache_key) if cache else None

    if cached:
      logging.info('Serving cached response %r', cache_key)
      url = cached['url']
      body = cached['body']
    else:
      # fetch url
      try:
        resp = util.urlopen(url)
      except (ValueError, httplib.InvalidURL) as e:
        self.abort(400, str(e))
      except Exception as e:
        if util.is_connection_failure(e):
          # HTTP 504 Gateway Timeout
          self.abort(504, str(e))
        raise

      # follow redirects: report and cache the final URL, not the requested one
      if url != resp.geturl():
        url = resp.geturl()
        logging.info('Redirected to %s', url)
      body = resp.read()

      if cache:
        logging.info('Caching response in %r', cache_key)
        memcache.set(cache_key, {'url': url, 'body': body}, URL_CACHE_TIME)

    # decode data. JSON inputs are parsed once, up front, so that a malformed
    # body is reported as a client error (400) instead of surfacing as an
    # uncaught ValueError.
    body_json = None
    if input in ('activitystreams', 'json-mf2'):
      try:
        body_json = json.loads(body)
      except (TypeError, ValueError):
        raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

    mf2 = None
    if input == 'html':
      mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
      mf2 = body_json
      mf2.setdefault('rels', {})  # mf2util expects rels

    # author and feed title are only available from microformats2 input
    actor = None
    title = None
    if mf2:
      actor = microformats2.find_author(
        mf2, fetch_mf2_func=lambda url: mf2py.parse(url=url))
      title = mf2util.interpret_feed(mf2, url).get('name')

    if input == 'activitystreams':
      activities = body_json
    elif input == 'html':
      activities = microformats2.html_to_activities(body, url, actor)
    elif input == 'json-mf2':
      activities = [microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])]

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title)
Example #34
0
    def get(self):
        """Fetches a URL and converts its contents to ActivityStreams.

        Query params:
          input: required; must be one of INPUTS
          url: required; the document to fetch, via self._fetch()

        The body is decoded according to input, converted to activities, and
        written with write_response().

        Raises:
          exc.HTTPBadRequest: if input is unsupported, or if the body can't
            be decoded or parsed as the requested format
        """
        input = util.get_required_param(self, 'input')
        if input not in INPUTS:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, INPUTS))
        url, body = self._fetch(util.get_required_param(self, 'url'))

        # decode data
        if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                     'jsonfeed'):
            try:
                body_json = json.loads(body)
                body_items = (body_json if isinstance(body_json, list) else
                              body_json.get('items') or [body_json])
            except (AttributeError, TypeError, ValueError):
                # AttributeError covers valid JSON that is neither an object
                # nor a list (e.g. a bare string or number), which has no
                # .get(); report it as a client error too.
                raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

        mf2 = None
        if input == 'html':
            mf2 = mf2py.parse(doc=body, url=url)
        elif input in ('mf2-json', 'json-mf2'):
            mf2 = body_json
            # a JSON list decodes fine above but isn't a microformats2
            # document; reject it instead of crashing on setdefault()
            if not hasattr(mf2, 'get'):
                raise exc.HTTPBadRequest(
                    'Expected microformats2 JSON input to be dict, got %s' %
                    mf2.__class__.__name__)
            mf2.setdefault('rels', {})  # mf2util expects rels

        # author and title are only available from microformats2 input
        actor = None
        title = None
        if mf2:

            def fetch_mf2_func(url):
                # for URLs on silo domains, return a stub h-card with just
                # the URL instead of fetching the page
                if util.domain_or_parent_in(
                        urlparse.urlparse(url).netloc, SILO_DOMAINS):
                    return {
                        'items': [{
                            'type': ['h-card'],
                            'properties': {
                                'url': [url]
                            }
                        }]
                    }
                _, doc = self._fetch(url)
                return mf2py.parse(doc=doc, url=url)

            try:
                actor = microformats2.find_author(
                    mf2, fetch_mf2_func=fetch_mf2_func)
                title = microformats2.get_title(mf2)
            except (KeyError, ValueError) as e:
                raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                         (url, input, e))

        if input in ('as1', 'activitystreams'):
            activities = body_items
        elif input == 'as2':
            activities = [as2.to_as1(obj) for obj in body_items]
        elif input == 'atom':
            try:
                activities = atom.atom_to_activities(body)
            except ElementTree.ParseError as e:
                raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                         (url, e))
            except ValueError as e:
                raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                         (url, e))
        elif input == 'html':
            activities = microformats2.html_to_activities(body, url, actor)
        elif input in ('mf2-json', 'json-mf2'):
            activities = [
                microformats2.json_to_object(item, actor=actor)
                for item in mf2.get('items', [])
            ]
        elif input == 'jsonfeed':
            try:
                activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
            except ValueError:
                logging.exception('jsonfeed_to_activities failed')
                raise exc.HTTPBadRequest('Could not parse %s as JSON Feed' %
                                         url)

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=url,
            actor=actor,
            title=title)
Example #35
0
  def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Builds a source object for the requested site, fetches its activities, and
    writes them with write_response(). Unless the cache query param is
    'false', responses are looked up in and stored to memcache.

    Raises:
      exc.HTTPNotFound: if the path has more than MAX_PATH_LEN elements or the
        site is unknown
      exc.HTTPBadRequest: if a tag URI id's domain doesn't match the source's
        DOMAIN
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
      raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                             (MAX_PATH_LEN, len(args)))

    # make source instance. the first path element selects the site;
    # credentials, where needed, come from query params.
    site = args.pop(0)
    if site == 'twitter':
      src = twitter.Twitter(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'facebook':
      src = facebook.Facebook(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
      src = flickr.Flickr(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'instagram':
      src = instagram.Instagram(scrape=True)
    elif site == 'google+':
      auth_entity = util.get_required_param(self, 'auth_entity')
      src = googleplus.GooglePlus(auth_entity=ndb.Key(urlsafe=auth_entity).get())
    else:
      # any other site: look up its class and pass every request param to the
      # constructor as a keyword argument
      src_cls = source.sources.get(site)
      if not src_cls:
        raise exc.HTTPNotFound('Unknown site %r' % site)
      src = src_cls(**self.request.params)

    # decode tag URI ids
    for i, arg in enumerate(args):
      parsed = util.parse_tag_uri(arg)
      if parsed:
        domain, id = parsed
        if domain != src.DOMAIN:
          raise exc.HTTPBadRequest('Expected domain %s in tag URI %s, found %s' %
                                   (src.DOMAIN, arg, domain))
        args[i] = id

    # check if request is cached
    # NOTE(review): the cache key is built from the request path alone, while
    # credentials (e.g. access_token_key) arrive as query params above. it
    # looks like requests with different credentials or options on the same
    # path would share one cache entry - confirm.
    cache = self.request.get('cache', '').lower() != 'false'
    if cache:
      cache_key = 'R %s' % self.request.path
      cached = memcache.get(cache_key)
      if cached:
        logging.info('Serving cached response %r', cache_key)
        self.write_response(cached['response'], actor=cached['actor'],
                            url=src.BASE_URL)
        return

    # handle default path elements: an element that appears in its position's
    # PATH_DEFAULTS entry is replaced with None
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # get activities
    try:
      response = src.get_activities_response(*args, **self.get_kwargs(src))
    except NotImplementedError as e:
      self.abort(400, str(e))
    except Exception as e:
      if util.is_connection_failure(e):
        # HTTP 504 Gateway Timeout
        self.abort(504, str(e))
      raise

    # fetch actor if necessary
    actor = response.get('actor')
    if not actor and self.request.get('format') == 'atom':
      # atom needs actor
      # NOTE(review): args already had its defaults replaced above, so this
      # second pass looks redundant - confirm before removing.
      args = [None if a in defaults else a  # handle default path elements
              for a, defaults in zip(args, PATH_DEFAULTS)]
      user_id = args[0] if args else None
      actor = src.get_actor(user_id) if src else {}

    self.write_response(response, actor=actor, url=src.BASE_URL)

    # cache response
    if cache:
      logging.info('Caching response in %r', cache_key)
      memcache.set(cache_key, {'response': response, 'actor': actor},
                   src.RESPONSE_CACHE_TIME)
Example #36
0
  def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Builds a source object for the requested site, fetches its activities (or
    its block list when the group element is '@blocks'), and writes them with
    write_response().

    Raises:
      exc.HTTPNotFound: if the path has more than MAX_PATH_LEN elements or the
        site is unknown
      exc.HTTPBadRequest: if a tag URI id's domain doesn't match the source's
        DOMAIN
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
      raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                             (MAX_PATH_LEN, len(args)))

    # make source instance. the first path element selects the site;
    # credentials, where needed, come from query params.
    site = args.pop(0)
    if site == 'twitter':
      src = twitter.Twitter(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'facebook':
      src = facebook.Facebook(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
      src = flickr.Flickr(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'github':
      src = github.GitHub(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'instagram':
      src = instagram.Instagram(scrape=True)
    else:
      # any other site: look up its class and pass every request param to the
      # constructor as a keyword argument
      src_cls = source.sources.get(site)
      if not src_cls:
        raise exc.HTTPNotFound('Unknown site %r' % site)
      src = src_cls(**self.request.params)

    # decode tag URI ids
    for i, arg in enumerate(args):
      parsed = util.parse_tag_uri(arg)
      if parsed:
        domain, id = parsed
        if domain != src.DOMAIN:
          raise exc.HTTPBadRequest('Expected domain %s in tag URI %s, found %s' %
                                   (src.DOMAIN, arg, domain))
        args[i] = id

    # handle default path elements: an element that appears in its position's
    # PATH_DEFAULTS entry is replaced with None
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # get activities (etc)
    try:
      if len(args) >= 2 and args[1] == '@blocks':
        try:
          response = {'items': src.get_blocklist()}
        except source.RateLimited as e:
          # rate limited part way through: return whatever was collected, or
          # abort with HTTP 429 if there's nothing
          if not e.partial:
            self.abort(429, str(e))
          response = {'items': e.partial}
      else:
        response = src.get_activities_response(*args, **self.get_kwargs())
    except (NotImplementedError, ValueError) as e:
      self.abort(400, str(e))
      # other exceptions are handled by webutil.handlers.handle_exception(),
      # which uses interpret_http_exception(), etc.

    # fetch actor if necessary
    actor = response.get('actor')
    if not actor and self.request.get('format') == 'atom':
      # atom needs actor
      # NOTE(review): args is not read again after this reassignment; the
      # actor lookup below uses the user_id computed earlier.
      args = [None if a in defaults else a  # handle default path elements
              for a, defaults in zip(args, PATH_DEFAULTS)]
      actor = src.get_actor(user_id) if src else {}

    self.write_response(response, actor=actor, url=src.BASE_URL)
Example #37
0
    def get(self):
        """Fetches a URL and converts its contents to ActivityStreams.

        Query params:
          input: required; must be one of INPUTS
          url: required; the document to fetch. A URL fragment is only
            allowed with input=html, where it selects the element with that
            id.

        The response body is decoded according to input, converted to
        activities, and written with write_response(). Input that fails to
        parse with a ValueError is reported via abort(400).

        Raises:
          exc.HTTPBadRequest: if input is unsupported, a fragment is used with
            a non-HTML input, the fragment's element isn't found, or the body
            can't be decoded or parsed as the requested format
        """
        input = util.get_required_param(self, 'input')
        if input not in INPUTS:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, INPUTS))

        orig_url = util.get_required_param(self, 'url')
        fragment = urllib.parse.urlparse(orig_url).fragment
        if fragment and input != 'html':
            raise exc.HTTPBadRequest(
                'URL fragments only supported with input=html.')

        resp = util.requests_get(orig_url, gateway=True)
        final_url = resp.url

        # decode data
        if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                     'jsonfeed'):
            try:
                body_json = json_loads(resp.text)
                body_items = (body_json if isinstance(body_json, list) else
                              body_json.get('items') or [body_json])
            except (TypeError, ValueError):
                raise exc.HTTPBadRequest('Could not decode %s as JSON' %
                                         final_url)

        mf2 = None
        if input == 'html':
            mf2 = util.parse_mf2(resp, id=fragment)
            # only complain about a missing element when a fragment was
            # actually requested. (this used to test the builtin `id`, which
            # is always truthy.)
            if fragment and not mf2:
                raise exc.HTTPBadRequest(
                    'Got fragment %s but no element found with that id.' %
                    fragment)
        elif input in ('mf2-json', 'json-mf2'):
            mf2 = body_json
            if not hasattr(mf2, 'get'):
                raise exc.HTTPBadRequest(
                    'Expected microformats2 JSON input to be dict, got %s' %
                    mf2.__class__.__name__)
            mf2.setdefault('rels', {})  # mf2util expects rels

        # author, title, and h-feed are only available from microformats2
        actor = None
        title = None
        hfeed = None
        if mf2:

            def fetch_mf2_func(url):
                # for URLs on silo domains, return a stub h-card with just
                # the URL instead of fetching the page
                if util.domain_or_parent_in(
                        urllib.parse.urlparse(url).netloc, SILO_DOMAINS):
                    return {
                        'items': [{
                            'type': ['h-card'],
                            'properties': {
                                'url': [url]
                            }
                        }]
                    }
                return util.fetch_mf2(url, gateway=True)

            try:
                actor = microformats2.find_author(
                    mf2, fetch_mf2_func=fetch_mf2_func)
                title = microformats2.get_title(mf2)
                hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
            except (KeyError, ValueError) as e:
                raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                         (final_url, input, e))

        try:
            if input in ('as1', 'activitystreams'):
                activities = body_items
            elif input == 'as2':
                activities = [as2.to_as1(obj) for obj in body_items]
            elif input == 'atom':
                try:
                    activities = atom.atom_to_activities(resp.text)
                except ElementTree.ParseError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                             (final_url, e))
                except ValueError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                             (final_url, e))
            elif input == 'html':
                activities = microformats2.html_to_activities(resp,
                                                              url=final_url,
                                                              id=fragment,
                                                              actor=actor)
            elif input in ('mf2-json', 'json-mf2'):
                activities = [
                    microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])
                ]
            elif input == 'jsonfeed':
                activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
        except ValueError as e:
            logging.warning('parsing input failed', stack_info=True)
            self.abort(
                400,
                'Could not parse %s as %s: %s' % (final_url, input, str(e)))

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=final_url,
            actor=actor,
            title=title,
            hfeed=hfeed)
Example #38
0
    def get(self):
        """Serves the Instagram feed for the given session cookie.

        Query params:
          sessionid: required; sent to Instagram as the sessionid cookie
          format: optional; 'atom' (default) or 'html'

        Fetch failures are mapped onto the response: 403 becomes an Atom feed
        with a single entry asking the user to regenerate their feed, 401
        becomes 429, any other interpreted status is passed through, and
        anything else becomes 500.
        """
        session_id = util.get_required_param(self, 'sessionid')
        cookie = 'sessionid=%s' % urllib.parse.quote(session_id.encode('utf-8'))
        logging.info('Fetching with Cookie: %s', cookie)

        host_url = self.request.host_url + '/'
        ig = instagram.Instagram()
        try:
            resp = ig.get_activities_response(
                group_id=source.FRIENDS, scrape=True, cookie=cookie)
        except Exception as e:
            status, text = util.interpret_http_exception(e)

            if status == '403':
                # the cookie was rejected: answer with a feed whose only
                # entry links back to the front page
                self.response.headers['Content-Type'] = 'application/atom+xml'
                error_activity = {
                    'object': {
                        'url': self.request.url,
                        'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
                    },
                }
                feed = atom.activities_to_atom(
                    [error_activity], {},
                    title='instagram-atom',
                    host_url=host_url,
                    request_url=self.request.path_url)
                self.response.out.write(feed)
                return

            if not status:
                logging.exception('oops!')
                self.response.status = 500
            elif status == '401':
                # IG returns 401 sometimes as a form of rate limiting or bot
                # detection
                self.response.status = '429'
            else:
                self.response.status = status

            self.response.text = text or 'Unknown error.'
            return

        actor = resp.get('actor')
        if not actor:
            logging.warning("Couldn't determine Instagram user!")
        else:
            logging.info('Logged in as %s (%s)', actor.get('username'),
                         actor.get('displayName'))

        activities = resp.get('items', [])
        format = self.request.get('format', 'atom')
        if format == 'html':
            self.response.headers['Content-Type'] = 'text/html'
            self.response.out.write(
                microformats2.activities_to_html(activities))
        elif format == 'atom':
            title = 'instagram-atom feed for %s' % ig.actor_name(actor)
            self.response.headers['Content-Type'] = 'application/atom+xml'
            feed = atom.activities_to_atom(
                activities,
                actor,
                title=title,
                host_url=host_url,
                request_url=self.request.path_url,
                xml_base='https://www.instagram.com/')
            self.response.out.write(feed)
        else:
            self.abort(400,
                       'format must be either atom or html; got %s' % format)
Example #39
0
  def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    The first path element selects the source class; its credentials are read
    from query parameters. Remaining elements that are tag URIs are decoded to
    bare ids, and elements found in the corresponding PATH_DEFAULTS entry are
    replaced with None before being passed on to the source.

    Raises:
      exc.HTTPNotFound: if the path has more than MAX_PATH_LEN elements or the
        site is unknown.
      exc.HTTPBadRequest: if a tag URI's domain doesn't match the source's
        DOMAIN.
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
      raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                             (MAX_PATH_LEN, len(args)))

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
      src = twitter.Twitter(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'facebook':
      src = facebook.Facebook(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
      src = flickr.Flickr(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'github':
      src = github.GitHub(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'instagram':
      src = instagram.Instagram(scrape=True)
    else:
      # fall back to the registry of source classes, constructed directly from
      # the request's query parameters
      src_cls = source.sources.get(site)
      if not src_cls:
        raise exc.HTTPNotFound('Unknown site %r' % site)
      src = src_cls(**self.request.params)

    # decode tag URI ids
    for i, arg in enumerate(args):
      parsed = util.parse_tag_uri(arg)
      if parsed:
        domain, tag_id = parsed
        if domain != src.DOMAIN:
          raise exc.HTTPBadRequest('Expected domain %s in tag URI %s, found %s' %
                                   (src.DOMAIN, arg, domain))
        args[i] = tag_id

    # handle default path elements
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # get activities (etc)
    try:
      if len(args) >= 2 and args[1] == '@blocks':
        try:
          response = {'items': src.get_blocklist()}
        except source.RateLimited as e:
          # serve whatever was fetched before the rate limit hit, if anything
          if not e.partial:
            self.abort(429, str(e))
          response = {'items': e.partial}
      else:
        response = src.get_activities_response(*args, **self.get_kwargs())
    except (NotImplementedError, ValueError) as e:
      self.abort(400, str(e))
      # other exceptions are handled by webutil.handlers.handle_exception(),
      # which uses interpret_http_exception(), etc.

    # fetch actor if necessary. (args already had its default path elements
    # replaced with None above, so there's nothing to re-normalize here.)
    actor = response.get('actor')
    if not actor and self.request.get('format') == 'atom':
      # atom needs actor
      actor = src.get_actor(user_id) if src else {}

    self.write_response(response, actor=actor, url=src.BASE_URL)
Example #40
0
    def get(self):
        """Serves a GET request against the REST API.

        The URL path looks like /site/user_id/group_id/app_id/activity_id .
        Only site is required; every other element is an optional string
        object id.
        """
        def required(name):
            # shorthand for a query parameter that must be present
            return util.get_required_param(self, name)

        # split the unquoted path into its elements
        path = urllib.parse.unquote(self.request.path)
        args = path.strip('/').split('/')
        if not args or len(args) > MAX_PATH_LEN:
            raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                                   (MAX_PATH_LEN, len(args)))

        # hard-coded rejection of one specific user id (second path element)
        if args[1:2] == ['nederland20']:
            return self.abort(
                401,
                'To protect our users from spam and other malicious activity, this account is temporarily locked. Please log in to https://twitter.com to unlock your account.'
            )

        # build the source object for the requested site
        site = args.pop(0)
        if site in ('twitter', 'flickr', 'meetup'):
            # these three all take an access token key and secret
            oauth1_classes = {
                'twitter': twitter.Twitter,
                'flickr': flickr.Flickr,
                'meetup': meetup.Meetup,
            }
            token_key = required('access_token_key')
            token_secret = required('access_token_secret')
            src = oauth1_classes[site](access_token_key=token_key,
                                       access_token_secret=token_secret)
        elif site in ('mastodon', 'pixelfed'):
            # these two take an instance, access token, and user id
            instance_classes = {
                'mastodon': mastodon.Mastodon,
                'pixelfed': pixelfed.Pixelfed,
            }
            instance = required('instance')
            token = required('access_token')
            account_id = required('user_id')
            src = instance_classes[site](instance=instance,
                                         access_token=token,
                                         user_id=account_id)
        elif site == 'github':
            src = github.GitHub(access_token=required('access_token'))
        elif site == 'reddit':
            # NOTE: the refresh_token should be returned but is not appearing
            src = reddit.Reddit(refresh_token=required('refresh_token'))
        elif site == 'facebook':
            self.abort(
                400,
                'Sorry, Facebook is no longer available in the REST API. Try the library instead!'
            )
        elif site == 'instagram':
            # only served when the request explicitly says interactive=true
            if self.request.get('interactive').lower() != 'true':
                self.abort(
                    400,
                    'Sorry, Instagram is not currently available in the REST API. Try https://instagram-atom.appspot.com/ instead!'
                )
            else:
                src = instagram.Instagram(scrape=True)
        else:
            # anything else is looked up in the registry of source classes and
            # constructed straight from the query parameters
            src_cls = source.sources.get(site)
            if not src_cls:
                raise exc.HTTPNotFound('Unknown site %r' % site)
            src = src_cls(**self.request.params)

        # replace tag URIs with the bare ids they contain
        for idx, element in enumerate(args):
            parsed = util.parse_tag_uri(element)
            if not parsed:
                continue
            domain, tag_id = parsed
            if domain != src.DOMAIN:
                raise exc.HTTPBadRequest(
                    'Expected domain %s in tag URI %s, found %s' %
                    (src.DOMAIN, element, domain))
            args[idx] = tag_id

        # path elements that are just the default value become None
        normalized = []
        for element, defaults in zip(args, PATH_DEFAULTS):
            normalized.append(None if element in defaults else element)
        args = normalized
        user_id = args[0] if args else None

        # fetch the blocklist or the activities
        try:
            if args[1:2] == ['@blocks']:
                try:
                    blocked = src.get_blocklist()
                except source.RateLimited as err:
                    # fall back to the partial results, if there are any
                    if not err.partial:
                        self.abort(429, str(err))
                    blocked = err.partial
                response = {'items': blocked}
            else:
                kwargs = self.get_kwargs()
                response = src.get_activities_response(*args, **kwargs)
        except (NotImplementedError, ValueError) as err:
            # other exceptions are handled by
            # webutil.handlers.handle_exception(), which uses
            # interpret_http_exception(), etc.
            self.abort(400, str(err))

        # atom output needs an actor, so look one up if the response has none
        actor = response.get('actor')
        if not actor and self.request.get('format') == 'atom':
            if src:
                actor = src.get_actor(user_id)
            else:
                actor = {}

        self.write_response(response, actor=actor, url=src.BASE_URL)
Example #41
0
  def get(self):
    """Fetches a URL, parses it as the requested input format, and writes it out.

    Query parameters:
      input: required; must be one of INPUTS.
      url: required; the document to fetch and convert.

    The fetched body is converted to a list of activities, plus actor, title,
    and h-feed where the input is microformats2, and handed to
    write_response().

    Raises:
      exc.HTTPBadRequest: if input is not in INPUTS, or the fetched document
        can't be decoded or parsed as the requested format.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2', 'jsonfeed'):
      try:
        body_json = json.loads(body)
      except (TypeError, ValueError):
        raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

      if isinstance(body_json, list):
        body_items = body_json
      elif isinstance(body_json, dict):
        body_items = body_json.get('items') or [body_json]
      else:
        # valid JSON can also be a bare scalar (null, number, string, bool),
        # which has no .get(); reject it explicitly instead of letting an
        # AttributeError escape as a server error.
        raise exc.HTTPBadRequest(
          'Expected %s to be a JSON object or list, got %s' %
          (url, body_json.__class__.__name__))

    mf2 = None
    if input == 'html':
      mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
    elif input in ('mf2-json', 'json-mf2'):
      mf2 = body_json
      if not hasattr(mf2, 'get'):
        raise exc.HTTPBadRequest(
          'Expected microformats2 JSON input to be dict, got %s' %
          mf2.__class__.__name__)
      mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
      def fetch_mf2_func(url):
        # for SILO_DOMAINS, return a stub h-card containing only the URL
        # instead of fetching the page
        if util.domain_or_parent_in(urlparse.urlparse(url).netloc, SILO_DOMAINS):
          return {'items': [{'type': ['h-card'], 'properties': {'url': [url]}}]}
        _, doc = self._fetch(url)
        return mf2py.parse(doc=doc, url=url, img_with_alt=True)

      try:
        actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func)
        title = microformats2.get_title(mf2)
        hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
      except (KeyError, ValueError) as e:
        raise exc.HTTPBadRequest('Could not parse %s as %s: %s' % (url, input, e))

    # convert the decoded input to AS1 activities
    try:
      if input in ('as1', 'activitystreams'):
        activities = body_items
      elif input == 'as2':
        activities = [as2.to_as1(obj) for obj in body_items]
      elif input == 'atom':
        try:
          activities = atom.atom_to_activities(body)
        except ElementTree.ParseError as e:
          raise exc.HTTPBadRequest('Could not parse %s as XML: %s' % (url, e))
        except ValueError as e:
          raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' % (url, e))
      elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
      elif input in ('mf2-json', 'json-mf2'):
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in mf2.get('items', [])]
      elif input == 'jsonfeed':
        # JSON Feed supplies its own actor, replacing any found above
        activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
      logging.warning('parsing input failed', exc_info=True)
      self.abort(400, 'Could not parse %s as %s: %s' % (url, input, str(e)))

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title, hfeed=hfeed)