Beispiel #1
0
    def get(self):
        expected_inputs = ('activitystreams', 'html', 'json-mf2')
        input = util.get_required_param(self, 'input')
        if input not in expected_inputs:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, expected_inputs))

        # fetch url
        url = util.get_required_param(self, 'url')
        logging.info('Fetching %s', url)
        resp = urllib2.urlopen(url, timeout=appengine_config.HTTP_TIMEOUT)
        if url != resp.geturl():
            logging.info('Redirected to %s', resp.geturl())
        body = resp.read()

        # decode data
        if input == 'activitystreams':
            activities = json.loads(body)
        elif input == 'html':
            activities = microformats2.html_to_activities(body, resp.geturl())
        elif input == 'json-mf2':
            activities = [
                microformats2.json_to_object(item)
                for item in json.loads(body).get('items', [])
            ]

        self.write_response(
            source.Source.make_activities_base_response(activities))
Beispiel #2
0
  def get(self):
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, expected_inputs))

    # fetch url
    url = util.get_required_param(self, 'url')
    logging.info('Fetching %s', url)
    resp = urllib2.urlopen(url, timeout=appengine_config.HTTP_TIMEOUT)
    if url != resp.geturl():
      logging.info('Redirected to %s', resp.geturl())
    body = resp.read()

    # decode data
    if input == 'activitystreams':
      activities = json.loads(body)
    elif input == 'html':
      activities = microformats2.html_to_activities(body, resp.geturl())
    elif input == 'json-mf2':
      activities = [microformats2.json_to_object(item)
                    for item in json.loads(body).get('items', [])]

    self.write_response(source.Source.make_activities_base_response(activities))
Beispiel #3
0
  def get(self):
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, expected_inputs))
    url = util.get_required_param(self, 'url')

    # check if request is cached
    cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    cached = memcache.get(cache_key) if cache else None

    if cached:
      logging.info('Serving cached response %r', cache_key)
      url = cached['url']
      body = cached['body']
    else:
      # fetch url
      try:
        resp = util.urlopen(url)
      except (ValueError, httplib.InvalidURL) as e:
        self.abort(400, str(e))
        # other exceptions are handled by webutil.handlers.handle_exception(),
        # which uses interpret_http_exception(), etc.

      if url != resp.geturl():
        url = resp.geturl()
        logging.info('Redirected to %s', url)
      body = resp.read()

      if cache:
        logging.info('Caching response in %r', cache_key)
        memcache.set(cache_key, {'url': url, 'body': body}, URL_CACHE_TIME)

    # decode data
    mf2 = None
    if input == 'html':
      mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
      mf2 = json.loads(body)
      mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    if mf2:
      actor = microformats2.find_author(
        mf2, fetch_mf2_func=lambda url: mf2py.parse(url=url))
      title = mf2util.interpret_feed(mf2, url).get('name')

    if input == 'activitystreams':
      activities = json.loads(body)
    elif input == 'html':
      activities = microformats2.html_to_activities(body, url, actor)
    elif input == 'json-mf2':
      activities = [microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])]

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title)
Beispiel #4
0
    def get(self):
        expected_inputs = ('activitystreams', 'html', 'json-mf2', 'jsonfeed')
        input = util.get_required_param(self, 'input')
        if input not in expected_inputs:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, expected_inputs))
        url, body = self._urlopen(util.get_required_param(self, 'url'))

        # decode data
        mf2 = None
        if input == 'html':
            mf2 = mf2py.parse(doc=body, url=url)
        elif input == 'json-mf2':
            mf2 = json.loads(body)
            mf2.setdefault('rels', {})  # mf2util expects rels

        actor = None
        title = None
        if mf2:

            def fetch_mf2_func(url):
                _, doc = self._urlopen(url)
                return mf2py.parse(doc=doc, url=url)

            actor = microformats2.find_author(mf2,
                                              fetch_mf2_func=fetch_mf2_func)
            title = mf2util.interpret_feed(mf2, url).get('name')

        if input == 'activitystreams':
            activities = json.loads(body)
        elif input == 'html':
            activities = microformats2.html_to_activities(body, url, actor)
        elif input == 'json-mf2':
            activities = [
                microformats2.json_to_object(item, actor=actor)
                for item in mf2.get('items', [])
            ]
        elif input == 'jsonfeed':
            activities, actor = jsonfeed.jsonfeed_to_activities(
                json.loads(body))

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=url,
            actor=actor,
            title=title)
Beispiel #5
0
  def test_html_to_activities_brs_to_newlines(self):
    """Mostly tests that mf2py converts <br>s to \ns.

    Background:
    https://github.com/snarfed/granary/issues/142
    https://github.com/microformats/mf2py/issues/51
    https://pin13.net/mf2/whitespace.html
    """
    html = """\
<article class="h-entry">
<div class="e-content p-name">foo bar<br />baz <br><br> baj</div>
</article>"""
    activities = microformats2.html_to_activities(html)
    self.assert_equals([{'object': {
      'objectType': 'note',
      'content': 'foo bar<br/>baz <br/><br/> baj',
      'content_is_html': True,
      'displayName': 'foo bar\nbaz \n\n baj',
    }}], activities)
Beispiel #6
0
    def test_html_to_activities_brs_to_newlines(self):
        """Mostly tests that mf2py converts <br>s to \ns.

    Background:
    https://github.com/snarfed/granary/issues/142
    https://github.com/microformats/mf2py/issues/51
    https://pin13.net/mf2/whitespace.html
    """
        html = """\
<article class="h-entry">
<div class="e-content p-name">foo bar<br />baz <br><br> baj</div>
</article>"""
        activities = microformats2.html_to_activities(html)
        self.assert_equals([{
            'object': {
                'objectType': 'note',
                'content': 'foo bar<br/>baz <br/><br/> baj',
                'content_is_html': True,
                'displayName': 'foo bar\nbaz \n\n baj',
            }
        }], activities)
Beispiel #7
0
  def get(self):
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, expected_inputs))

    # fetch url
    url = util.get_required_param(self, 'url')
    resp = util.urlopen(url)
    if url != resp.geturl():
      url = resp.geturl()
      logging.info('Redirected to %s', url)
    body = resp.read()

    # decode data
    mf2 = None
    if input == 'activitystreams':
      activities = json.loads(body)
    elif input == 'html':
      activities = microformats2.html_to_activities(body, url)
      mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
      mf2 = json.loads(body)
      mf2['rels'] = {}  # mf2util expects rels
      activities = [microformats2.json_to_object(item)
                    for item in mf2.get('items', [])]

    author = None
    title = None
    if mf2:
      author = microformats2.find_author(mf2)
      title = mf2util.interpret_feed(mf2, url).get('name')

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=author, title=title)
Beispiel #8
0
    def get(self):
        input = util.get_required_param(self, 'input')
        if input not in INPUTS:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, INPUTS))
        url, body = self._fetch(util.get_required_param(self, 'url'))

        # decode data
        if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                     'jsonfeed'):
            try:
                body_json = json.loads(body)
                body_items = (body_json if isinstance(body_json, list) else
                              body_json.get('items') or [body_json])
            except (TypeError, ValueError):
                raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

        mf2 = None
        if input == 'html':
            mf2 = mf2py.parse(doc=body, url=url)
        elif input in ('mf2-json', 'json-mf2'):
            mf2 = body_json
            mf2.setdefault('rels', {})  # mf2util expects rels

        actor = None
        title = None
        if mf2:

            def fetch_mf2_func(url):
                if util.domain_or_parent_in(
                        urlparse.urlparse(url).netloc, SILO_DOMAINS):
                    return {
                        'items': [{
                            'type': ['h-card'],
                            'properties': {
                                'url': [url]
                            }
                        }]
                    }
                _, doc = self._fetch(url)
                return mf2py.parse(doc=doc, url=url)

            try:
                actor = microformats2.find_author(
                    mf2, fetch_mf2_func=fetch_mf2_func)
                title = microformats2.get_title(mf2)
            except (KeyError, ValueError) as e:
                raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                         (url, input, e))

        if input in ('as1', 'activitystreams'):
            activities = body_items
        elif input == 'as2':
            activities = [as2.to_as1(obj) for obj in body_items]
        elif input == 'atom':
            try:
                activities = atom.atom_to_activities(body)
            except ElementTree.ParseError as e:
                raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                         (url, e))
            except ValueError as e:
                raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                         (url, e))
        elif input == 'html':
            activities = microformats2.html_to_activities(body, url, actor)
        elif input in ('mf2-json', 'json-mf2'):
            activities = [
                microformats2.json_to_object(item, actor=actor)
                for item in mf2.get('items', [])
            ]
        elif input == 'jsonfeed':
            try:
                activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
            except ValueError as e:
                logging.exception('jsonfeed_to_activities failed')
                raise exc.HTTPBadRequest('Could not parse %s as JSON Feed' %
                                         url)

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=url,
            actor=actor,
            title=title)
Beispiel #9
0
    def get(self):
        input = util.get_required_param(self, 'input')
        if input not in INPUTS:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, INPUTS))

        orig_url = util.get_required_param(self, 'url')
        fragment = urllib.parse.urlparse(orig_url).fragment
        if fragment and input != 'html':
            raise exc.HTTPBadRequest(
                'URL fragments only supported with input=html.')

        resp = util.requests_get(orig_url, gateway=True)
        final_url = resp.url

        # decode data
        if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                     'jsonfeed'):
            try:
                body_json = json_loads(resp.text)
                body_items = (body_json if isinstance(body_json, list) else
                              body_json.get('items') or [body_json])
            except (TypeError, ValueError):
                raise exc.HTTPBadRequest('Could not decode %s as JSON' %
                                         final_url)

        mf2 = None
        if input == 'html':
            mf2 = util.parse_mf2(resp, id=fragment)
            if id and not mf2:
                raise exc.HTTPBadRequest(
                    'Got fragment %s but no element found with that id.' %
                    fragment)
        elif input in ('mf2-json', 'json-mf2'):
            mf2 = body_json
            if not hasattr(mf2, 'get'):
                raise exc.HTTPBadRequest(
                    'Expected microformats2 JSON input to be dict, got %s' %
                    mf2.__class__.__name__)
            mf2.setdefault('rels', {})  # mf2util expects rels

        actor = None
        title = None
        hfeed = None
        if mf2:

            def fetch_mf2_func(url):
                if util.domain_or_parent_in(
                        urllib.parse.urlparse(url).netloc, SILO_DOMAINS):
                    return {
                        'items': [{
                            'type': ['h-card'],
                            'properties': {
                                'url': [url]
                            }
                        }]
                    }
                return util.fetch_mf2(url, gateway=True)

            try:
                actor = microformats2.find_author(
                    mf2, fetch_mf2_func=fetch_mf2_func)
                title = microformats2.get_title(mf2)
                hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
            except (KeyError, ValueError) as e:
                raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                         (final_url, input, e))

        try:
            if input in ('as1', 'activitystreams'):
                activities = body_items
            elif input == 'as2':
                activities = [as2.to_as1(obj) for obj in body_items]
            elif input == 'atom':
                try:
                    activities = atom.atom_to_activities(resp.text)
                except ElementTree.ParseError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                             (final_url, e))
                except ValueError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                             (final_url, e))
            elif input == 'html':
                activities = microformats2.html_to_activities(resp,
                                                              url=final_url,
                                                              id=fragment,
                                                              actor=actor)
            elif input in ('mf2-json', 'json-mf2'):
                activities = [
                    microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])
                ]
            elif input == 'jsonfeed':
                activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
        except ValueError as e:
            logging.warning('parsing input failed', stack_info=True)
            self.abort(
                400,
                'Could not parse %s as %s: %s' % (final_url, input, str(e)))

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=final_url,
            actor=actor,
            title=title,
            hfeed=hfeed)
Beispiel #10
0
def html_to_activity(html):
  return microformats2.html_to_activities(html)[0]['object']
Beispiel #11
0
  def get(self):
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, expected_inputs))
    url = util.get_required_param(self, 'url')

    # check if request is cached
    cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    cached = memcache.get(cache_key) if cache else None

    if cached:
      logging.info('Serving cached response %r', cache_key)
      url = cached['url']
      body = cached['body']
    else:
      # fetch url
      try:
        resp = util.urlopen(url)
      except (ValueError, httplib.InvalidURL) as e:
        self.abort(400, str(e))
      except Exception as e:
        if util.is_connection_failure(e):
          # HTTP 504 Gateway Timeout
          self.abort(504, str(e))
        raise

      if url != resp.geturl():
        url = resp.geturl()
        logging.info('Redirected to %s', url)
      body = resp.read()

      if cache:
        logging.info('Caching response in %r', cache_key)
        memcache.set(cache_key, {'url': url, 'body': body}, URL_CACHE_TIME)

    # decode data
    mf2 = None
    if input == 'html':
      mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
      mf2 = json.loads(body)
      mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    if mf2:
      actor = microformats2.find_author(
        mf2, fetch_mf2_func=lambda url: mf2py.parse(url=url))
      title = mf2util.interpret_feed(mf2, url).get('name')

    if input == 'activitystreams':
      activities = json.loads(body)
    elif input == 'html':
      activities = microformats2.html_to_activities(body, url, actor)
    elif input == 'json-mf2':
      activities = [microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])]

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title)
Beispiel #12
0
  def get(self):
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2', 'jsonfeed'):
      try:
        body_json = json.loads(body)
        body_items = (body_json if isinstance(body_json, list)
                      else body_json.get('items') or [body_json])
      except (TypeError, ValueError):
        raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

    mf2 = None
    if input == 'html':
      mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
    elif input in ('mf2-json', 'json-mf2'):
      mf2 = body_json
      if not hasattr(mf2, 'get'):
        raise exc.HTTPBadRequest(
          'Expected microformats2 JSON input to be dict, got %s' %
          mf2.__class__.__name__)
      mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
      def fetch_mf2_func(url):
        if util.domain_or_parent_in(urlparse.urlparse(url).netloc, SILO_DOMAINS):
          return {'items': [{'type': ['h-card'], 'properties': {'url': [url]}}]}
        _, doc = self._fetch(url)
        return mf2py.parse(doc=doc, url=url, img_with_alt=True)

      try:
        actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func)
        title = microformats2.get_title(mf2)
        hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
      except (KeyError, ValueError) as e:
        raise exc.HTTPBadRequest('Could not parse %s as %s: %s' % (url, input, e))

    try:
      if input in ('as1', 'activitystreams'):
        activities = body_items
      elif input == 'as2':
        activities = [as2.to_as1(obj) for obj in body_items]
      elif input == 'atom':
        try:
          activities = atom.atom_to_activities(body)
        except ElementTree.ParseError as e:
          raise exc.HTTPBadRequest('Could not parse %s as XML: %s' % (url, e))
        except ValueError as e:
          raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' % (url, e))
      elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
      elif input in ('mf2-json', 'json-mf2'):
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in mf2.get('items', [])]
      elif input == 'jsonfeed':
        activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
      logging.warning('parsing input failed', exc_info=True)
      self.abort(400, 'Could not parse %s as %s: %s' % (url, input, str(e)))

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title, hfeed=hfeed)
Beispiel #13
0
def html_to_activity(html):
    return microformats2.html_to_activities(html)[0]['object']