def get(self):
    """Fetch the requested URL and respond with its decoded contents.

    Reads the required ``input`` and ``url`` params via
    ``util.get_required_param``, downloads the URL with ``urllib2``, decodes
    the body according to ``input`` and passes the result, wrapped by
    ``source.Source.make_activities_base_response``, to
    ``self.write_response``.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not one of the supported formats.
    """
    supported = ('activitystreams', 'html', 'json-mf2')
    fmt = util.get_required_param(self, 'input')
    if fmt not in supported:
        message = 'Invalid input: %s, expected one of %r' % (fmt, supported)
        raise exc.HTTPBadRequest(message)

    # Download the document, logging the final location if it differs.
    requested = util.get_required_param(self, 'url')
    logging.info('Fetching %s', requested)
    response = urllib2.urlopen(requested,
                               timeout=appengine_config.HTTP_TIMEOUT)
    landed = response.geturl()
    if requested != landed:
        logging.info('Redirected to %s', landed)
    payload = response.read()

    # Decode the payload according to the requested format.
    if fmt == 'html':
        activities = microformats2.html_to_activities(payload, landed)
    elif fmt == 'json-mf2':
        mf2_items = json.loads(payload).get('items', [])
        activities = [microformats2.json_to_object(entry)
                      for entry in mf2_items]
    else:
        # Only 'activitystreams' remains after the validation above.
        activities = json.loads(payload)

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response)
def get(self):
    """Handle a GET: fetch ``url`` and write it back decoded from ``input``.

    The ``input`` param selects the decoder ('activitystreams', 'html' or
    'json-mf2'); the ``url`` param is the document to download. The decoded
    activities are wrapped with
    ``source.Source.make_activities_base_response`` and written with
    ``self.write_response``.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    kind = util.get_required_param(self, 'input')
    if kind not in expected_inputs:
        raise exc.HTTPBadRequest(
            'Invalid input: %s, expected one of %r' % (kind, expected_inputs))

    # fetch url
    target = util.get_required_param(self, 'url')
    logging.info('Fetching %s', target)
    resp = urllib2.urlopen(target, timeout=appengine_config.HTTP_TIMEOUT)
    if resp.geturl() != target:
        logging.info('Redirected to %s', resp.geturl())
    raw = resp.read()

    # decode data: one small decoder per input format, looked up by name.
    def from_activitystreams():
        return json.loads(raw)

    def from_html():
        return microformats2.html_to_activities(raw, resp.geturl())

    def from_json_mf2():
        return [microformats2.json_to_object(obj)
                for obj in json.loads(raw).get('items', [])]

    decoders = {
        'activitystreams': from_activitystreams,
        'html': from_html,
        'json-mf2': from_json_mf2,
    }
    activities = decoders[kind]()

    self.write_response(
        source.Source.make_activities_base_response(activities))
def get(self):
    """Fetch (or load from memcache) ``url`` and write it back as activities.

    Request params read here: ``input`` (required format name), ``url``
    (required) and ``cache`` (optional; any value other than 'false',
    compared case-insensitively, leaves caching on).

    For 'html' and 'json-mf2' input the microformats2 data is also used to
    look up an author and a feed title, which are passed along to
    ``self.write_response``.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    fmt = util.get_required_param(self, 'input')
    if fmt not in expected_inputs:
        raise exc.HTTPBadRequest(
            'Invalid input: %s, expected one of %r' % (fmt, expected_inputs))
    url = util.get_required_param(self, 'url')

    # check if request is cached
    use_cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    hit = None
    if use_cache:
        hit = memcache.get(cache_key)

    if hit:
        logging.info('Serving cached response %r', cache_key)
        url, body = hit['url'], hit['body']
    else:
        # fetch url
        try:
            resp = util.urlopen(url)
        except (ValueError, httplib.InvalidURL) as e:
            self.abort(400, str(e))
        # other exceptions are handled by webutil.handlers.handle_exception(),
        # which uses interpret_http_exception(), etc.

        final_url = resp.geturl()
        if final_url != url:
            url = final_url
            logging.info('Redirected to %s', url)
        body = resp.read()

        if use_cache:
            logging.info('Caching response in %r', cache_key)
            entry = {'url': url, 'body': body}
            memcache.set(cache_key, entry, URL_CACHE_TIME)

    # decode data
    is_html = fmt == 'html'
    is_mf2_json = fmt == 'json-mf2'

    mf2 = None
    if is_html:
        mf2 = mf2py.parse(doc=body, url=url)
    elif is_mf2_json:
        mf2 = json.loads(body)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = title = None
    if mf2:
        actor = microformats2.find_author(
            mf2, fetch_mf2_func=lambda url: mf2py.parse(url=url))
        title = mf2util.interpret_feed(mf2, url).get('name')

    if is_html:
        activities = microformats2.html_to_activities(body, url, actor)
    elif is_mf2_json:
        activities = [microformats2.json_to_object(obj, actor=actor)
                      for obj in mf2.get('items', [])]
    else:
        # Only 'activitystreams' remains after the validation above.
        activities = json.loads(body)

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response, url=url, actor=actor, title=title)
def get(self):
    """Fetch ``url`` via ``self._urlopen`` and write it back as activities.

    Supported ``input`` values are 'activitystreams', 'html', 'json-mf2'
    and 'jsonfeed'. For the two microformats2 formats an author and feed
    title are looked up as well; for 'jsonfeed' the actor comes from
    ``jsonfeed.jsonfeed_to_activities``.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2', 'jsonfeed')
    fmt = util.get_required_param(self, 'input')
    if fmt not in expected_inputs:
        raise exc.HTTPBadRequest(
            'Invalid input: %s, expected one of %r' % (fmt, expected_inputs))

    requested_url = util.get_required_param(self, 'url')
    url, body = self._urlopen(requested_url)

    # decode data
    if fmt == 'html':
        mf2 = mf2py.parse(doc=body, url=url)
    elif fmt == 'json-mf2':
        mf2 = json.loads(body)
        mf2.setdefault('rels', {})  # mf2util expects rels
    else:
        mf2 = None

    actor, title = None, None
    if mf2:
        def fetch_mf2_func(url):
            # Goes through self._urlopen, the same fetch path as the
            # top-level request.
            _unused_url, doc = self._urlopen(url)
            return mf2py.parse(doc=doc, url=url)

        actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func)
        title = mf2util.interpret_feed(mf2, url).get('name')

    if fmt == 'jsonfeed':
        feed = json.loads(body)
        activities, actor = jsonfeed.jsonfeed_to_activities(feed)
    elif fmt == 'json-mf2':
        activities = [microformats2.json_to_object(obj, actor=actor)
                      for obj in mf2.get('items', [])]
    elif fmt == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
    elif fmt == 'activitystreams':
        activities = json.loads(body)

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response, url=url, actor=actor, title=title)
def test_html_to_activities_brs_to_newlines(self):
    """Check that <br> tags in e-content become newlines in displayName.

    Mostly exercises mf2py's <br> handling. Background:
    https://github.com/snarfed/granary/issues/142
    https://github.com/microformats/mf2py/issues/51
    https://pin13.net/mf2/whitespace.html
    """
    markup = """\
<article class="h-entry">
<div class="e-content p-name">foo bar<br />baz <br><br> baj</div>
</article>"""
    expected_note = {
        'objectType': 'note',
        'content': 'foo bar<br/>baz <br/><br/> baj',
        'content_is_html': True,
        'displayName': 'foo bar\nbaz \n\n baj',
    }
    parsed = microformats2.html_to_activities(markup)
    self.assert_equals([{'object': expected_note}], parsed)
def test_html_to_activities_brs_to_newlines(self):
    """Verify <br> elements in the HTML content map to newlines in the name.

    This is mostly a check on mf2py's whitespace handling. Background:
    https://github.com/snarfed/granary/issues/142
    https://github.com/microformats/mf2py/issues/51
    https://pin13.net/mf2/whitespace.html
    """
    want = [{
        'object': {
            'objectType': 'note',
            'content': 'foo bar<br/>baz <br/><br/> baj',
            'content_is_html': True,
            'displayName': 'foo bar\nbaz \n\n baj',
        },
    }]
    document = """\
<article class="h-entry">
<div class="e-content p-name">foo bar<br />baz <br><br> baj</div>
</article>"""
    got = microformats2.html_to_activities(document)
    self.assert_equals(want, got)
def get(self):
    """Fetch ``url`` and write it back converted from the ``input`` format.

    Reads the required ``input`` and ``url`` params via
    ``util.get_required_param``, fetches the URL with ``util.urlopen`` and
    decodes the body. For 'html' and 'json-mf2' input, the microformats2
    data is also used to find an author and a feed title, which are passed
    to ``self.write_response`` along with the activities.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input = util.get_required_param(self, 'input')
    if input not in expected_inputs:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, expected_inputs))

    # fetch url. From here on, url is whatever resp.geturl() reports.
    url = util.get_required_param(self, 'url')
    resp = util.urlopen(url)
    if url != resp.geturl():
        url = resp.geturl()
        logging.info('Redirected to %s', url)
    body = resp.read()

    # decode data
    mf2 = None
    if input == 'activitystreams':
        activities = json.loads(body)
    elif input == 'html':
        activities = microformats2.html_to_activities(body, url)
        mf2 = mf2py.parse(doc=body, url=url)
    elif input == 'json-mf2':
        mf2 = json.loads(body)
        # mf2util expects rels. Only fill in an empty dict when the document
        # has none, so rels supplied by the input are not thrown away.
        mf2.setdefault('rels', {})
        activities = [microformats2.json_to_object(item)
                      for item in mf2.get('items', [])]

    author = None
    title = None
    if mf2:
        author = microformats2.find_author(mf2)
        title = mf2util.interpret_feed(mf2, url).get('name')

    self.write_response(
        source.Source.make_activities_base_response(activities),
        url=url, actor=author, title=title)
def get(self):
    """Fetch ``url`` and write it back converted from the ``input`` format.

    Reads the required ``input`` and ``url`` params via
    ``util.get_required_param``, fetches the URL with ``self._fetch``,
    decodes the body according to ``input``, and hands the resulting
    activities, plus the author and title found in microformats2 input, to
    ``self.write_response``.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not in ``INPUTS``, if the body
        can't be decoded or parsed as the requested format, or if JSON
        input has an unusable top-level type.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                 'jsonfeed'):
        try:
            body_json = json.loads(body)
        except (TypeError, ValueError):
            raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

        # Valid JSON can still be a bare string, number, boolean or null,
        # none of which has .get(); reject those instead of crashing.
        if isinstance(body_json, list):
            body_items = body_json
        elif isinstance(body_json, dict):
            body_items = body_json.get('items') or [body_json]
        else:
            raise exc.HTTPBadRequest(
                'Expected JSON object or list from %s, got %s' %
                (url, body_json.__class__.__name__))

    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url)
    elif input in ('mf2-json', 'json-mf2'):
        mf2 = body_json
        # A top-level list is fine for AS1/AS2 but not for mf2 JSON, which
        # is read below with dict methods.
        if not isinstance(mf2, dict):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    if mf2:
        def fetch_mf2_func(url):
            # URLs whose domain matches SILO_DOMAINS are not fetched; a stub
            # h-card carrying just the URL is returned instead.
            if util.domain_or_parent_in(urlparse.urlparse(url).netloc,
                                        SILO_DOMAINS):
                return {
                    'items': [{
                        'type': ['h-card'],
                        'properties': {'url': [url]},
                    }],
                }
            _, doc = self._fetch(url)
            return mf2py.parse(doc=doc, url=url)

        try:
            actor = microformats2.find_author(
                mf2, fetch_mf2_func=fetch_mf2_func)
            title = microformats2.get_title(mf2)
        except (KeyError, ValueError) as e:
            raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                     (url, input, e))

    if input in ('as1', 'activitystreams'):
        activities = body_items
    elif input == 'as2':
        activities = [as2.to_as1(obj) for obj in body_items]
    elif input == 'atom':
        try:
            activities = atom.atom_to_activities(body)
        except ElementTree.ParseError as e:
            raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                     (url, e))
        except ValueError as e:
            raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                     (url, e))
    elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
    elif input in ('mf2-json', 'json-mf2'):
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in mf2.get('items', [])]
    elif input == 'jsonfeed':
        try:
            activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
        except ValueError:
            # logging.exception records the traceback for us.
            logging.exception('jsonfeed_to_activities failed')
            raise exc.HTTPBadRequest('Could not parse %s as JSON Feed' % url)

    self.write_response(
        source.Source.make_activities_base_response(activities),
        url=url, actor=actor, title=title)
def get(self):
    """Fetch ``url`` and write it back converted from the ``input`` format.

    Reads the required ``input`` and ``url`` params via
    ``util.get_required_param`` and fetches the URL with
    ``util.requests_get``. A URL fragment is only accepted with
    ``input=html``, where it is passed on as the ``id`` argument to
    ``util.parse_mf2`` and ``microformats2.html_to_activities``.

    The resulting activities, plus the author, title and h-feed found in
    microformats2 input, are handed to ``self.write_response``.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not in ``INPUTS``, if a fragment
        is used with non-HTML input or matches no element, if the body
        can't be decoded or parsed as the requested format, or if JSON
        input has an unusable top-level type.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, INPUTS))

    orig_url = util.get_required_param(self, 'url')
    fragment = urllib.parse.urlparse(orig_url).fragment
    if fragment and input != 'html':
        raise exc.HTTPBadRequest(
            'URL fragments only supported with input=html.')

    resp = util.requests_get(orig_url, gateway=True)
    final_url = resp.url

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                 'jsonfeed'):
        try:
            body_json = json_loads(resp.text)
        except (TypeError, ValueError):
            raise exc.HTTPBadRequest('Could not decode %s as JSON' %
                                     final_url)

        # Valid JSON can still be a bare string, number, boolean or null,
        # none of which has .get(); reject those instead of crashing.
        if isinstance(body_json, list):
            body_items = body_json
        elif isinstance(body_json, dict):
            body_items = body_json.get('items') or [body_json]
        else:
            raise exc.HTTPBadRequest(
                'Expected JSON object or list from %s, got %s' %
                (final_url, body_json.__class__.__name__))

    mf2 = None
    if input == 'html':
        mf2 = util.parse_mf2(resp, id=fragment)
        # Test the fragment, not the builtin id() function, which is always
        # truthy and made this error fire even without a fragment.
        if fragment and not mf2:
            raise exc.HTTPBadRequest(
                'Got fragment %s but no element found with that id.' %
                fragment)
    elif input in ('mf2-json', 'json-mf2'):
        mf2 = body_json
        if not hasattr(mf2, 'get'):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
        def fetch_mf2_func(url):
            # URLs whose domain matches SILO_DOMAINS are not fetched; a stub
            # h-card carrying just the URL is returned instead.
            if util.domain_or_parent_in(urllib.parse.urlparse(url).netloc,
                                        SILO_DOMAINS):
                return {
                    'items': [{
                        'type': ['h-card'],
                        'properties': {'url': [url]},
                    }],
                }
            return util.fetch_mf2(url, gateway=True)

        try:
            actor = microformats2.find_author(
                mf2, fetch_mf2_func=fetch_mf2_func)
            title = microformats2.get_title(mf2)
            hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
        except (KeyError, ValueError) as e:
            raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                     (final_url, input, e))

    try:
        if input in ('as1', 'activitystreams'):
            activities = body_items
        elif input == 'as2':
            activities = [as2.to_as1(obj) for obj in body_items]
        elif input == 'atom':
            try:
                activities = atom.atom_to_activities(resp.text)
            except ElementTree.ParseError as e:
                raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                         (final_url, e))
            except ValueError as e:
                raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                         (final_url, e))
        elif input == 'html':
            activities = microformats2.html_to_activities(
                resp, url=final_url, id=fragment, actor=actor)
        elif input in ('mf2-json', 'json-mf2'):
            activities = [microformats2.json_to_object(item, actor=actor)
                          for item in mf2.get('items', [])]
        elif input == 'jsonfeed':
            activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
        logging.warning('parsing input failed', stack_info=True)
        self.abort(400, 'Could not parse %s as %s: %s' %
                   (final_url, input, str(e)))

    self.write_response(
        source.Source.make_activities_base_response(activities),
        url=final_url, actor=actor, title=title, hfeed=hfeed)
def html_to_activity(html):
    """Return the ``object`` of the first activity parsed from ``html``.

    Indexes the first element of the result of
    ``microformats2.html_to_activities(html)``, so it fails if that result
    is empty.
    """
    first_activity = microformats2.html_to_activities(html)[0]
    return first_activity['object']
def get(self):
    """Serve ``url`` converted from ``input``, using memcache when allowed.

    Request params read here: ``input`` (required format name), ``url``
    (required) and ``cache`` (optional; any value other than 'false',
    compared case-insensitively, leaves caching on).

    Fetch failures are reported through ``self.abort``: 400 for
    ``ValueError`` / ``httplib.InvalidURL``, and 504 for anything
    ``util.is_connection_failure`` accepts. Other exceptions propagate.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not one of the expected values.
    """
    expected_inputs = ('activitystreams', 'html', 'json-mf2')
    input_format = util.get_required_param(self, 'input')
    if input_format not in expected_inputs:
        raise exc.HTTPBadRequest(
            'Invalid input: %s, expected one of %r' %
            (input_format, expected_inputs))

    url = util.get_required_param(self, 'url')

    # check if request is cached
    cache_key = 'U %s' % url
    caching_enabled = 'false' != self.request.get('cache', '').lower()
    cached = memcache.get(cache_key) if caching_enabled else None

    if not cached:
        # fetch url
        try:
            resp = util.urlopen(url)
        except (ValueError, httplib.InvalidURL) as e:
            self.abort(400, str(e))
        except Exception as e:
            if util.is_connection_failure(e):
                # HTTP 504 Gateway Timeout
                self.abort(504, str(e))
            raise

        resolved = resp.geturl()
        if resolved != url:
            url = resolved
            logging.info('Redirected to %s', url)
        body = resp.read()

        if caching_enabled:
            logging.info('Caching response in %r', cache_key)
            memcache.set(cache_key, {'url': url, 'body': body},
                         URL_CACHE_TIME)
    else:
        logging.info('Serving cached response %r', cache_key)
        url = cached['url']
        body = cached['body']

    # decode data
    actor = None
    title = None
    if input_format == 'activitystreams':
        activities = json.loads(body)
    else:
        from_html = input_format == 'html'
        if from_html:
            mf2 = mf2py.parse(doc=body, url=url)
        else:
            # Only 'json-mf2' remains after the validation above.
            mf2 = json.loads(body)
            mf2.setdefault('rels', {})  # mf2util expects rels

        if mf2:
            actor = microformats2.find_author(
                mf2, fetch_mf2_func=lambda url: mf2py.parse(url=url))
            title = mf2util.interpret_feed(mf2, url).get('name')

        if from_html:
            activities = microformats2.html_to_activities(body, url, actor)
        else:
            activities = [microformats2.json_to_object(obj, actor=actor)
                          for obj in mf2.get('items', [])]

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response, url=url, actor=actor, title=title)
def get(self):
    """Fetch ``url`` and write it back converted from the ``input`` format.

    Reads the required ``input`` and ``url`` params via
    ``util.get_required_param``, fetches the URL with ``self._fetch``,
    decodes the body according to ``input``, and hands the resulting
    activities, plus the author, title and h-feed found in microformats2
    input, to ``self.write_response``.

    Raises:
      exc.HTTPBadRequest: if ``input`` is not in ``INPUTS``, if the body
        can't be decoded or parsed as the requested format, or if JSON
        input has an unusable top-level type.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                 'jsonfeed'):
        try:
            body_json = json.loads(body)
        except (TypeError, ValueError):
            raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

        # Valid JSON can still be a bare string, number, boolean or null,
        # none of which has .get(); reject those instead of crashing.
        if isinstance(body_json, list):
            body_items = body_json
        elif isinstance(body_json, dict):
            body_items = body_json.get('items') or [body_json]
        else:
            raise exc.HTTPBadRequest(
                'Expected JSON object or list from %s, got %s' %
                (url, body_json.__class__.__name__))

    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
    elif input in ('mf2-json', 'json-mf2'):
        mf2 = body_json
        if not hasattr(mf2, 'get'):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
        def fetch_mf2_func(url):
            # URLs whose domain matches SILO_DOMAINS are not fetched; a stub
            # h-card carrying just the URL is returned instead.
            if util.domain_or_parent_in(urlparse.urlparse(url).netloc,
                                        SILO_DOMAINS):
                return {
                    'items': [{
                        'type': ['h-card'],
                        'properties': {'url': [url]},
                    }],
                }
            _, doc = self._fetch(url)
            return mf2py.parse(doc=doc, url=url, img_with_alt=True)

        try:
            actor = microformats2.find_author(
                mf2, fetch_mf2_func=fetch_mf2_func)
            title = microformats2.get_title(mf2)
            hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
        except (KeyError, ValueError) as e:
            raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                     (url, input, e))

    try:
        if input in ('as1', 'activitystreams'):
            activities = body_items
        elif input == 'as2':
            activities = [as2.to_as1(obj) for obj in body_items]
        elif input == 'atom':
            try:
                activities = atom.atom_to_activities(body)
            except ElementTree.ParseError as e:
                raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                         (url, e))
            except ValueError as e:
                raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                         (url, e))
        elif input == 'html':
            activities = microformats2.html_to_activities(body, url, actor)
        elif input in ('mf2-json', 'json-mf2'):
            activities = [microformats2.json_to_object(item, actor=actor)
                          for item in mf2.get('items', [])]
        elif input == 'jsonfeed':
            activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
        logging.warning('parsing input failed', exc_info=True)
        self.abort(400, 'Could not parse %s as %s: %s' %
                   (url, input, str(e)))

    self.write_response(
        source.Source.make_activities_base_response(activities),
        url=url, actor=actor, title=title, hfeed=hfeed)