def get(self):
    """Handle GET: fetch the ``url`` param and convert it to activities.

    Reads the required ``input`` and ``url`` request parameters, fetches
    the URL with ``self._fetch``, decodes the body according to ``input``
    and passes the resulting activities to ``self.write_response``.

    Raises:
        exc.HTTPBadRequest: if ``input`` is not in ``INPUTS``, if the body
            cannot be decoded as the JSON structure a JSON-based input
            needs, if microformats2 JSON input is not a dict, or if one of
            the format-specific parsers rejects the body.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                 'jsonfeed'):
        try:
            body_json = json.loads(body)
            body_items = (body_json if isinstance(body_json, list)
                          else body_json.get('items') or [body_json])
        except (TypeError, ValueError, AttributeError):
            # AttributeError: the body was valid JSON but a scalar (number,
            # string, null, ...), which has no .get(). Report it as a bad
            # request instead of letting it escape as a server error.
            raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url)
    elif input in ('mf2-json', 'json-mf2'):
        mf2 = body_json
        # A top-level JSON list passes the decode step above but has no
        # setdefault()/get(); reject it explicitly.
        if not hasattr(mf2, 'get'):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    if mf2:
        def fetch_mf2_func(url):
            # URLs on a domain in SILO_DOMAINS are not fetched; a stub
            # h-card carrying only the URL is returned instead.
            if util.domain_or_parent_in(
                    urlparse.urlparse(url).netloc, SILO_DOMAINS):
                return {'items': [{'type': ['h-card'],
                                   'properties': {'url': [url]}}]}
            _, doc = self._fetch(url)
            return mf2py.parse(doc=doc, url=url)

        try:
            actor = microformats2.find_author(
                mf2, fetch_mf2_func=fetch_mf2_func)
            title = microformats2.get_title(mf2)
        except (KeyError, ValueError) as e:
            raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                     (url, input, e))

    # convert the decoded body to a list of activities
    if input in ('as1', 'activitystreams'):
        activities = body_items
    elif input == 'as2':
        activities = [as2.to_as1(obj) for obj in body_items]
    elif input == 'atom':
        try:
            activities = atom.atom_to_activities(body)
        except ElementTree.ParseError as e:
            raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                     (url, e))
        except ValueError as e:
            raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                     (url, e))
    elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
    elif input in ('mf2-json', 'json-mf2'):
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in mf2.get('items', [])]
    elif input == 'jsonfeed':
        try:
            # the JSON Feed converter also supplies the actor
            activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
        except ValueError:
            logging.exception('jsonfeed_to_activities failed')
            raise exc.HTTPBadRequest('Could not parse %s as JSON Feed' % url)

    self.write_response(
        source.Source.make_activities_base_response(activities),
        url=url, actor=actor, title=title)
def get(self):
    """Handle GET: fetch the ``url`` param and convert it to activities.

    Reads the required ``input`` and ``url`` request parameters, fetches
    the URL with ``util.requests_get``, decodes the response according to
    ``input`` and passes the resulting activities to
    ``self.write_response`` along with the final (post-fetch) URL, actor,
    title and first h-feed.

    A URL fragment is only accepted with ``input=html``, where it is
    passed as the ``id`` to the microformats2 parsing helpers.

    Raises:
        exc.HTTPBadRequest: if ``input`` is not in ``INPUTS``, if a
            fragment is given with a non-HTML input, if a fragment is
            given but no microformats2 data is found for it, if the body
            cannot be decoded as the JSON structure a JSON-based input
            needs, if microformats2 JSON input is not a dict, or if
            author/title/XML/Atom parsing fails.

    Other ``ValueError`` conversion failures are reported via
    ``self.abort(400, ...)``.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, INPUTS))

    orig_url = util.get_required_param(self, 'url')
    fragment = urllib.parse.urlparse(orig_url).fragment
    if fragment and input != 'html':
        raise exc.HTTPBadRequest(
            'URL fragments only supported with input=html.')

    resp = util.requests_get(orig_url, gateway=True)
    final_url = resp.url

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                 'jsonfeed'):
        try:
            body_json = json_loads(resp.text)
            body_items = (body_json if isinstance(body_json, list)
                          else body_json.get('items') or [body_json])
        except (TypeError, ValueError, AttributeError):
            # AttributeError: the body was valid JSON but a scalar (number,
            # string, null, ...), which has no .get(). Report it as a bad
            # request instead of letting it escape as a server error.
            raise exc.HTTPBadRequest('Could not decode %s as JSON' %
                                     final_url)

    mf2 = None
    if input == 'html':
        mf2 = util.parse_mf2(resp, id=fragment)
        # Must test the requested fragment, not the builtin ``id`` (which is
        # always truthy and would make this fire for any falsy mf2).
        if fragment and not mf2:
            raise exc.HTTPBadRequest(
                'Got fragment %s but no element found with that id.' %
                fragment)
    elif input in ('mf2-json', 'json-mf2'):
        mf2 = body_json
        if not hasattr(mf2, 'get'):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
        def fetch_mf2_func(url):
            # URLs on a domain in SILO_DOMAINS are not fetched; a stub
            # h-card carrying only the URL is returned instead.
            if util.domain_or_parent_in(
                    urllib.parse.urlparse(url).netloc, SILO_DOMAINS):
                return {'items': [{'type': ['h-card'],
                                   'properties': {'url': [url]}}]}
            return util.fetch_mf2(url, gateway=True)

        try:
            actor = microformats2.find_author(
                mf2, fetch_mf2_func=fetch_mf2_func)
            title = microformats2.get_title(mf2)
            hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
        except (KeyError, ValueError) as e:
            raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                     (final_url, input, e))

    # convert the decoded body to a list of activities
    try:
        if input in ('as1', 'activitystreams'):
            activities = body_items
        elif input == 'as2':
            activities = [as2.to_as1(obj) for obj in body_items]
        elif input == 'atom':
            try:
                activities = atom.atom_to_activities(resp.text)
            except ElementTree.ParseError as e:
                raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                         (final_url, e))
            except ValueError as e:
                raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                         (final_url, e))
        elif input == 'html':
            activities = microformats2.html_to_activities(
                resp, url=final_url, id=fragment, actor=actor)
        elif input in ('mf2-json', 'json-mf2'):
            activities = [microformats2.json_to_object(item, actor=actor)
                          for item in mf2.get('items', [])]
        elif input == 'jsonfeed':
            # the JSON Feed converter also supplies the actor
            activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
        # NOTE(review): stack_info=True logs the current call stack, not the
        # exception traceback; exc_info=True may have been intended - confirm.
        logging.warning('parsing input failed', stack_info=True)
        self.abort(400, 'Could not parse %s as %s: %s' %
                   (final_url, input, str(e)))

    self.write_response(
        source.Source.make_activities_base_response(activities),
        url=final_url, actor=actor, title=title, hfeed=hfeed)
def test_atom_feed_to_activities(self):
    """atom_to_activities(INSTAGRAM_FEED) yields exactly INSTAGRAM_ACTIVITY."""
    expected = [INSTAGRAM_ACTIVITY]
    actual = atom.atom_to_activities(INSTAGRAM_FEED)
    self.assert_equals(expected, actual)
def get(self):
    """Handle GET: fetch the ``url`` param and convert it to activities.

    Reads the required ``input`` and ``url`` request parameters, fetches
    the URL with ``self._fetch``, decodes the body according to ``input``
    and passes the resulting activities to ``self.write_response`` along
    with the URL, actor, title and first h-feed.

    Raises:
        exc.HTTPBadRequest: if ``input`` is not in ``INPUTS``, if the body
            cannot be decoded as the JSON structure a JSON-based input
            needs, if microformats2 JSON input is not a dict, or if
            author/title/XML/Atom parsing fails.

    Other ``ValueError`` conversion failures are reported via
    ``self.abort(400, ...)``.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
        raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                 (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                 'jsonfeed'):
        try:
            body_json = json.loads(body)
            body_items = (body_json if isinstance(body_json, list)
                          else body_json.get('items') or [body_json])
        except (TypeError, ValueError, AttributeError):
            # AttributeError: the body was valid JSON but a scalar (number,
            # string, null, ...), which has no .get(). Report it as a bad
            # request instead of letting it escape as a server error.
            raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

    mf2 = None
    if input == 'html':
        mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
    elif input in ('mf2-json', 'json-mf2'):
        mf2 = body_json
        # A top-level JSON list passes the decode step above but has no
        # setdefault()/get(); reject it explicitly.
        if not hasattr(mf2, 'get'):
            raise exc.HTTPBadRequest(
                'Expected microformats2 JSON input to be dict, got %s' %
                mf2.__class__.__name__)
        mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
        def fetch_mf2_func(url):
            # URLs on a domain in SILO_DOMAINS are not fetched; a stub
            # h-card carrying only the URL is returned instead.
            if util.domain_or_parent_in(urlparse.urlparse(url).netloc,
                                        SILO_DOMAINS):
                return {'items': [{'type': ['h-card'],
                                   'properties': {'url': [url]}}]}
            _, doc = self._fetch(url)
            return mf2py.parse(doc=doc, url=url, img_with_alt=True)

        try:
            actor = microformats2.find_author(
                mf2, fetch_mf2_func=fetch_mf2_func)
            title = microformats2.get_title(mf2)
            hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
        except (KeyError, ValueError) as e:
            raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                     (url, input, e))

    # convert the decoded body to a list of activities
    try:
        if input in ('as1', 'activitystreams'):
            activities = body_items
        elif input == 'as2':
            activities = [as2.to_as1(obj) for obj in body_items]
        elif input == 'atom':
            try:
                activities = atom.atom_to_activities(body)
            except ElementTree.ParseError as e:
                raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                         (url, e))
            except ValueError as e:
                raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                         (url, e))
        elif input == 'html':
            activities = microformats2.html_to_activities(body, url, actor)
        elif input in ('mf2-json', 'json-mf2'):
            activities = [microformats2.json_to_object(item, actor=actor)
                          for item in mf2.get('items', [])]
        elif input == 'jsonfeed':
            # the JSON Feed converter also supplies the actor
            activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
        logging.warning('parsing input failed', exc_info=True)
        self.abort(400, 'Could not parse %s as %s: %s' %
                   (url, input, str(e)))

    self.write_response(
        source.Source.make_activities_base_response(activities),
        url=url, actor=actor, title=title, hfeed=hfeed)