def retrieve_and_parse_diaspora_webfinger(handle):
    """
    Retrieve and parse a remote Diaspora webfinger document.

    The ``/.well-known/webfinger`` endpoint of the handle's host is tried first.
    If that yields no document, the host-meta document is fetched and its
    ``lrdd`` link template is used to build the webfinger URL instead.

    :arg handle: Remote handle to retrieve
    :returns: dict (result of ``parse_diaspora_webfinger``), or ``None`` if the
        handle is invalid or no document could be retrieved
    """
    try:
        host = handle.split("@")[1]
    except (AttributeError, IndexError):
        # AttributeError: handle is not a string.
        # IndexError: handle contains no "@", so there is no host part.
        logger.warning("retrieve_and_parse_diaspora_webfinger: invalid handle given: %s", handle)
        return None
    document, _code, _exception = fetch_document(
        host=host, path="/.well-known/webfinger?resource=acct:%s" % quote(handle),
    )
    if document:
        return parse_diaspora_webfinger(document)
    # Fall back to discovering the webfinger URL through host-meta
    hostmeta = retrieve_diaspora_host_meta(host)
    if not hostmeta:
        return None
    url = hostmeta.find_link(rels="lrdd").template.replace("{uri}", quote(handle))
    document, _code, exception = fetch_document(url)
    if exception:
        return None
    return parse_diaspora_webfinger(document)
def fetch_nodeinfo_document(host):
    """
    Fetch and parse the NodeInfo document of a host.

    The well-known discovery document is fetched first. From it the URL of the
    highest NodeInfo version not exceeding ``HIGHEST_SUPPORTED_NODEINFO_VERSION``
    is picked, fetched and passed to ``parse_nodeinfo_document``.

    :arg host: Host to fetch from
    :returns: Result of ``parse_nodeinfo_document``, or ``None`` if any step fails
    """
    doc, _status_code, _error = fetch_document(host=host, path='/.well-known/nodeinfo')
    if not doc:
        return None
    try:
        doc = json.loads(doc)
    except json.JSONDecodeError:
        return None
    if not isinstance(doc, dict):
        # Valid JSON, but not the object the discovery document is expected to be
        return None

    url, highest_version = '', 0.0

    if doc.get('0'):
        # Buggy NodeInfo from certain old Hubzilla versions
        url = doc.get('0', {}).get('href')
    elif isinstance(doc.get('links'), dict):
        # Another buggy NodeInfo from certain old Hubzilla versions
        url = doc.get('links').get('href')
    else:
        # "links" may be missing or null, treat that as no links at all
        for link in doc.get('links') or []:
            try:
                version = float(link.get('rel').split('/')[-1])
            except (AttributeError, ValueError):
                # Entry without a usable "rel" or with a non-numeric version suffix
                continue
            if highest_version < version <= HIGHEST_SUPPORTED_NODEINFO_VERSION:
                url, highest_version = link.get('href'), version

    if not url:
        return None

    doc, _status_code, _error = fetch_document(url=url)
    if not doc:
        return None
    try:
        doc = json.loads(doc)
    except json.JSONDecodeError:
        return None
    return parse_nodeinfo_document(doc, host)
def test_extra_headers(self, mock_get):
    """Extra headers are sent along with the user-agent header."""
    target = "https://example.com/foo"
    fetch_document(target, extra_headers={'accept': 'application/activity+json'})
    expected_headers = {
        'user-agent': USER_AGENT,
        'accept': 'application/activity+json',
    }
    mock_get.assert_called_once_with(target, headers=expected_headers)
def test_host_is_called_with_https_first_then_http(self, mock_get):
    """A failing https request for a host is followed by an http request."""
    def fail_on_https(url, *args, **kwargs):
        # Only the https attempt fails, anything else gets a successful response
        if "https://" in url:
            raise HTTPError()
        return Mock(status_code=200, text="foo")

    mock_get.side_effect = fail_on_https
    fetch_document(host="localhost")
    assert mock_get.call_count == 2
    https_call = call("https://localhost/", **self.call_args)
    http_call = call("http://localhost/", **self.call_args)
    assert mock_get.call_args_list == [https_call, http_call]
def test_exception_is_raised_if_both_protocols_fail(self, mock_get):
    """
    When the request fails with ``HTTPError`` on both attempts for a host, no
    document or status code is returned and the exception is the third value.
    """
    mock_get.side_effect = HTTPError
    doc, code, exc = fetch_document(host="localhost")
    assert mock_get.call_count == 2
    assert doc is None
    assert code is None
    assert exc.__class__ is HTTPError
def test_exception_is_raised_if_url_fails(self, mock_get):
    """
    When fetching by URL fails with ``HTTPError``, only one request is made
    and the exception is returned as the third value.
    """
    mock_get.side_effect = HTTPError
    doc, code, exc = fetch_document("localhost")
    assert mock_get.call_count == 1
    assert doc is None
    assert code is None
    assert exc.__class__ is HTTPError
def test_exception_is_raised_if_http_fails_and_raise_ssl_errors_true(self, mock_get):
    """
    When fetching by URL fails with ``SSLError``, only one request is made
    and the exception is returned as the third value.
    """
    mock_get.side_effect = SSLError
    doc, code, exc = fetch_document("localhost")
    assert mock_get.call_count == 1
    assert doc is None
    assert code is None
    assert exc.__class__ is SSLError
def test_exception_is_raised_on_network_error(self, mock_get):
    """
    A plain ``RequestException`` when fetching by host stops after the first
    request; the exception is returned as the third value.
    """
    mock_get.side_effect = RequestException
    doc, code, exc = fetch_document(host="localhost")
    assert mock_get.call_count == 1
    assert doc is None
    assert code is None
    # Identity check on the class: subclasses of RequestException must not match
    assert exc.__class__ is RequestException
def retrieve_and_parse_content(
        id: str, guid: str, handle: str, entity_type: str, sender_key_fetcher: Callable[[str], str]=None,
):
    """Retrieve remote content and return an Entity class instance.

    This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".

    :param id: Not used by this function.
    :param guid: GUID of the content, used to build the fetch URL and in log messages.
    :param handle: Handle of the author. The part after "@" is used as the domain to fetch from.
    :param entity_type: Type of the entity. Lowercased before building the fetch URL.
    :param sender_key_fetcher: Function to use to fetch sender public key. If not given, network will be used
        to fetch the profile and the key. Function must take handle as only parameter and return a public key.
    :returns: Entity object instance or ``None``
    :raises: The error reported by ``fetch_document`` if there is one, otherwise a plain ``Exception`` when the
        status code is neither 200 nor 404.
    """
    if not validate_handle(handle):
        return
    _username, domain = handle.split("@")
    url = get_fetch_content_endpoint(domain, entity_type.lower(), guid)
    document, status_code, error = fetch_document(url)
    if status_code == 200:
        request = RequestType(body=document)
        _sender, _protocol, entities = handle_receive(request, sender_key_fetcher=sender_key_fetcher)
        if len(entities) > 1:
            # Trailing space matters: adjacent string literals are joined as-is
            logger.warning("retrieve_and_parse_content - more than one entity parsed from remote even though we "
                           "expected only one! ID %s", guid)
        if entities:
            return entities[0]
        return
    elif status_code == 404:
        logger.warning("retrieve_and_parse_content - remote content %s not found", guid)
        return
    if error:
        raise error
    raise Exception("retrieve_and_parse_content - unknown problem when fetching document: %s, %s, %s" % (
        document, status_code, error,
    ))
def fetch_statisticsjson_document(host):
    """
    Fetch ``/statistics.json`` from the host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_statisticsjson_document``, or ``None`` when
        nothing was fetched or the response is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/statistics.json')
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return None
    return parse_statisticsjson_document(parsed, host)
def fetch_nodeinfo2_document(host):
    """
    Fetch ``/.well-known/x-nodeinfo2`` from the host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_nodeinfo2_document``, or ``None`` when
        nothing was fetched or the response is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/.well-known/x-nodeinfo2')
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return None
    return parse_nodeinfo2_document(parsed, host)
def fetch_matrix_document(host: str) -> Optional[Dict]:
    """
    Fetch ``/_matrix/federation/v1/version`` from the host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_matrix_document``, or ``None`` when nothing
        was fetched or the response is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/_matrix/federation/v1/version')
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return None
    return parse_matrix_document(parsed, host)
def fetch_mastodon_document(host):
    """
    Fetch ``/api/v1/instance`` from the host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_mastodon_document``, or ``None`` when nothing
        was fetched or the response is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/api/v1/instance')
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return None
    return parse_mastodon_document(parsed, host)
def get_profile_room_id(self):
    """
    Look up the profile room alias in the room directory and, on a 200
    response, store the returned ``room_id`` in ``self._profile_room_id``.
    """
    # TODO: we should cache these.
    alias_url = f"{self.get_endpoint()}/directory/room/{self.profile_room_alias_url_safe}"
    body, status, _error = fetch_document(url=alias_url, extra_headers=appservice_auth_header())
    if status != 200:
        return
    self._profile_room_id = json.loads(body)["room_id"]
def parse_mastodon_document(doc, host):
    """
    Build a host metadata dict from a Mastodon instance API document.

    Starts from a copy of ``defaults`` and fills in name, version, protocols,
    user counts and organization details. The instance activity endpoint is
    fetched as well to derive weekly, monthly and half-year user counts.

    :arg doc: Parsed instance document (dict)
    :arg host: Host the document was fetched from
    :returns: dict
    """
    result = deepcopy(defaults)
    result['host'] = host
    result['name'] = doc.get('title', host)
    result['platform'] = 'mastodon'
    result['version'] = doc.get('version', '')
    # TODO parse about page
    # https://github.com/TheKinrar/mastodon-instances/blob/master/tasks/update_instances.js#L508
    # result['open_signups']

    version = re.sub(r'[^0-9.]', '', doc.get('version', ''))
    # Skip empty parts so a missing or oddly formatted version cannot crash int()
    version = [int(part) for part in version.split('.') if part]
    if version >= [1, 6, 0]:
        result['protocols'] = ['ostatus', 'activitypub']
    else:
        result['protocols'] = ['ostatus']
    result['relay'] = False

    result['activity']['users']['total'] = int_or_none(doc.get('stats', {}).get('user_count'))
    # TODO figure out what to do with posts vs comments vs statuses
    #result['activity']['users']['local_posts'] = int_or_none(doc.get('stats', {}).get('status_count'))

    # "contact_account" can be present but null
    contact_account = doc.get('contact_account') or {}
    result['organization']['account'] = contact_account.get('url', '')
    result['organization']['contact'] = doc.get('email', '')
    result['organization']['name'] = contact_account.get('display_name', '')

    activity_doc, _status_code, _error = fetch_document(host=host, path='/api/v1/instance/activity')
    if not activity_doc:
        return result
    try:
        activity_doc = json.loads(activity_doc)
    except json.JSONDecodeError:
        return result

    # Prefer the second entry and fall back to the first one.
    # NOTE(review): presumably entry 0 is the current, incomplete week - confirm.
    logins = None
    for index in (1, 0):
        try:
            logins = activity_doc[index].get('logins')
        except (IndexError, KeyError):
            # IndexError for a too short list, KeyError if a dict was returned
            continue
        break

    weekly_count = int_or_none(logins)
    total_users = result['activity']['users']['total']
    if weekly_count and total_users:
        result['activity']['users']['weekly'] = weekly_count
        # Ensure multiplied counts from weekly count don't go over total user count
        result['activity']['users']['half_year'] = min(
            int(weekly_count * WEEKLY_USERS_HALFYEAR_MULTIPLIER),
            total_users,
        )
        result['activity']['users']['monthly'] = min(
            int(weekly_count * WEEKLY_USERS_MONTHLY_MULTIPLIER),
            total_users,
        )

    return result
def retrieve_diaspora_hcard(handle):
    """
    Retrieve a remote Diaspora hCard document.

    :arg handle: Remote handle to retrieve
    :return: str (HTML document), or ``None`` if the webfinger lookup gave no
        hCard URL or fetching the hCard failed
    """
    webfinger = retrieve_and_parse_diaspora_webfinger(handle)
    if not webfinger:
        # Webfinger lookup failed, there is nothing to read the hCard URL from
        return None
    hcard_url = webfinger.get("hcard_url")
    if not hcard_url:
        return None
    document, _code, exception = fetch_document(hcard_url)
    if exception:
        return None
    return document
def retrieve_diaspora_host_meta(host):
    """
    Fetch and parse the host-meta document of a remote Diaspora host.

    :arg host: Host to retrieve from
    :returns: ``XRD`` instance, or ``None`` if fetching reported an exception
    """
    document, _code, exception = fetch_document(host=host, path="/.well-known/host-meta")
    if not exception:
        return XRD.parse_xrd(document)
    return None
def pre_send(self):
    """
    Decide whether the remote user and/or their profile room must be created.

    A non-200 response for the profile marks the profile as needing creation.
    Otherwise the profile room ID is looked up. The room is marked as needing
    creation if the profile is, or if no room ID is known.
    """
    profile_url = f"{super().get_endpoint()}/profile/{self.mxid}"
    _doc, status, _error = fetch_document(url=profile_url, extra_headers=appservice_auth_header())
    if status == 200:
        self.get_profile_room_id()
    else:
        self._remote_profile_create_needed = True
    if self._remote_profile_create_needed or not self._profile_room_id:
        self._remote_room_create_needed = True
def retrieve_diaspora_host_meta(host):
    """
    Fetch and parse the host-meta document of a remote Diaspora host.

    Args:
        host (str) - Host to retrieve from

    Returns:
        XRD, or None if fetching reported an exception
    """
    fetched = fetch_document(host=host, path="/.well-known/host-meta")
    document, _code, exception = fetched
    return None if exception else XRD.parse_xrd(document)
def retrieve_diaspora_hcard(handle):
    """
    Fetch the hCard document that the handle's webfinger document links to.

    :arg handle: Remote handle to retrieve
    :return: str (HTML document), or ``None`` if there is no webfinger
        document or fetching the hCard reported an exception
    """
    webfinger = retrieve_diaspora_webfinger(handle)
    if not webfinger:
        return None
    hcard_link = webfinger.find_link(rels="http://microformats.org/profile/hcard")
    document, _code, exception = fetch_document(hcard_link.href)
    return None if exception else document
def retrieve_and_parse_document(fid: str) -> Optional[Any]:
    """
    Retrieve remote document by ID and return the entity.

    The document is requested with an ``application/activity+json`` accept
    header, decoded as JSON and converted with ``message_to_objects``.

    :param fid: ID of the document, used as the URL to fetch.
    :returns: The first entity found, or ``None`` if nothing was fetched, the
        response is not valid JSON or no entities were found.
    """
    document, _status_code, _ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'})
    if not document:
        return None
    try:
        document = json.loads(decode_if_bytes(document))
    except json.JSONDecodeError:
        # Remote answered with something that is not JSON, treat as not found
        logger.warning("retrieve_and_parse_document - response for %s is not valid JSON", fid)
        return None
    entities = message_to_objects(document, fid)
    logger.info("retrieve_and_parse_document - found %s entities", len(entities))
    if entities:
        logger.info("retrieve_and_parse_document - using first entity: %s", entities[0])
        return entities[0]
    return None
def parse_misskey_document(doc: Dict, host: str, mastodon_document: Dict = None) -> Dict:
    """
    Build a host metadata dict from a Misskey document.

    Starts from a copy of ``defaults``. User and post counts and the contact
    account come from a Mastodon API instance document, which is fetched from
    the host when ``mastodon_document`` is not given.

    :param doc: Parsed Misskey document.
    :param host: Host the document belongs to.
    :param mastodon_document: Already parsed Mastodon API instance document, if available.
    :returns: dict
    """
    maintainer = doc.get('maintainer', {})
    features = doc.get('features', {})

    result = deepcopy(defaults)
    result['host'] = host
    result['organization']['name'] = maintainer.get('name', '')
    result['organization']['contact'] = maintainer.get('email', '')
    result['name'] = doc.get('name', host)
    result['open_signups'] = features.get('registration', False)
    result['protocols'] = ['activitypub']
    # Each enabled integration feature is listed as a service of the same name
    for service in ('twitter', 'github', 'discord'):
        if features.get(service, False):
            result['services'].append(service)
    result['platform'] = 'misskey'
    result['version'] = doc.get('version', '')
    result['features'] = features

    if not mastodon_document:
        # Fetch also Mastodon API doc to get some counts...
        api_doc, _status_code, _error = fetch_document(host=host, path='/api/v1/instance')
        if api_doc:
            try:
                mastodon_document = json.loads(api_doc)
            except json.JSONDecodeError:
                pass

    if mastodon_document:
        stats_users = mastodon_document.get('stats', {}).get('user_count')
        result['activity']['users']['total'] = int_or_none(stats_users)
        stats_posts = mastodon_document.get('stats', {}).get('status_count')
        result['activity']['local_posts'] = int_or_none(stats_posts)
        # A missing and a null contact account are both treated as empty
        contact_account = mastodon_document.get('contact_account')
        if contact_account is None:
            contact_account = {}
        result['organization']['account'] = contact_account.get('url', '')

    return result
def retrieve_diaspora_webfinger(handle):
    """
    Retrieve a remote Diaspora webfinger document.

    The webfinger URL is built from the ``lrdd`` link template found in the
    host-meta document of the handle's host.

    :arg handle: Remote handle to retrieve
    :returns: ``XRD`` instance, or ``None`` if host-meta is not available,
        fetching fails or the document is not well-formed XML
    """
    # Local import keeps this block self-contained
    from xml.parsers.expat import ExpatError

    hostmeta = retrieve_diaspora_host_meta(handle.split("@")[1])
    if not hostmeta:
        return None
    url = hostmeta.find_link(rels="lrdd").template.replace("{uri}", quote(handle))
    document, _code, exception = fetch_document(url)
    if exception:
        return None
    try:
        xrd = XRD.parse_xrd(document)
    except ExpatError:
        # Remote returned something that is not well-formed XML
        return None
    return xrd
def retrieve_diaspora_webfinger(handle):
    """
    Fetch the webfinger document for a handle via the host's ``lrdd`` template.

    :arg handle: Remote handle to retrieve
    :returns: ``XRD`` instance, or ``None`` if host-meta is not available,
        fetching reported an exception or XML parsing fails
    """
    host = handle.split("@")[1]
    hostmeta = retrieve_diaspora_host_meta(host)
    if not hostmeta:
        return None
    lrdd_template = hostmeta.find_link(rels="lrdd").template
    webfinger_url = lrdd_template.replace("{uri}", quote(handle))
    document, _code, exception = fetch_document(webfinger_url)
    if exception:
        return None
    try:
        return XRD.parse_xrd(document)
    except xml.parsers.expat.ExpatError:
        return None
def retrieve_and_parse_diaspora_webfinger(handle):
    """
    Retrieve and parse a remote Diaspora webfinger document.

    ``try_retrieve_webfinger_document`` is tried first. If it gives no
    document, the webfinger URL is built from the ``lrdd`` link template of
    the host-meta document instead.

    :arg handle: Remote handle to retrieve
    :returns: dict
    """
    direct_document = try_retrieve_webfinger_document(handle)
    if direct_document:
        return parse_diaspora_webfinger(direct_document)

    host = handle.split("@")[1]
    hostmeta = retrieve_diaspora_host_meta(host)
    if not hostmeta:
        return None

    lrdd_template = hostmeta.find_link(rels="lrdd").template
    webfinger_url = lrdd_template.replace("{uri}", quote(handle))
    fallback_document, _code, exception = fetch_document(webfinger_url)
    if exception:
        return None
    return parse_diaspora_webfinger(fallback_document)
def retrieve_and_parse_content(id, sender_key_fetcher=None):
    """Retrieve remote content and return an Entity class instance.

    This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".

    :param id: Diaspora URI scheme format ID.
    :param sender_key_fetcher: Function to use to fetch sender public key. If not given, network will be used
        to fetch the profile and the key. Function must take handle as only parameter and return a public key.
    :returns: Entity object instance or ``None``
    :raises: The error reported by ``fetch_document`` if there is one, otherwise a plain ``Exception`` when the
        status code is neither 200 nor 404.
    """
    handle, entity_type, guid = parse_diaspora_uri(id)
    _username, domain = handle.split("@")
    url = get_fetch_content_endpoint(domain, entity_type, guid)
    document, status_code, error = fetch_document(url)
    if status_code == 200:
        _sender, _protocol, entities = handle_receive(document, sender_key_fetcher=sender_key_fetcher)
        if len(entities) > 1:
            # Trailing space matters: adjacent string literals are joined as-is
            logger.warning("retrieve_and_parse_content - more than one entity parsed from remote even though we "
                           "expected only one! ID %s", id)
        if entities:
            return entities[0]
        return
    elif status_code == 404:
        logger.warning("retrieve_and_parse_content - remote content %s not found", id)
        return
    if error:
        raise error
    raise Exception("retrieve_and_parse_content - unknown problem when fetching document: %s, %s, %s" % (
        document, status_code, error,
    ))
def test_path_is_sanitized(self, mock_get):
    """A path given without a leading slash is requested as ``/path``."""
    ok_response = Mock(status_code=200, text="foo")
    mock_get.return_value = ok_response
    fetch_document(host="localhost", path="foobar/bazfoo")
    expected_call = call("https://localhost/foobar/bazfoo", **self.call_args)
    assert mock_get.call_args_list == [expected_call]
def test_url_is_called(self, mock_get):
    """Fetching by URL results in a request being made."""
    ok_response = Mock(status_code=200, text="foo")
    mock_get.return_value = ok_response
    fetch_document("https://localhost")
    assert mock_get.called
def test_raises_without_url_and_host(self):
    """Calling ``fetch_document`` with neither URL nor host is a ``ValueError``."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        fetch_document()
def parse_mastodon_document(doc, host):
    """
    Build a host metadata dict from a Mastodon instance API document.

    Documents whose version string identifies another platform are handed to
    the NodeInfo or Misskey fetcher instead. Otherwise the result starts from
    a copy of ``defaults`` and is filled in from the document, the ``/about``
    page (signup status) and the instance activity endpoint (active users).

    :arg doc: Parsed instance document (dict)
    :arg host: Host the document was fetched from
    :returns: dict, or whatever the delegated fetcher returns
    """
    version_string = doc.get('version', '')
    # Check first this is not actually Pleroma or Misskey
    if any(platform in version_string for platform in ('Pleroma', 'Pixelfed', 'Kibou', 'Kroeg')):
        # Use NodeInfo instead, otherwise this is logged as Mastodon
        from federation.hostmeta.fetchers import fetch_nodeinfo_document
        return fetch_nodeinfo_document(host)
    elif 'misskey' in version_string:
        # Use Misskey instead, otherwise this is logged as Mastodon
        from federation.hostmeta.fetchers import fetch_misskey_document
        return fetch_misskey_document(host, mastodon_document=doc)

    result = deepcopy(defaults)
    result['host'] = host
    result['name'] = doc.get('title', host)
    result['platform'] = 'mastodon'
    result['version'] = version_string

    # Awkward parsing of signups from about page
    # TODO remove if fixed, issue logged: https://github.com/tootsuite/mastodon/issues/9350
    about_doc, _status_code, _error = fetch_document(host=host, path='/about')
    if about_doc:
        result['open_signups'] = about_doc.find("<div class='closed-registrations-message'>") == -1

    version = re.sub(r'[^0-9.]', '', version_string)
    # Skip empty parts so a missing or oddly formatted version cannot crash int()
    version = [int(part) for part in version.split('.') if part]
    if version >= [3, 0, 0]:
        result['protocols'] = ['activitypub']
    elif version >= [1, 6, 0]:
        result['protocols'] = ['ostatus', 'activitypub']
    else:
        result['protocols'] = ['ostatus']
    result['relay'] = False

    result['activity']['users']['total'] = int_or_none(doc.get('stats', {}).get('user_count'))
    result['activity']['local_posts'] = int_or_none(doc.get('stats', {}).get('status_count'))

    # "contact_account" can be missing or present but null
    contact_account = doc.get('contact_account')
    if contact_account is None:
        contact_account = {}
    result['organization']['account'] = contact_account.get('url', '')
    result['organization']['contact'] = doc.get('email', '')
    result['organization']['name'] = contact_account.get('display_name', '')

    activity_doc, _status_code, _error = fetch_document(host=host, path='/api/v1/instance/activity')
    if not activity_doc:
        return result
    try:
        activity_doc = json.loads(activity_doc)
    except json.JSONDecodeError:
        return result

    # Prefer the second entry and fall back to the first one.
    # NOTE(review): presumably entry 0 is the current, incomplete week - confirm.
    logins = None
    for index in (1, 0):
        try:
            logins = activity_doc[index].get('logins')
        except (IndexError, KeyError):
            # IndexError for a too short list, KeyError if a dict was returned
            continue
        break

    weekly_count = int_or_none(logins)
    total_users = result['activity']['users']['total']
    if weekly_count and total_users:
        result['activity']['users']['weekly'] = weekly_count
        # Ensure multiplied counts from weekly count don't go over total user count
        result['activity']['users']['half_year'] = min(
            int(weekly_count * WEEKLY_USERS_HALFYEAR_MULTIPLIER),
            total_users,
        )
        result['activity']['users']['monthly'] = min(
            int(weekly_count * WEEKLY_USERS_MONTHLY_MULTIPLIER),
            total_users,
        )

    return result