예제 #1
0
def retrieve_and_parse_diaspora_webfinger(handle):
    """
    Retrieve and parse a remote Diaspora webfinger document.

    The ``/.well-known/webfinger`` endpoint of the handle's host is tried first.
    If that yields no document, the host-meta document is retrieved and its
    ``lrdd`` link template is used to locate the webfinger document instead.

    :arg handle: Remote handle to retrieve, expected in ``user@host`` form
    :returns: dict (result of ``parse_diaspora_webfinger``), or ``None`` if the
        handle is invalid or no document could be retrieved
    """
    try:
        host = handle.split("@")[1]
    except (AttributeError, IndexError):
        # AttributeError: handle is not a string.
        # IndexError: handle contains no "@", so there is no host part.
        logger.warning(
            "retrieve_and_parse_diaspora_webfinger: invalid handle given: %s",
            handle)
        return None
    document, _code, _exception = fetch_document(
        host=host,
        path="/.well-known/webfinger?resource=acct:%s" % quote(handle),
    )
    if document:
        return parse_diaspora_webfinger(document)
    # No document from the well-known endpoint: fall back to host-meta discovery.
    hostmeta = retrieve_diaspora_host_meta(host)
    if not hostmeta:
        return None
    url = hostmeta.find_link(rels="lrdd").template.replace(
        "{uri}", quote(handle))
    document, _code, exception = fetch_document(url)
    if exception:
        return None
    return parse_diaspora_webfinger(document)
예제 #2
0
def fetch_nodeinfo_document(host):
    """
    Fetch and parse the NodeInfo document of a host.

    The discovery document at ``/.well-known/nodeinfo`` is fetched first. From its
    links, the one with the highest version that does not exceed
    ``HIGHEST_SUPPORTED_NODEINFO_VERSION`` is chosen, and the document behind that
    link is fetched and handed to ``parse_nodeinfo_document``.

    :arg host: Host to fetch from
    :returns: Result of ``parse_nodeinfo_document``, or ``None`` if either document
        is missing, is not valid JSON, or no usable link is found
    """
    doc, _status_code, _error = fetch_document(host=host,
                                               path='/.well-known/nodeinfo')
    if not doc:
        return None
    try:
        doc = json.loads(doc)
    except json.JSONDecodeError:
        return None
    if not isinstance(doc, dict):
        # Valid JSON, but not an object we can look links up in.
        return None

    url, highest_version = '', 0.0

    if doc.get('0'):
        # Buggy NodeInfo from certain old Hubzilla versions
        url = doc.get('0', {}).get('href')
    elif isinstance(doc.get('links'), dict):
        # Another buggy NodeInfo from certain old Hubzilla versions
        url = doc.get('links').get('href')
    else:
        # A missing or null "links" is treated as "no links" instead of crashing.
        for link in doc.get('links') or []:
            try:
                version = float(link.get('rel').split('/')[-1])
            except (AttributeError, ValueError):
                # Link entry is not a dict, has no "rel", or its last path
                # segment is not a version number; skip it.
                continue
            if highest_version < version <= HIGHEST_SUPPORTED_NODEINFO_VERSION:
                url, highest_version = link.get('href'), version

    if not url:
        return None

    doc, _status_code, _error = fetch_document(url=url)
    if not doc:
        return None
    try:
        doc = json.loads(doc)
    except json.JSONDecodeError:
        return None
    return parse_nodeinfo_document(doc, host)
예제 #3
0
 def test_extra_headers(self, mock_get):
     """Extra headers passed to ``fetch_document`` are sent along with the user agent header."""
     target = "https://example.com/foo"
     expected_headers = {
         'user-agent': USER_AGENT,
         'accept': 'application/activity+json',
     }
     fetch_document(target, extra_headers={'accept': 'application/activity+json'})
     mock_get.assert_called_once_with(target, headers=expected_headers)
예제 #4
0
 def test_host_is_called_with_https_first_then_http(self, mock_get):
     """With only a host given, https is attempted first and http is tried after it fails."""
     def fail_on_https(url, *args, **kwargs):
         # Only the https attempt errors out; any other URL gets a 200 response.
         if "https://" in url:
             raise HTTPError()
         return Mock(status_code=200, text="foo")

     mock_get.side_effect = fail_on_https
     fetch_document(host="localhost")
     assert mock_get.call_count == 2
     expected_calls = [
         call("%s://localhost/" % scheme, **self.call_args)
         for scheme in ("https", "http")
     ]
     assert mock_get.call_args_list == expected_calls
예제 #5
0
 def test_exception_is_raised_if_both_protocols_fail(self, mock_get):
     """When both requests for a host fail, the error comes back in the result tuple.

     Despite the test name, nothing propagates to the caller here: the test
     unpacks ``(document, status code, exception)`` from ``fetch_document``.
     """
     mock_get.side_effect = HTTPError
     doc, code, exc = fetch_document(host="localhost")
     # Two requests are expected for a host-only call.
     assert mock_get.call_count == 2
     assert doc is None
     assert code is None
     assert exc.__class__ is HTTPError
예제 #6
0
 def test_exception_is_raised_if_url_fails(self, mock_get):
     """A failing request for an explicit URL is made once and its error is returned.

     Despite the test name, the exception is returned as the third tuple element
     rather than raised.
     """
     mock_get.side_effect = HTTPError
     doc, code, exc = fetch_document("localhost")
     # Only a single request is expected when a URL is passed positionally.
     assert mock_get.call_count == 1
     assert doc is None
     assert code is None
     assert exc.__class__ is HTTPError
예제 #7
0
 def test_exception_is_raised_if_http_fails_and_raise_ssl_errors_true(self, mock_get):
     """An ``SSLError`` from the single request for a URL is returned in the result tuple.

     NOTE(review): the test name mentions ``raise_ssl_errors``, but the call below
     does not pass that argument - presumably it relies on a default; confirm.
     """
     mock_get.side_effect = SSLError
     doc, code, exc = fetch_document("localhost")
     assert mock_get.call_count == 1
     assert doc is None
     assert code is None
     assert exc.__class__ is SSLError
예제 #8
0
 def test_exception_is_raised_on_network_error(self, mock_get):
     """A generic ``RequestException`` stops after one request and is returned in the tuple.

     Unlike the ``HTTPError`` case for a host, only one request is expected here.
     """
     mock_get.side_effect = RequestException
     doc, code, exc = fetch_document(host="localhost")
     assert mock_get.call_count == 1
     assert doc is None
     assert code is None
     # Exact class match on purpose: RequestException subclasses must not pass.
     assert exc.__class__ is RequestException
예제 #9
0
def retrieve_and_parse_content(
        id: str, guid: str, handle: str, entity_type: str, sender_key_fetcher: Callable[[str], str]=None,
):
    """Retrieve remote content and return an Entity class instance.

    This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".

    :param id: Not used by this function body; kept as part of the call signature.
    :param guid: GUID of the content, used to build the fetch URL and in log messages.
    :param handle: Handle of the content author. Must pass ``validate_handle``; its domain part
        selects the remote host.
    :param entity_type: Entity type, lowercased before building the fetch URL.
    :param sender_key_fetcher: Function to use to fetch sender public key. If not given, network will be used
        to fetch the profile and the key. Function must take handle as only parameter and return a public key.
    :returns: Entity object instance or ``None``
    :raises Exception: The error reported by ``fetch_document`` if there is one, otherwise a generic
        ``Exception`` when the status code is neither 200 nor 404.
    """
    if not validate_handle(handle):
        return None
    _username, domain = handle.split("@")
    url = get_fetch_content_endpoint(domain, entity_type.lower(), guid)
    document, status_code, error = fetch_document(url)
    if status_code == 200:
        request = RequestType(body=document)
        _sender, _protocol, entities = handle_receive(request, sender_key_fetcher=sender_key_fetcher)
        if len(entities) > 1:
            # The trailing space matters: the two literals are concatenated into one message.
            logger.warning("retrieve_and_parse_content - more than one entity parsed from remote even though we "
                           "expected only one! ID %s", guid)
        if entities:
            return entities[0]
        return None
    elif status_code == 404:
        logger.warning("retrieve_and_parse_content - remote content %s not found", guid)
        return None
    if error:
        raise error
    raise Exception("retrieve_and_parse_content - unknown problem when fetching document: %s, %s, %s" % (
        document, status_code, error,
    ))
예제 #10
0
def fetch_statisticsjson_document(host):
    """
    Fetch ``/statistics.json`` from a host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_statisticsjson_document``, or ``None`` if nothing was
        fetched or the body is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/statistics.json')
    if raw:
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            return None
        return parse_statisticsjson_document(parsed, host)
    return None
예제 #11
0
def fetch_nodeinfo2_document(host):
    """
    Fetch ``/.well-known/x-nodeinfo2`` from a host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_nodeinfo2_document``, or ``None`` if nothing was
        fetched or the body is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/.well-known/x-nodeinfo2')
    if raw:
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            return None
        return parse_nodeinfo2_document(parsed, host)
    return None
예제 #12
0
def fetch_matrix_document(host: str) -> Optional[Dict]:
    """
    Fetch ``/_matrix/federation/v1/version`` from a host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_matrix_document``, or ``None`` if nothing was
        fetched or the body is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/_matrix/federation/v1/version')
    if raw:
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            return None
        return parse_matrix_document(parsed, host)
    return None
예제 #13
0
def fetch_mastodon_document(host):
    """
    Fetch ``/api/v1/instance`` from a host and parse it.

    :arg host: Host to fetch from
    :returns: Result of ``parse_mastodon_document``, or ``None`` if nothing was
        fetched or the body is not valid JSON
    """
    raw, _status_code, _error = fetch_document(host=host, path='/api/v1/instance')
    if raw:
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            return None
        return parse_mastodon_document(parsed, host)
    return None
예제 #14
0
 def get_profile_room_id(self):
     """Look up the profile room via its alias and remember the room ID.

     ``self._profile_room_id`` is only set when the directory request answers
     with status 200; otherwise it is left untouched.
     """
     # TODO: we should cache these.
     directory_url = f"{self.get_endpoint()}/directory/room/{self.profile_room_alias_url_safe}"
     body, status, _error = fetch_document(
         url=directory_url,
         extra_headers=appservice_auth_header(),
     )
     if status != 200:
         return
     self._profile_room_id = json.loads(body)["room_id"]
예제 #15
0
def parse_mastodon_document(doc, host):
    """
    Build a host metadata result from a Mastodon instance document.

    Starts from a deep copy of ``defaults``, fills it from ``doc`` and then fetches
    ``/api/v1/instance/activity`` from the host to derive active user counts.

    :arg doc: Parsed instance document (dict)
    :arg host: Host the document belongs to
    :returns: dict with the collected metadata
    """
    result = deepcopy(defaults)
    result['host'] = host
    result['name'] = doc.get('title', host)
    result['platform'] = 'mastodon'
    result['version'] = doc.get('version', '')
    # TODO parse about page
    # https://github.com/TheKinrar/mastodon-instances/blob/master/tasks/update_instances.js#L508
    # result['open_signups']

    # Keep only digits and dots, then drop empty parts so that a missing version
    # or a stray dot cannot make int() fail.
    version = re.sub(r'[^0-9.]', '', str(doc.get('version') or ''))
    version = [int(part) for part in version.split('.') if part]
    if version >= [1, 6, 0]:
        result['protocols'] = ['ostatus', 'activitypub']
    else:
        result['protocols'] = ['ostatus']
    result['relay'] = False

    # "stats" and "contact_account" may be absent or null in the document.
    stats = doc.get('stats') or {}
    contact_account = doc.get('contact_account') or {}

    result['activity']['users']['total'] = int_or_none(stats.get('user_count'))
    # TODO figure out what to do with posts vs comments vs statuses

    result['organization']['account'] = contact_account.get('url', '')
    result['organization']['contact'] = doc.get('email', '')
    result['organization']['name'] = contact_account.get('display_name', '')

    activity_doc, _status_code, _error = fetch_document(
        host=host, path='/api/v1/instance/activity')
    if activity_doc:
        try:
            activity_doc = json.loads(activity_doc)
        except json.JSONDecodeError:
            return result

        # Use the second entry when there is one, otherwise fall back to the first.
        # Indexing a too-short list raises IndexError, so the length is checked
        # explicitly instead of relying on KeyError.
        logins = None
        if isinstance(activity_doc, list) and activity_doc:
            entry = activity_doc[1] if len(activity_doc) > 1 else activity_doc[0]
            if isinstance(entry, dict):
                logins = entry.get('logins')
        weekly_count = int_or_none(logins)
        total_users = result['activity']['users']['total']
        if weekly_count and total_users:
            result['activity']['users']['weekly'] = weekly_count
            # Ensure multiplied counts from weekly count don't go over total user count
            result['activity']['users']['half_year'] = min(
                int(weekly_count * WEEKLY_USERS_HALFYEAR_MULTIPLIER),
                total_users,
            )
            result['activity']['users']['monthly'] = min(
                int(weekly_count * WEEKLY_USERS_MONTHLY_MULTIPLIER),
                total_users,
            )

    return result
예제 #16
0
def retrieve_diaspora_hcard(handle):
    """
    Retrieve a remote Diaspora hCard document.

    The hCard URL is taken from the ``hcard_url`` key of the parsed webfinger
    document for the handle.

    :arg handle: Remote handle to retrieve
    :return: str (HTML document), or ``None`` if the webfinger lookup gave no
        result, it carries no hCard URL, or fetching the hCard failed
    """
    webfinger = retrieve_and_parse_diaspora_webfinger(handle)
    if not webfinger:
        # Nothing to read a URL from; calling .get() on None would raise.
        return None
    hcard_url = webfinger.get("hcard_url")
    if not hcard_url:
        return None
    document, _code, exception = fetch_document(hcard_url)
    if exception:
        return None
    return document
예제 #17
0
def retrieve_diaspora_host_meta(host):
    """
    Fetch the Diaspora host-meta document of a host and parse it as XRD.

    :arg host: Host to retrieve from
    :returns: ``XRD`` instance, or ``None`` if fetching reported an exception
    """
    body, _code, error = fetch_document(host=host, path="/.well-known/host-meta")
    return None if error else XRD.parse_xrd(body)
예제 #18
0
def retrieve_diaspora_host_meta(host):
    """
    Retrieve and parse the host-meta document published by a Diaspora host.

    :arg host: Host to retrieve from
    :returns: ``XRD`` instance, or ``None`` when the fetch reported an exception
    """
    fetched = fetch_document(host=host, path="/.well-known/host-meta")
    raw_xrd, failure = fetched[0], fetched[2]
    if not failure:
        return XRD.parse_xrd(raw_xrd)
    return None
예제 #19
0
    def pre_send(self):
        """
        Work out whether the remote user and/or their profile room must be created.

        The profile endpoint of the parent class is queried for ``self.mxid``. Any
        status other than 200 marks the profile as needing creation; on 200 the
        profile room ID is looked up instead. The room is marked as needing
        creation when the profile has to be created or no room ID is known.
        """
        profile_url = f"{super().get_endpoint()}/profile/{self.mxid}"
        _doc, status, _error = fetch_document(
            url=profile_url,
            extra_headers=appservice_auth_header(),
        )
        if status == 200:
            self.get_profile_room_id()
        else:
            self._remote_profile_create_needed = True

        if self._remote_profile_create_needed or not self._profile_room_id:
            self._remote_room_create_needed = True
예제 #20
0
def retrieve_diaspora_host_meta(host):
    """
    Fetch a remote Diaspora host-meta document and parse it.

    Args:
        host (str) - Host to retrieve from

    Returns:
        XRD, or None if fetching the document reported an exception
    """
    response = fetch_document(host=host, path="/.well-known/host-meta")
    content, _status, problem = response
    if problem:
        return None
    return XRD.parse_xrd(content)
예제 #21
0
def retrieve_diaspora_hcard(handle):
    """
    Fetch the hCard document linked from a handle's webfinger document.

    :arg handle: Remote handle to retrieve
    :return: str (HTML document), or ``None`` if no webfinger document was
        retrieved or fetching the hCard reported an exception
    """
    xrd = retrieve_diaspora_webfinger(handle)
    if xrd:
        hcard_link = xrd.find_link(rels="http://microformats.org/profile/hcard")
        html, _code, error = fetch_document(hcard_link.href)
        return None if error else html
    return None
예제 #22
0
def retrieve_diaspora_hcard(handle):
    """
    Retrieve the remote Diaspora hCard document for a handle.

    The hCard location is read from the ``http://microformats.org/profile/hcard``
    link of the handle's webfinger document.

    :arg handle: Remote handle to retrieve
    :return: str (HTML document), or ``None`` when the webfinger lookup or the
        hCard fetch did not succeed
    """
    webfinger_xrd = retrieve_diaspora_webfinger(handle)
    if not webfinger_xrd:
        return None
    hcard_href = webfinger_xrd.find_link(rels="http://microformats.org/profile/hcard").href
    fetched = fetch_document(hcard_href)
    hcard, failure = fetched[0], fetched[2]
    if not failure:
        return hcard
    return None
예제 #23
0
def retrieve_and_parse_document(fid: str) -> Optional[Any]:
    """
    Fetch the remote document identified by ``fid`` and return its first entity.

    The document is requested with an ``application/activity+json`` accept header,
    decoded as JSON and converted with ``message_to_objects``.

    :returns: The first entity, or ``None`` if no document was fetched or it
        produced no entities
    """
    accept_header = {'accept': 'application/activity+json'}
    raw, _status_code, _ex = fetch_document(fid, extra_headers=accept_header)
    if not raw:
        return None
    payload = json.loads(decode_if_bytes(raw))
    entities = message_to_objects(payload, fid)
    logger.info("retrieve_and_parse_document - found %s entities",
                len(entities))
    if not entities:
        return None
    first_entity = entities[0]
    logger.info("retrieve_and_parse_document - using first entity: %s",
                first_entity)
    return first_entity
예제 #24
0
def parse_misskey_document(doc: Dict,
                           host: str,
                           mastodon_document: Dict = None) -> Dict:
    """
    Build a host metadata result from a Misskey document.

    Starts from a deep copy of ``defaults`` and fills it from ``doc``. User and
    post counts plus the contact account come from a Mastodon style instance
    document, which is fetched from ``/api/v1/instance`` on the host when
    ``mastodon_document`` is not supplied.

    :arg doc: Parsed Misskey document
    :arg host: Host the document belongs to
    :arg mastodon_document: Optional already parsed Mastodon API instance document
    :returns: dict with the collected metadata
    """
    features = doc.get('features', {})

    result = deepcopy(defaults)
    result['host'] = host

    organization = result['organization']
    organization['name'] = doc.get('maintainer', {}).get('name', '')
    organization['contact'] = doc.get('maintainer', {}).get('email', '')

    result['name'] = doc.get('name', host)
    result['open_signups'] = features.get('registration', False)
    result['protocols'] = ['activitypub']

    # Record each enabled external service, in a fixed order.
    for service in ('twitter', 'github', 'discord'):
        if features.get(service, False):
            result['services'].append(service)

    result['platform'] = 'misskey'
    result['version'] = doc.get('version', '')
    result['features'] = features

    if not mastodon_document:
        # Fetch also Mastodon API doc to get some counts...
        api_doc, _status_code, _error = fetch_document(host=host,
                                                       path='/api/v1/instance')
        if api_doc:
            try:
                mastodon_document = json.loads(api_doc)
            except json.JSONDecodeError:
                # Counts are optional; continue without them.
                pass

    if not mastodon_document:
        return result

    stats = mastodon_document.get('stats', {})
    result['activity']['users']['total'] = int_or_none(stats.get('user_count'))
    result['activity']['local_posts'] = int_or_none(stats.get('status_count'))

    # A missing and a null contact account are both treated as "no account".
    contact_account = mastodon_document.get('contact_account')
    if contact_account is None:
        contact_account = {}
    organization['account'] = contact_account.get('url', '')

    return result
예제 #25
0
def retrieve_diaspora_webfinger(handle):
    """
    Retrieve a remote Diaspora webfinger document via host-meta discovery.

    The host part of the handle is used to retrieve host-meta, whose ``lrdd``
    link template gives the webfinger URL.

    :arg handle: Remote handle to retrieve
    :returns: ``XRD`` instance, or ``None`` if host-meta is unavailable or the
        fetch reported an exception
    """
    host = handle.split("@")[1]
    hostmeta = retrieve_diaspora_host_meta(host)
    if hostmeta:
        lrdd_template = hostmeta.find_link(rels="lrdd").template
        webfinger_url = lrdd_template.replace("{uri}", quote(handle))
        body, _code, error = fetch_document(webfinger_url)
        return None if error else XRD.parse_xrd(body)
    return None
예제 #26
0
def retrieve_diaspora_webfinger(handle):
    """
    Retrieve a remote Diaspora webfinger document via host-meta discovery.

    :arg handle: Remote handle to retrieve
    :returns: ``XRD`` instance, or ``None`` if host-meta is unavailable, the fetch
        reported an exception, or the document fails to parse with an ``ExpatError``
    """
    host = handle.split("@")[1]
    hostmeta = retrieve_diaspora_host_meta(host)
    if not hostmeta:
        return None
    lrdd_template = hostmeta.find_link(rels="lrdd").template
    body, _code, error = fetch_document(lrdd_template.replace("{uri}", quote(handle)))
    if error:
        return None
    try:
        return XRD.parse_xrd(body)
    except xml.parsers.expat.ExpatError:
        # Malformed XML is treated the same as "no document".
        return None
예제 #27
0
def retrieve_and_parse_diaspora_webfinger(handle):
    """
    Retrieve and parse a remote Diaspora webfinger document.

    ``try_retrieve_webfinger_document`` is attempted first. If it yields nothing,
    the ``lrdd`` link template from host-meta is used to locate the document.

    :arg handle: Remote handle to retrieve
    :returns: dict, or ``None`` if host-meta is unavailable or the fallback fetch
        reported an exception
    """
    direct_document = try_retrieve_webfinger_document(handle)
    if direct_document:
        return parse_diaspora_webfinger(direct_document)

    # Fall back to discovery through the host-meta document.
    hostmeta = retrieve_diaspora_host_meta(handle.split("@")[1])
    if not hostmeta:
        return None
    lrdd_template = hostmeta.find_link(rels="lrdd").template
    fallback_document, _code, error = fetch_document(
        lrdd_template.replace("{uri}", quote(handle)))
    return None if error else parse_diaspora_webfinger(fallback_document)
예제 #28
0
def retrieve_and_parse_content(id, sender_key_fetcher=None):
    """Retrieve remote content and return an Entity class instance.

    This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".

    :param id: Diaspora URI scheme format ID.
    :param sender_key_fetcher: Function to use to fetch sender public key. If not given, network will be used
        to fetch the profile and the key. Function must take handle as only parameter and return a public key.
    :returns: Entity object instance or ``None``
    :raises Exception: The error reported by ``fetch_document`` if there is one, otherwise a generic
        ``Exception`` when the status code is neither 200 nor 404.
    """
    handle, entity_type, guid = parse_diaspora_uri(id)
    _username, domain = handle.split("@")
    url = get_fetch_content_endpoint(domain, entity_type, guid)
    document, status_code, error = fetch_document(url)
    if status_code == 200:
        _sender, _protocol, entities = handle_receive(
            document, sender_key_fetcher=sender_key_fetcher)
        if len(entities) > 1:
            # The trailing space matters: the two literals are concatenated into one message.
            logger.warning(
                "retrieve_and_parse_content - more than one entity parsed from remote even though we "
                "expected only one! ID %s", id)
        if entities:
            return entities[0]
        return None
    elif status_code == 404:
        logger.warning(
            "retrieve_and_parse_content - remote content %s not found", id)
        return None
    if error:
        raise error
    raise Exception(
        "retrieve_and_parse_content - unknown problem when fetching document: %s, %s, %s"
        % (
            document,
            status_code,
            error,
        ))
예제 #29
0
 def test_path_is_sanitized(self, mock_get):
     """A path without a leading slash is still requested as ``https://host/path``."""
     mock_get.return_value = Mock(status_code=200, text="foo")
     fetch_document(host="localhost", path="foobar/bazfoo")
     expected_call = call("https://localhost/foobar/bazfoo", **self.call_args)
     assert mock_get.call_args_list == [expected_call]
예제 #30
0
 def test_url_is_called(self, mock_get):
     """Passing a full URL results in the mocked getter being invoked."""
     ok_response = Mock(status_code=200, text="foo")
     mock_get.return_value = ok_response
     fetch_document("https://localhost")
     assert mock_get.called
예제 #31
0
 def test_raises_without_url_and_host(self):
     """Calling ``fetch_document`` with no arguments at all must raise ``ValueError``."""
     with pytest.raises(ValueError):
         fetch_document()
예제 #32
0
def parse_mastodon_document(doc, host):
    """
    Build a host metadata result from a Mastodon API instance document.

    Documents whose version string names Pleroma, Pixelfed, Kibou or Kroeg are
    delegated to ``fetch_nodeinfo_document``, and Misskey ones to
    ``fetch_misskey_document``. Otherwise a deep copy of ``defaults`` is filled
    from ``doc``, the ``/about`` page is inspected for signup status and
    ``/api/v1/instance/activity`` is fetched to derive active user counts.

    :arg doc: Parsed instance document (dict)
    :arg host: Host the document belongs to
    :returns: dict with the collected metadata, or whatever the delegated
        fetcher returns
    """
    version_string = str(doc.get('version') or '')

    # Check first this is not actually Pleroma or Misskey
    if any(name in version_string for name in ('Pleroma', 'Pixelfed', 'Kibou', 'Kroeg')):
        # Use NodeInfo instead, otherwise this is logged as Mastodon
        from federation.hostmeta.fetchers import fetch_nodeinfo_document
        return fetch_nodeinfo_document(host)
    elif 'misskey' in version_string:
        # Use Misskey instead, otherwise this is logged as Mastodon
        from federation.hostmeta.fetchers import fetch_misskey_document
        return fetch_misskey_document(host, mastodon_document=doc)

    result = deepcopy(defaults)
    result['host'] = host
    result['name'] = doc.get('title', host)
    result['platform'] = 'mastodon'
    result['version'] = doc.get('version', '')

    # Awkward parsing of signups from about page
    # TODO remove if fixed, issue logged: https://github.com/tootsuite/mastodon/issues/9350
    about_doc, _status_code, _error = fetch_document(host=host, path='/about')
    if about_doc:
        result['open_signups'] = "<div class='closed-registrations-message'>" not in about_doc

    # Keep only digits and dots, then drop empty parts so that a missing version
    # or a stray dot cannot make int() fail.
    version = re.sub(r'[^0-9.]', '', version_string)
    version = [int(part) for part in version.split('.') if part]
    if version >= [3, 0, 0]:
        result['protocols'] = ['activitypub']
    elif version >= [1, 6, 0]:
        result['protocols'] = ['ostatus', 'activitypub']
    else:
        result['protocols'] = ['ostatus']
    result['relay'] = False

    # "stats" and "contact_account" may be absent or null in the document.
    stats = doc.get('stats') or {}
    contact_account = doc.get('contact_account') or {}

    result['activity']['users']['total'] = int_or_none(stats.get('user_count'))
    result['activity']['local_posts'] = int_or_none(stats.get('status_count'))

    result['organization']['account'] = contact_account.get('url', '')
    result['organization']['contact'] = doc.get('email', '')
    result['organization']['name'] = contact_account.get('display_name', '')

    activity_doc, _status_code, _error = fetch_document(
        host=host, path='/api/v1/instance/activity')
    if activity_doc:
        try:
            activity_doc = json.loads(activity_doc)
        except json.JSONDecodeError:
            return result

        # Use the second entry when there is one, otherwise fall back to the first.
        # Indexing a too-short list raises IndexError, so the length is checked
        # explicitly instead of relying on KeyError.
        logins = None
        if isinstance(activity_doc, list) and activity_doc:
            entry = activity_doc[1] if len(activity_doc) > 1 else activity_doc[0]
            if isinstance(entry, dict):
                logins = entry.get('logins')
        weekly_count = int_or_none(logins)
        total_users = result['activity']['users']['total']
        if weekly_count and total_users:
            result['activity']['users']['weekly'] = weekly_count
            # Ensure multiplied counts from weekly count don't go over total user count
            result['activity']['users']['half_year'] = min(
                int(weekly_count * WEEKLY_USERS_HALFYEAR_MULTIPLIER),
                total_users,
            )
            result['activity']['users']['monthly'] = min(
                int(weekly_count * WEEKLY_USERS_MONTHLY_MULTIPLIER),
                total_users,
            )

    return result