Exemple #1
0
    def test_whitelisted_tags(self):
        """Checks filter_a on 'a' tags and its rejection of other tags."""
        url = 'http://www.oppia.com'

        # A plain URL is accepted as the href of an 'a' tag.
        href_verdict = html_cleaner.filter_a('a', 'href', url)
        self.assertTrue(href_verdict)

        # An href value that begins with markup is not accepted.
        markup_verdict = html_cleaner.filter_a(
            'a', 'href', '<code>http://www.oppia.com')
        self.assertFalse(markup_verdict)

        # The title attribute of an 'a' tag is accepted.
        title_verdict = html_cleaner.filter_a('a', 'title', url)
        self.assertTrue(title_verdict)

        # Calling filter_a with a tag other than 'a' must raise.
        with self.assertRaises(Exception):
            html_cleaner.filter_a('link', 'href', url)
Exemple #2
0
    def test_whitelisted_tags(self):
        """Exercises html_cleaner.filter_a for 'a' tags and for a 'link' tag."""
        oppia_url = 'http://www.oppia.com'
        expected_error = 'The filter_a method should only be used for a tags.'

        # Accepted: a plain URL used as the href of an 'a' tag.
        self.assertTrue(html_cleaner.filter_a('a', 'href', oppia_url))

        # Rejected: an href value with markup in front of the URL.
        self.assertFalse(html_cleaner.filter_a(
            'a', 'href', '<code>http://www.oppia.com'))

        # Accepted: the title attribute of an 'a' tag.
        self.assertTrue(html_cleaner.filter_a('a', 'title', oppia_url))

        # A non-'a' tag must raise with the message matched below.
        with self.assertRaisesRegexp(Exception, expected_error):
            html_cleaner.filter_a('link', 'href', oppia_url)
Exemple #3
0
    def sanitize_url(obj: str) -> str:
        """Takes a string representing a URL and sanitizes it.

        The URL is split into components, each component is passed through
        python_utils.url_quote, and the quoted components are joined back
        together.

        Args:
            obj: str. A string representing a URL.

        Returns:
            str. The empty string if obj is empty. Otherwise, the URL rebuilt
            from its quoted components.

        Raises:
            AssertionError. The string is non-empty and html_cleaner.filter_a
                does not accept it as the href of an 'a' tag (per the error
                message, URLs should start with http:// or https://).
        """
        if obj == '':
            return obj
        url_components = python_utils.url_split(obj) # type: ignore[no-untyped-call]
        quoted_url_components = (
            python_utils.url_quote(component) for component in url_components) # type: ignore[no-untyped-call]
        raw = python_utils.url_unsplit(quoted_url_components) # type: ignore[no-untyped-call]

        # The check runs on the original input, while the error message
        # reports the re-quoted URL.
        acceptable = html_cleaner.filter_a('a', 'href', obj) # type: ignore[no-untyped-call]
        # Raise explicitly instead of using an assert statement: asserts are
        # stripped when Python runs with -O, which would let rejected URLs
        # through unchecked.
        if not acceptable:
            raise AssertionError(
                'Invalid URL: Sanitized URL should start with '
                '\'http://\' or \'https://\'; received %s' % raw)
        return raw # type: ignore[no-any-return]
Exemple #4
0
    def sanitize_url(obj):
        """Takes a string representing a URL and sanitizes it.

        The URL is split with urlparse.urlsplit, each component is passed
        through urllib.quote, and the quoted components are joined back
        together with urlparse.urlunsplit.

        Args:
            obj: a string representing a URL.

        Returns:
            The empty string if obj is empty. Otherwise, the URL rebuilt from
            its quoted components.

        Raises:
            AssertionError: The string is non-empty and html_cleaner.filter_a
            does not accept it as an href value (per the error message, URLs
            should start with http:// or https://).
        """
        if obj == '':
            return obj
        # NOTE(review): every component (including netloc and query) goes
        # through urllib.quote with its default safe characters -- confirm
        # that quoting characters such as ':' or '=' there is intended.
        url_components = urlparse.urlsplit(obj)
        quoted_url_components = (urllib.quote(component)
                                 for component in url_components)
        raw = urlparse.urlunsplit(quoted_url_components)

        # The check runs on the original input, while the error message
        # reports the re-quoted URL.
        acceptable = html_cleaner.filter_a('href', obj)
        # Raise explicitly instead of using an assert statement: asserts are
        # stripped when Python runs with -O, which would let rejected URLs
        # through unchecked.
        if not acceptable:
            raise AssertionError(
                'Invalid URL: Sanitized URL should start with '
                '\'http://\' or \'https://\'; received %s' % raw)
        return raw
Exemple #5
0
    def sanitize_url(obj):
        """Takes a string representing a URL and sanitizes it.

        The URL is split with urlparse.urlsplit, each component is passed
        through urllib.quote, and the quoted components are joined back
        together with urlparse.urlunsplit.

        Args:
          obj: a string representing a URL.

        Returns:
          The URL rebuilt from its quoted components.

        Raises:
          AssertionError: html_cleaner.filter_a does not accept obj as an
          href value (per the error message, URLs should start with http://
          or https://).
        """
        url_components = urlparse.urlsplit(obj)
        quoted_url_components = (
            urllib.quote(component) for component in url_components)
        raw = urlparse.urlunsplit(quoted_url_components)

        # The check runs on the original input, while the error message
        # reports the re-quoted URL.
        acceptable = html_cleaner.filter_a('href', obj)
        # Raise explicitly instead of using an assert statement: asserts are
        # stripped when Python runs with -O, which would let rejected URLs
        # through unchecked.
        if not acceptable:
            raise AssertionError(
                'Invalid URL: Sanitized URL should start with '
                '\'http://\' or \'https://\'; received %s' % raw)
        return raw
    def normalize(cls, raw):
        """Validates and normalizes a raw Python object.

        Args:
            raw: the object to normalize. It must be a string.

        Returns:
            A unicode string. If raw is empty it is returned as is. If
            html_cleaner.filter_a rejects the quoted URL, an error is logged
            and the empty string is returned. Otherwise, the URL rebuilt
            from its quoted components is returned.

        Raises:
            TypeError: raw is not a string, or any step of the conversion
            raises an exception.
        """
        try:
            # Raise explicitly instead of using an assert statement: asserts
            # are stripped when Python runs with -O, in which case
            # non-strings would be coerced by unicode() below instead of
            # being rejected. The AssertionError is converted to a TypeError
            # by the handler at the bottom.
            if not isinstance(raw, basestring):
                raise AssertionError()
            raw = unicode(raw)
            if raw:
                url_components = urlparse.urlsplit(raw)
                quoted_url_components = (urllib.quote(component)
                                         for component in url_components)
                raw = urlparse.urlunsplit(quoted_url_components)

                # Here the check runs on the re-quoted URL.
                acceptable = html_cleaner.filter_a('href', raw)
                if not acceptable:
                    logging.error(
                        'Invalid URL: Sanitized URL should start with '
                        '\'http://\' or \'https://\'; received %s' % raw)
                    return u''

            return raw
        except Exception as e:
            # Any failure above is reported to callers as a TypeError.
            raise TypeError('Cannot convert to sanitized URL: %s. Error: %s' %
                            (raw, e))
    def normalize(cls, raw):
        """Validates and normalizes a raw Python object.

        Args:
            raw: the object to normalize. It must be a string.

        Returns:
            A unicode string. If raw is empty it is returned as is. If
            html_cleaner.filter_a rejects the quoted URL, an error is logged
            and the empty string is returned. Otherwise, the URL rebuilt
            from its quoted components is returned.

        Raises:
            TypeError: raw is not a string, or any step of the conversion
            raises an exception.
        """
        try:
            # An explicit raise is used rather than an assert statement
            # because asserts are removed under -O; without this check a
            # non-string would be coerced by unicode() below. The handler at
            # the bottom turns the AssertionError into a TypeError.
            if not isinstance(raw, basestring):
                raise AssertionError()
            raw = unicode(raw)
            if raw:
                url_components = urlparse.urlsplit(raw)
                quoted_url_components = (
                    urllib.quote(component) for component in url_components)
                raw = urlparse.urlunsplit(quoted_url_components)

                # Here the check runs on the re-quoted URL.
                acceptable = html_cleaner.filter_a('href', raw)
                if not acceptable:
                    logging.error(
                        'Invalid URL: Sanitized URL should start with '
                        '\'http://\' or \'https://\'; received %s' % raw)
                    return u''

            return raw
        except Exception as e:
            # Any failure above is reported to callers as a TypeError.
            raise TypeError('Cannot convert to sanitized URL: %s. Error: %s' %
                            (raw, e))