def test_whitelisted_tags(self):
    """Checks filter_a's verdict for attributes of an 'a' tag, and that
    it raises when called with a 'link' tag.
    """
    oppia_url = 'http://www.oppia.com'

    # href on an anchor: the plain URL passes, the value that starts with
    # markup does not.
    plain_href_allowed = html_cleaner.filter_a('a', 'href', oppia_url)
    self.assertTrue(plain_href_allowed)
    markup_href_allowed = html_cleaner.filter_a(
        'a', 'href', '<code>http://www.oppia.com')
    self.assertFalse(markup_href_allowed)

    # title on an anchor passes with the same value.
    title_allowed = html_cleaner.filter_a('a', 'title', oppia_url)
    self.assertTrue(title_allowed)

    # A tag other than 'a' is expected to raise rather than return.
    with self.assertRaises(Exception):
        html_cleaner.filter_a('link', 'href', oppia_url)
def test_whitelisted_tags(self):
    """Exercises filter_a on 'a'-tag attributes and checks the error
    raised for a non-'a' tag.
    """
    site = 'http://www.oppia.com'

    # Each entry is (attribute name, attribute value, expected verdict)
    # for an 'a' tag; the entries are checked in order.
    anchor_cases = (
        ('href', site, True),
        ('href', '<code>http://www.oppia.com', False),
        ('title', site, True),
    )
    for attr_name, attr_value, expected_verdict in anchor_cases:
        verdict = html_cleaner.filter_a('a', attr_name, attr_value)
        if expected_verdict:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)

    # Calling the filter with a 'link' tag must fail with this message.
    expected_error = 'The filter_a method should only be used for a tags.'
    with self.assertRaisesRegexp(Exception, expected_error):
        html_cleaner.filter_a('link', 'href', site)
def sanitize_url(obj: str) -> str:
    """Takes a string representing a URL and sanitizes it.

    The URL is split into components, every component is passed through
    python_utils.url_quote, and the pieces are joined back together. The
    acceptability check is run on the original (unquoted) string.

    Args:
        obj: str. A string representing a URL.

    Returns:
        str. The empty string if obj is empty. Otherwise, the URL
        reassembled from its quoted components.

    Raises:
        AssertionError. The string is non-empty and html_cleaner.filter_a
            rejects it as the href of an 'a' tag (per the error message,
            the URL is expected to start with http:// or https://).
    """
    if obj == '':
        return obj

    # NOTE(review): every component, including the scheme and the network
    # location, goes through the same quoting call. Presumably characters
    # such as ':' in a host:port location get percent-encoded as well --
    # confirm this is intended.
    url_components = python_utils.url_split(obj)  # type: ignore[no-untyped-call]
    quoted_url_components = (
        python_utils.url_quote(component) for component in url_components)  # type: ignore[no-untyped-call]
    raw = python_utils.url_unsplit(quoted_url_components)  # type: ignore[no-untyped-call]

    acceptable = html_cleaner.filter_a('a', 'href', obj)  # type: ignore[no-untyped-call]
    # Raised explicitly instead of through an assert statement: asserts are
    # removed when Python runs with -O, which would silently turn this
    # check off. AssertionError is kept so existing callers are unaffected.
    if not acceptable:
        raise AssertionError(
            'Invalid URL: Sanitized URL should start with '
            '\'http://\' or \'https://\'; received %s' % raw)

    return raw  # type: ignore[no-any-return]
def sanitize_url(obj):
    """Takes a string representing a URL and sanitizes it.

    The URL is split with urlparse.urlsplit, every component is passed
    through urllib.quote, and the pieces are joined back together. The
    acceptability check is run on the original (unquoted) string.

    Args:
        obj: a string representing a URL.

    Returns:
        The empty string if obj is empty. Otherwise, the URL reassembled
        from its quoted components.

    Raises:
        AssertionError: The string is non-empty and html_cleaner.filter_a
            rejects it as an href value (per the error message, the URL is
            expected to start with http:// or https://).
    """
    if obj == '':
        return obj

    url_components = urlparse.urlsplit(obj)
    quoted_url_components = (
        urllib.quote(component) for component in url_components)
    raw = urlparse.urlunsplit(quoted_url_components)

    acceptable = html_cleaner.filter_a('href', obj)
    # Raised explicitly instead of through an assert statement: asserts are
    # removed when Python runs with -O, which would silently turn this
    # check off. AssertionError is kept so existing callers are unaffected.
    if not acceptable:
        raise AssertionError(
            'Invalid URL: Sanitized URL should start with '
            '\'http://\' or \'https://\'; received %s' % raw)

    return raw
def sanitize_url(obj):
    """Takes a string representing a URL and sanitizes it.

    The URL is split with urlparse.urlsplit, every component is passed
    through urllib.quote, and the pieces are joined back together. The
    acceptability check is run on the original (unquoted) string.

    Args:
        obj: a string representing a URL.

    Returns:
        The URL reassembled from its quoted components.

    Raises:
        AssertionError: html_cleaner.filter_a rejects the string as an
            href value (per the error message, the URL is expected to
            start with http:// or https://).
    """
    # NOTE(review): an empty string gets no special handling here; whether
    # it is accepted depends entirely on filter_a.
    url_components = urlparse.urlsplit(obj)
    quoted_url_components = (
        urllib.quote(component) for component in url_components)
    raw = urlparse.urlunsplit(quoted_url_components)

    acceptable = html_cleaner.filter_a('href', obj)
    # Raised explicitly instead of through an assert statement: asserts are
    # removed when Python runs with -O, which would silently turn this
    # check off. AssertionError is kept so existing callers are unaffected.
    if not acceptable:
        raise AssertionError(
            'Invalid URL: Sanitized URL should start with '
            '\'http://\' or \'https://\'; received %s' % raw)

    return raw
def normalize(cls, raw):
    """Validates and normalizes a raw Python object.

    Args:
        raw: the object to normalize. It must be a string (basestring).

    Returns:
        A unicode string: u'' if raw is empty, or if the quoted URL is
        rejected by html_cleaner.filter_a (in which case an error is
        logged). Otherwise, the URL reassembled from its quoted components.

    Raises:
        TypeError: raw is not a string, or any step of the conversion
            raised an exception.
    """
    try:
        # Checked explicitly instead of with an assert statement: asserts
        # are stripped when Python runs with -O, after which unicode(raw)
        # would accept non-string input. The error is raised inside the
        # try block so that callers still receive the TypeError below.
        if not isinstance(raw, basestring):
            raise AssertionError(
                'Expected a string, received %s' % type(raw).__name__)
        raw = unicode(raw)
        if raw:
            url_components = urlparse.urlsplit(raw)
            quoted_url_components = (
                urllib.quote(component) for component in url_components)
            raw = urlparse.urlunsplit(quoted_url_components)

            # The filter is applied to the re-quoted URL, not the input.
            acceptable = html_cleaner.filter_a('href', raw)
            if not acceptable:
                logging.error(
                    'Invalid URL: Sanitized URL should start with '
                    '\'http://\' or \'https://\'; received %s' % raw)
                return u''

        return raw
    except Exception as e:
        raise TypeError(
            'Cannot convert to sanitized URL: %s. Error: %s' % (raw, e))
def normalize(cls, raw):
    """Validates and normalizes a raw Python object.

    Args:
        raw: the object to normalize. It must be a string (basestring).

    Returns:
        A unicode string. Empty input yields u''. A non-empty URL is split,
        each component is quoted with urllib.quote, and the result is
        rejoined; if html_cleaner.filter_a rejects that rejoined URL, an
        error is logged and u'' is returned, otherwise the rejoined URL is
        returned.

    Raises:
        TypeError: raw is not a string, or an exception was raised while
            converting it.
    """
    try:
        # An explicit check replaces the former assert statement, which
        # Python drops under -O; without it unicode(raw) would let
        # non-string values through. Raising inside the try block keeps
        # the TypeError wrapping that callers already see.
        if not isinstance(raw, basestring):
            raise AssertionError(
                'Expected a string, received %s' % type(raw).__name__)
        raw = unicode(raw)
        if raw:
            url_components = urlparse.urlsplit(raw)
            quoted_url_components = (
                urllib.quote(component) for component in url_components)
            raw = urlparse.urlunsplit(quoted_url_components)

            # From here on, raw is the re-quoted URL; that is what gets
            # filtered and reported.
            acceptable = html_cleaner.filter_a('href', raw)
            if not acceptable:
                logging.error(
                    'Invalid URL: Sanitized URL should start with '
                    '\'http://\' or \'https://\'; received %s' % raw)
                return u''

        return raw
    except Exception as e:
        raise TypeError(
            'Cannot convert to sanitized URL: %s. Error: %s' % (raw, e))