def sanitize_url(obj):
    """Takes a string representing a URL and sanitizes it.

    Every component of the split URL is passed through
    python_utils.url_quote and the components are then recombined. The
    original (unquoted) string is the one checked by html_cleaner.filter_a.

    Args:
        obj: str. A string representing a URL.

    Returns:
        str. The empty string if obj is empty. Otherwise, the URL rebuilt
        from its quoted components.

    Raises:
        AssertionError. The string is non-empty and is rejected by
            html_cleaner.filter_a as an 'href' value (per the error message,
            it is expected to start with http:// or https://).
    """
    if obj == '':
        return obj
    url_components = python_utils.url_split(obj)
    quoted_url_components = (
        python_utils.url_quote(component) for component in url_components)
    raw = python_utils.url_unsplit(quoted_url_components)

    acceptable = html_cleaner.filter_a('a', 'href', obj)
    # Raise explicitly instead of using an assert statement: assert
    # statements are removed when Python runs with -O, which would let an
    # unacceptable URL through unchecked.
    if not acceptable:
        raise AssertionError(
            'Invalid URL: Sanitized URL should start with '
            '\'http://\' or \'https://\'; received %s' % raw)
    return raw
def set_url_query_parameter(url, param_name, param_value):
    """Sets or replaces a query parameter and returns the modified URL.

    Args:
        url: str. The URL whose query string should be updated.
        param_name: str. The name of the query parameter to set.
        param_value: *. The value to assign to the parameter. It is stored
            as the only value for param_name, replacing any existing entry.

    Returns:
        str. The URL with the given query parameter set or replaced.

    Raises:
        Exception. The parameter name is not an instance of
            python_utils.BASESTRING.
    """
    name_is_string = isinstance(param_name, python_utils.BASESTRING)
    if not name_is_string:
        error_message = (
            'URL query parameter name must be a string, received %s'
            % param_name)
        raise Exception(error_message)

    (scheme, netloc, path, old_query, fragment) = (
        python_utils.url_split(url))
    params = python_utils.parse_query_string(old_query)
    # The value is wrapped in a list because the parameters are encoded
    # with doseq=True below.
    params[param_name] = [param_value]
    updated_query = python_utils.url_encode(params, doseq=True)
    rebuilt_components = (scheme, netloc, path, updated_query, fragment)
    return python_utils.url_unsplit(rebuilt_components)
def test_url_unsplit(self):
    """Checks that unsplitting the result of url_split on a URL gives back
    the same URL string.
    """
    url_parts = python_utils.url_split('http://www.google.com')
    rebuilt_url = python_utils.url_unsplit(url_parts)
    self.assertEqual(rebuilt_url, 'http://www.google.com')