def __init__(self, http_response):
    """
    Initialize the parser state from an HTTP response.

    :param http_response: The response to parse. Its get_charset(),
                          get_url() and get_redir_url() methods are used.
    :raise ValueError: When is_known_encoding() rejects the charset
                       reported by the response.
    """
    charset = http_response.get_charset()
    if not is_known_encoding(charset):
        raise ValueError('Unknown encoding: %s' % charset)

    # "set_base_url": a redirect target, when there is one, takes
    # precedence over the URL that was originally requested.
    requested_url = http_response.get_url()
    redirect_url = http_response.get_redir_url()
    base_url = redirect_url if redirect_url else requested_url

    self._base_url = base_url
    self._baseDomain = base_url.get_domain()
    self._rootDomain = base_url.get_root_domain()
    self._encoding = http_response.get_charset()

    # Some parsers never build a DOM; the attribute is defined for all of
    # them anyway so the rest of the code does not need extra "if"s.
    self._dom = None

    # Keep a reference to the response. Memory usage should stay bounded:
    # only ParserCache.LRU_LENGTH parsers are kept in memory and responses
    # longer than ParserCache.MAX_CACHEABLE_BODY_LEN are not stored.
    self._http_response = http_response
def __init__(self, data, encoding=DEFAULT_ENCODING):
    """
    Build a URL object from its string representation.

    :param data: A string (basestring) holding the URL, laid out as
                 <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
                 When the string has neither scheme nor netloc and does
                 not start with "/", the scheme defaults to "http" and
                 the parsed path is used as the netloc.
    :param encoding: Encoding name stored for this URL; it must be
                     accepted by is_known_encoding().
    :raise ValueError: When data is not a string, when the encoding is
                       unknown, or when there is no netloc and the scheme
                       is not "file".
    """
    # URL components, populated further down
    self._querystr = None
    self._fragment = None
    self._scheme = None
    self._netloc = None
    self._path = None
    self._params = None

    # Internal attributes
    self._cache = {}
    self._encoding = encoding

    if not isinstance(data, basestring):
        raise ValueError('Can not build a URL from %s.' % type(data))

    # Fail early on unknown encodings; detecting the problem later on
    # would be much harder to debug.
    if not is_known_encoding(encoding):
        raise ValueError('Invalid encoding "%s" when creating URL.' % encoding)

    scheme, netloc, path, params, query, fragment = urlparse.urlparse(data)

    # Handles URLs created without a protocol: URL('www.w3af.com')
    is_bare_host = (scheme == '' and netloc == ''
                    and not path.startswith(u'/'))
    if is_bare_host:
        # "http" is the default protocol; what urlparse reported as the
        # path is actually the host.
        scheme, netloc, path = u'http', path, u''

    self.scheme = scheme or u''
    self.netloc = netloc or u''
    self.path = path or u'/'
    self.params = params or u''
    self.querystring = query or u''
    self.fragment = fragment or u''

    if self.scheme != 'file' and not self.netloc:
        # The URL is invalid, we don't have a netloc!
        raise ValueError('Invalid URL "%s"' % data)

    self.normalize_url()
def __init__(self, data, encoding=DEFAULT_ENCODING):
    """
    Create a URL instance by parsing a URL string.

    :param data: The URL as a string (basestring), in the form
                 <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
                 Input without scheme and netloc that does not begin with
                 "/" is handled as an "http" URL whose netloc is the
                 parsed path.
    :param encoding: Name of the encoding kept for this URL; validated
                     with is_known_encoding().
    :raise ValueError: If data is not a string, if the encoding is not
                       known, or if the netloc is empty while the scheme
                       differs from "file".
    """
    # The individual pieces of the URL start out unset
    self._querystr = None
    self._fragment = None
    self._scheme = None
    self._netloc = None
    self._path = None
    self._params = None

    # Internal attributes
    self._cache = {}
    self._encoding = encoding

    if not isinstance(data, basestring):
        raise ValueError('Can not build a URL from %s.' % type(data))

    # Validate the encoding right away, a bad one would only surface as
    # confusing errors later.
    if not is_known_encoding(encoding):
        raise ValueError('Invalid encoding "%s" when creating URL.' % encoding)

    parts = urlparse.urlparse(data)

    if parts.scheme != '' or parts.netloc != '' or parts.path.startswith(u'/'):
        scheme = parts.scheme
        netloc = parts.netloc
        path = parts.path
    else:
        # Someone created the URL like this: URL('www.w3af.com')
        # The protocol defaults to "http" and the parsed path is the host.
        scheme = u'http'
        netloc = parts.path
        path = u''

    self.scheme = scheme or u''
    self.netloc = netloc or u''
    self.path = path or u'/'
    self.params = parts.params or u''
    self.querystring = parts.query or u''
    self.fragment = parts.fragment or u''

    if self.scheme != 'file' and not self.netloc:
        # The URL is invalid, we don't have a netloc!
        raise ValueError('Invalid URL "%s"' % data)

    self.normalize_url()
def __init__(self, http_response):
    """
    Set up the base URL, domains and encoding from an HTTP response.

    :param http_response: Response object providing get_charset(),
                          get_url() and get_redir_url().
    :raise ValueError: When the response charset is not accepted by
                       is_known_encoding().
    """
    charset = http_response.get_charset()
    if not is_known_encoding(charset):
        raise ValueError('Unknown encoding: %s' % charset)

    # "set_base_url": prefer the redirect URL when the response has one
    requested_url = http_response.get_url()
    redirect_url = http_response.get_redir_url()
    base_url = redirect_url if redirect_url else requested_url

    self._base_url = base_url
    self._baseDomain = base_url.get_domain()
    self._rootDomain = base_url.get_root_domain()
    self._encoding = http_response.get_charset()
def __init__(self, data, encoding=DEFAULT_ENCODING):
    """
    Build a URL object from its string representation.

    :param data: A string (basestring) holding the URL, laid out as
                 <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
                 When the string has neither scheme nor netloc and does
                 not start with "/", the scheme defaults to "http" and
                 the parsed path is used as the netloc.
    :param encoding: Encoding name stored for this URL; it must be
                     accepted by is_known_encoding().
    :raise ValueError: When data is not a string, when the encoding is
                       unknown, or when there is no netloc and the scheme
                       is not "file".
    """
    self._already_calculated_url = None
    self._querystr = None
    self._changed = True
    self._encoding = encoding

    if not isinstance(data, basestring):
        raise ValueError('Can not build a URL from %s.' % type(data))

    # Verify that the encoding is a valid one. If we don't do it here,
    # things might get crazy afterwards.
    if not is_known_encoding(encoding):
        msg = 'Invalid encoding "%s" when creating URL.'
        raise ValueError(msg % encoding)

    scheme, netloc, path, params, qs, fragment = urlparse.urlparse(data)

    #
    # This is the case when someone creates a URL like
    # this: URL('www.w3af.com')
    #
    if scheme == netloc == '' and not path.startswith('/'):
        # By default we set the protocol to "http"; what urlparse
        # returned as the path is really the host.
        scheme = 'http'
        netloc = path
        path = ''

    self.scheme = scheme or u''
    self.netloc = netloc or u''
    self.path = path or u'/'
    self.params = params or u''
    self.querystring = qs or u''
    self.fragment = fragment or u''

    if not self.netloc and self.scheme != 'file':
        # The URL is invalid, we don't have a netloc!
        # Call-style raise: the "raise Class, value" statement form is
        # Python 2 only and differs from the other raises in this method.
        raise ValueError('Invalid URL "%s"' % (data,))

    self.normalize_url()
def __init__(self, HTTPResponse):
    """
    Initialize the parser from an HTTP response and prepare the result
    containers.

    :param HTTPResponse: Response object providing get_charset(),
                         get_url() and get_redir_url().
    :raise ValueError: When the response charset is not accepted by
                       is_known_encoding().
    """
    charset = HTTPResponse.get_charset()
    if not is_known_encoding(charset):
        raise ValueError('Unknown encoding: %s' % charset)

    # "setBaseUrl": prefer the redirect URL when the response has one
    requested_url = HTTPResponse.get_url()
    redirect_url = HTTPResponse.get_redir_url()
    base_url = redirect_url if redirect_url else requested_url

    self._base_url = base_url
    self._baseDomain = base_url.get_domain()
    self._rootDomain = base_url.get_root_domain()
    self._encoding = HTTPResponse.get_charset()

    # Result containers, both start out empty
    self._emails = set()
    self._re_urls = set()
def __init__(self, http_response):
    """
    Initialize the parser state from an HTTP response.

    :param http_response: The response to parse. Its get_charset(),
                          get_url() and get_redir_url() methods are used.
    :raise ValueError: When is_known_encoding() rejects the charset
                       reported by the response.
    """
    charset = http_response.get_charset()
    if not is_known_encoding(charset):
        raise ValueError('Unknown encoding: %s' % charset)

    # "set_base_url": a redirect target, when there is one, takes
    # precedence over the URL that was originally requested.
    requested_url = http_response.get_url()
    redirect_url = http_response.get_redir_url()
    base_url = redirect_url if redirect_url else requested_url

    self._base_url = base_url
    self._base_domain = base_url.get_domain()
    self._root_domain = base_url.get_root_domain()
    self._encoding = http_response.get_charset()

    # Keep a reference to the response. Memory usage should stay bounded:
    # only ParserCache.LRU_LENGTH parsers are kept in memory and responses
    # longer than ParserCache.MAX_CACHEABLE_BODY_LEN are not stored.
    self._http_response = http_response
def test_is_known_encoding_false(self):
    """is_known_encoding() must report a made-up codec name as unknown."""
    bogus_encoding = 'andres-16'
    self.assertFalse(is_known_encoding(bogus_encoding))
def test_is_known_encoding_true(self):
    """is_known_encoding() must accept the 'utf-8' codec name."""
    valid_encoding = 'utf-8'
    self.assertTrue(is_known_encoding(valid_encoding))