def __init__(self, httpResponse):
    '''
    Set up the parser state from an HTTP response.

    @param httpResponse: The response object; this method calls its
        getCharset(), getURL() and getRedirURL() methods.
    @raise ValueError: If the charset reported by the response is not
        accepted by is_known_encoding().
    '''
    charset = httpResponse.getCharset()
    if not is_known_encoding(charset):
        raise ValueError('Unknown encoding: %s' % charset)

    # "setBaseUrl": the redirect URL, when there is one, takes precedence
    # over the URL of the response.
    requested_url = httpResponse.getURL()
    redirect_url = httpResponse.getRedirURL()
    base_url = redirect_url if redirect_url else requested_url

    self._baseUrl = base_url
    self._baseDomain = base_url.getDomain()
    self._rootDomain = base_url.getRootDomain()
    # NOTE(review): the charset is fetched a second time here; presumably
    # it is the same value that was validated above -- confirm.
    self._encoding = httpResponse.getCharset()

    # Containers for the results
    self._emails = []
    self._re_urls = set()
def __init__(self, data, encoding=DEFAULT_ENCODING):
    '''
    Build the URL object from a URL string or from its six components.

    @param data: Either a string representing a URL or a 6-elems tuple
        representing the URL components:
        <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    @param encoding: Name of the encoding to associate with this URL; it
        must be accepted by is_known_encoding().
    @raise ValueError: If data is None, if the encoding is not a known
        one, or if no netloc can be obtained from data.

    Simple generic test, more detailed tests in each method!

    >>> u = url_object('http://w3af.com/foo/bar.txt')
    >>> u.path
    '/foo/bar.txt'
    >>> u.scheme
    'http'
    >>> u.getFileName()
    'bar.txt'
    >>> u.getExtension()
    'txt'

    #
    # http is the default protocol, we can provide URLs with no proto
    #
    >>> u = url_object('w3af.com')
    >>> u.getDomain()
    'w3af.com'
    >>> u.getProtocol()
    'http'

    #
    # But we can't specify a URL without a domain!
    #
    >>> u = url_object('http://')
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    ValueError: Invalid URL "http://"

    >>> u = url_object(u'http://w3af.com/foo/bar.txt')
    >>> u.path
    u'/foo/bar.txt'

    >>> u = url_object('http://w3af.org/?foo=http://w3af.com')
    >>> u.netloc
    'w3af.org'

    >>> u = url_object('http://w3af.org/', encoding='x-euc-jp')
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    ValueError: Invalid encoding "x-euc-jp" when creating URL.
    '''
    self._already_calculated_url = None
    self._querystr = None
    self._changed = True
    self._encoding = encoding

    if data is None:
        raise ValueError('Can not build a url_object from data=None.')

    # Verify that the encoding is a valid one. If we don't do it here,
    # things might get crazy afterwards.
    if not is_known_encoding(encoding):
        raise ValueError('Invalid encoding "%s" when creating URL.'
                         % encoding)

    if isinstance(data, tuple):
        scheme, netloc, path, params, qs, fragment = data
    else:
        scheme, netloc, path, params, qs, fragment = \
            urlparse.urlparse(data)
        #
        # This is the case when someone creates a url_object like
        # this: url_object('www.w3af.com')
        #
        # NOTE(review): the whole parsed path becomes the netloc, so a
        # schemeless value that also carries a path ('w3af.com/foo')
        # ends up entirely in netloc -- confirm this is intended.
        if scheme == netloc == '' and path:
            # By default we set the protocol to "http"
            scheme = 'http'
            netloc = path
            path = ''

    self.scheme = scheme or u''
    self.netloc = netloc or u''
    self.path = path or u''
    self.params = params or u''
    self.querystring = qs or u''
    self.fragment = fragment or u''

    if not self.netloc:
        # The URL is invalid, we don't have a netloc!
        if isinstance(data, tuple):
            invalid_url = urlparse.urlunparse(data)
        else:
            invalid_url = data
        # Call form of raise: same behaviour on Python 2, and unlike the
        # "raise ValueError, msg" statement form it also parses on Python 3.
        raise ValueError('Invalid URL "%s"' % (invalid_url,))