예제 #1
0
 def __init__(self, httpResponse):
     '''
     Set up the base URL information and the result containers from an
     HTTP response.

     @param httpResponse: The response object; this method calls its
         getCharset(), getURL() and getRedirURL() methods.
     @raise ValueError: When is_known_encoding() rejects the charset
         reported by the response.
     '''
     encoding = httpResponse.getCharset()
     if not is_known_encoding(encoding):
         raise ValueError('Unknown encoding: %s' % encoding)

     # "setBaseUrl": when the response carries a redirect URL, that one is
     # used as the base instead of the originally requested URL.
     url = httpResponse.getURL()
     redirURL = httpResponse.getRedirURL()
     if redirURL:
         url = redirURL

     self._baseUrl = url
     self._baseDomain = url.getDomain()
     self._rootDomain = url.getRootDomain()
     # Store the value that was validated above instead of asking the
     # response for its charset a second time.
     self._encoding = encoding

     # To store results
     self._emails = []
     self._re_urls = set()
예제 #2
0
    def __init__(self, data, encoding=DEFAULT_ENCODING):
        '''
        Build the URL object from a URL string or from its parsed components.

        @param data: Either a string representing a URL or a 6-elems tuple
            representing the URL components:
            <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
        @param encoding: Name of the encoding stored for this URL; it must be
            accepted by is_known_encoding().
        @raise ValueError: If data is None, if the encoding is not a known
            one, or if the resulting URL has no netloc.

        Simple generic test, more detailed tests in each method!
        
        >>> u = url_object('http://w3af.com/foo/bar.txt')
        >>> u.path
        '/foo/bar.txt'
        >>> u.scheme
        'http'
        >>> u.getFileName()
        'bar.txt'
        >>> u.getExtension()
        'txt'

        #
        # http is the default protocol, we can provide URLs with no proto
        #
        >>> u = url_object('w3af.com')
        >>> u.getDomain()
        'w3af.com'
        >>> u.getProtocol()
        'http'

        #
        # But we can't specify a URL without a domain!
        #
        >>> u = url_object('http://')
        Traceback (most recent call last):
          File "<stdin>", line 1, in ?
        ValueError: Invalid URL "http://"

        >>> u = url_object(u'http://w3af.com/foo/bar.txt')
        >>> u.path
        u'/foo/bar.txt'

        >>> u = url_object('http://w3af.org/?foo=http://w3af.com')
        >>> u.netloc
        'w3af.org'

        >>> u = url_object('http://w3af.org/', encoding='x-euc-jp')
        Traceback (most recent call last):
          File "<stdin>", line 1, in ?
        ValueError: Invalid encoding "x-euc-jp" when creating URL.
    
        '''
        self._already_calculated_url = None
        self._querystr = None
        self._changed = True
        self._encoding = encoding

        if data is None:
            raise ValueError('Can not build a url_object from data=None.')

        # Verify that the encoding is a valid one. If we don't do it here,
        # things might get crazy afterwards.
        if not is_known_encoding(encoding):
            raise ValueError('Invalid encoding "%s" when creating URL.' % encoding)

        if isinstance(data, tuple):
            scheme, netloc, path, params, qs, fragment = data
        else:
            scheme, netloc, path, params, qs, fragment = \
                                        urlparse.urlparse(data)
            #
            # This is the case when someone creates a url_object like
            # this: url_object('www.w3af.com'). With no scheme and no "//"
            # the parser leaves the whole value in "path".
            #
            # NOTE(review): a scheme-less value that also carries a path
            # (e.g. 'w3af.com/foo') would end up entirely in netloc here --
            # confirm callers never pass one.
            #
            if scheme == netloc == '' and path:
                # By default we set the protocol to "http"
                scheme = 'http'
                netloc = path
                path = ''
        
        # Normalize missing components (None or empty) to an empty unicode
        # string so every attribute is always a string.
        self.scheme = scheme or u''
        self.netloc = netloc or u''
        self.path = path or u''
        self.params = params or u''
        self.querystring = qs or u''
        self.fragment = fragment or u''

        if not self.netloc:
            # The URL is invalid, we don't have a netloc!
            if isinstance(data, tuple):
                invalid_url = urlparse.urlunparse(data)
            else:
                invalid_url = data
            # Call form of raise: same message as before, consistent with the
            # other raises in this method and valid on Python 2 and 3.
            raise ValueError('Invalid URL "%s"' % (invalid_url,))