Example #1
0
    def __init__(self, http_response):
        """
        Initialize the parser state from an HTTP response.

        :param http_response: The response to work on. This method calls its
            get_charset(), get_url() and get_redir_url() methods.
        :raises ValueError: When is_known_encoding() rejects the charset
            reported by the response.
        """
        encoding = http_response.get_charset()
        if not is_known_encoding(encoding):
            raise ValueError('Unknown encoding: %s' % encoding)

        # "set_base_url": a redirect URL, when there is one, takes precedence
        # over the URL of the response itself.
        url = http_response.get_url()
        redir_url = http_response.get_redir_url()
        if redir_url:
            url = redir_url

        self._base_url = url
        self._baseDomain = url.get_domain()
        self._rootDomain = url.get_root_domain()
        # Keep the exact value that was validated above instead of asking the
        # response for its charset a second time.
        self._encoding = encoding

        # Not all parsers have a DOM, but we'll over-generalize just to avoid
        # having extra if statements all around the code.
        self._dom = None

        # Store the http response, this shouldn't be so bad since we're only
        # storing ParserCache.LRU_LENGTH in memory and not storing responses
        # which have more than ParserCache.MAX_CACHEABLE_BODY_LEN in length
        self._http_response = http_response
Example #2
0
    def __init__(self, data, encoding=DEFAULT_ENCODING):
        """
        Build the URL object from its string representation.

        :param data: A string holding the URL, laid out as
            <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
            A value with neither scheme nor netloc whose path does not start
            with "/" (for example 'www.w3af.com') is read as a host name and
            gets the "http" scheme.
        :param encoding: Encoding name to associate with the URL; it is
            checked with is_known_encoding().
        :raises ValueError: If data is not a string, if the encoding is not
            known, or if the result has no netloc and is not a "file" URL.
        """
        # Every URL component starts out unset
        self._querystr = self._fragment = self._scheme = None
        self._netloc = self._path = self._params = None

        # Internal attributes
        self._cache = {}
        self._encoding = encoding

        if not isinstance(data, basestring):
            raise ValueError('Can not build a URL from %s.' % type(data))

        # Reject unknown encodings right away, before they can cause trouble
        # further down the road.
        if not is_known_encoding(encoding):
            raise ValueError(
                'Invalid encoding "%s" when creating URL.' % encoding)

        scheme, netloc, path, params, query, fragment = urlparse.urlparse(data)

        # Handle URL('www.w3af.com'): the parser leaves scheme and netloc
        # empty and hands the whole value back as the path.
        if scheme == '' and netloc == '' and not path.startswith(u'/'):
            # By default we set the protocol to "http"
            scheme, netloc, path = u'http', path, u''

        self.scheme = scheme or u''
        self.netloc = netloc or u''
        self.path = path or u'/'
        self.params = params or u''
        self.querystring = query or u''
        self.fragment = fragment or u''

        # Only "file" URLs are allowed to come without a netloc
        if not self.netloc and self.scheme != 'file':
            raise ValueError('Invalid URL "%s"' % data)

        self.normalize_url()
Example #3
0
    def __init__(self, data, encoding=DEFAULT_ENCODING):
        """
        Create a URL instance by parsing a URL string.

        :param data: The URL as a string, following the layout
            <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
            Input such as 'www.w3af.com' (no scheme, no netloc, path not
            starting with "/") is treated as a host name with "http" as its
            scheme.
        :param encoding: Encoding name stored with the URL; validated through
            is_known_encoding().
        :raises ValueError: For non-string data, for an unknown encoding, and
            for URLs that end up without a netloc unless the scheme is
            "file".
        """
        # The different parts of the url, all unset for now
        self._querystr = self._fragment = None
        self._scheme = self._netloc = None
        self._path = self._params = None

        # Internal attributes
        self._cache = {}
        self._encoding = encoding

        if not isinstance(data, basestring):
            raise ValueError('Can not build a URL from %s.' % type(data))

        # An invalid encoding would only show up as odd failures later on,
        # so it is checked here.
        if not is_known_encoding(encoding):
            raise ValueError(
                'Invalid encoding "%s" when creating URL.' % encoding)

        parts = urlparse.urlparse(data)
        scheme, netloc, path = parts.scheme, parts.netloc, parts.path

        # This is the case when someone creates a URL like
        # this: URL('www.w3af.com')
        if scheme == '' and netloc == '' and not path.startswith(u'/'):
            # By default we set the protocol to "http"
            scheme, netloc, path = u'http', path, u''

        self.scheme = scheme or u''
        self.netloc = netloc or u''
        self.path = path or u'/'
        self.params = parts.params or u''
        self.querystring = parts.query or u''
        self.fragment = parts.fragment or u''

        # Without a netloc the URL is invalid, "file" URLs being the
        # exception.
        if not self.netloc and self.scheme != 'file':
            raise ValueError('Invalid URL "%s"' % data)

        self.normalize_url()
Example #4
0
    def __init__(self, http_response):
        """
        Initialize the instance from an HTTP response.

        :param http_response: The response to work on. This method calls its
            get_charset(), get_url() and get_redir_url() methods.
        :raises ValueError: When is_known_encoding() rejects the charset
            reported by the response.
        """
        encoding = http_response.get_charset()
        if not is_known_encoding(encoding):
            raise ValueError('Unknown encoding: %s' % encoding)

        # "set_base_url": a redirect URL, when there is one, takes precedence
        # over the URL of the response itself.
        url = http_response.get_url()
        redir_url = http_response.get_redir_url()
        if redir_url:
            url = redir_url

        self._base_url = url
        self._baseDomain = url.get_domain()
        self._rootDomain = url.get_root_domain()
        # Keep the exact value that was validated above instead of asking the
        # response for its charset a second time.
        self._encoding = encoding
Example #5
0
    def __init__(self, http_response):
        """
        Initialize the instance from an HTTP response.

        :param http_response: The response to work on. This method calls its
            get_charset(), get_url() and get_redir_url() methods.
        :raises ValueError: When is_known_encoding() rejects the charset
            reported by the response.
        """
        encoding = http_response.get_charset()
        if not is_known_encoding(encoding):
            raise ValueError('Unknown encoding: %s' % encoding)

        # "set_base_url": a redirect URL, when there is one, takes precedence
        # over the URL of the response itself.
        url = http_response.get_url()
        redir_url = http_response.get_redir_url()
        if redir_url:
            url = redir_url

        self._base_url = url
        self._baseDomain = url.get_domain()
        self._rootDomain = url.get_root_domain()
        # Keep the exact value that was validated above instead of asking the
        # response for its charset a second time.
        self._encoding = encoding
Example #6
0
    def __init__(self, data, encoding=DEFAULT_ENCODING):
        """
        Build the URL object from its string representation.

        :param data: A string holding the URL, laid out as
            <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
            Anything that is not a string is rejected. A value with neither
            scheme nor netloc whose path does not start with "/" (for example
            'www.w3af.com') is read as a host name and gets the "http"
            scheme.
        :param encoding: Encoding name to associate with the URL; it is
            checked with is_known_encoding().
        :raises ValueError: If data is not a string, if the encoding is not
            known, or if the result has no netloc and is not a "file" URL.
        """
        self._already_calculated_url = None
        self._querystr = None
        self._changed = True
        self._encoding = encoding

        if not isinstance(data, basestring):
            raise ValueError('Can not build a URL from %s.' % type(data))

        # Verify that the encoding is a valid one. If we don't do it here,
        # things might get crazy afterwards.
        if not is_known_encoding(encoding):
            msg = 'Invalid encoding "%s" when creating URL.'
            raise ValueError(msg % encoding)

        scheme, netloc, path, params, qs, fragment = urlparse.urlparse(data)
        #
        # This is the case when someone creates a URL like
        # this: URL('www.w3af.com')
        #
        if scheme == netloc == '' and not path.startswith('/'):
            # By default we set the protocol to "http"
            scheme = 'http'
            netloc = path
            path = ''

        self.scheme = scheme or u''
        self.netloc = netloc or u''
        self.path = path or u'/'
        self.params = params or u''
        self.querystring = qs or u''
        self.fragment = fragment or u''

        if not self.netloc and self.scheme != 'file':
            # The URL is invalid, we don't have a netloc!
            # Call form of raise: same exception and message as before, but
            # consistent with the raises above and not tied to the
            # Python-2-only "raise Class, value" statement syntax.
            raise ValueError('Invalid URL "%s"' % (data, ))

        self.normalize_url()
Example #7
0
    def __init__(self, HTTPResponse):
        """
        Initialize the instance from an HTTP response.

        :param HTTPResponse: The response to work on. This method calls its
            get_charset(), get_url() and get_redir_url() methods.
        :raises ValueError: When is_known_encoding() rejects the charset
            reported by the response.
        """
        encoding = HTTPResponse.get_charset()
        if not is_known_encoding(encoding):
            raise ValueError('Unknown encoding: %s' % encoding)

        # "setBaseUrl": a redirect URL, when there is one, takes precedence
        # over the URL of the response itself.
        url = HTTPResponse.get_url()
        redir_url = HTTPResponse.get_redir_url()
        if redir_url:
            url = redir_url

        self._base_url = url
        self._baseDomain = url.get_domain()
        self._rootDomain = url.get_root_domain()
        # Keep the exact value that was validated above instead of asking the
        # response for its charset a second time.
        self._encoding = encoding

        # To store results
        self._emails = set()
        self._re_urls = set()
Example #8
0
    def __init__(self, HTTPResponse):
        """
        Initialize the instance from an HTTP response.

        :param HTTPResponse: The response to work on. This method calls its
            get_charset(), get_url() and get_redir_url() methods.
        :raises ValueError: When is_known_encoding() rejects the charset
            reported by the response.
        """
        encoding = HTTPResponse.get_charset()
        if not is_known_encoding(encoding):
            raise ValueError('Unknown encoding: %s' % encoding)

        # "setBaseUrl": a redirect URL, when there is one, takes precedence
        # over the URL of the response itself.
        url = HTTPResponse.get_url()
        redir_url = HTTPResponse.get_redir_url()
        if redir_url:
            url = redir_url

        self._base_url = url
        self._baseDomain = url.get_domain()
        self._rootDomain = url.get_root_domain()
        # Keep the exact value that was validated above instead of asking the
        # response for its charset a second time.
        self._encoding = encoding

        # To store results
        self._emails = set()
        self._re_urls = set()
Example #9
0
    def __init__(self, http_response):
        """
        Initialize the instance from an HTTP response.

        :param http_response: The response to work on. This method calls its
            get_charset(), get_url() and get_redir_url() methods, and keeps a
            reference to the response itself.
        :raises ValueError: When is_known_encoding() rejects the charset
            reported by the response.
        """
        encoding = http_response.get_charset()
        if not is_known_encoding(encoding):
            raise ValueError('Unknown encoding: %s' % encoding)

        # "set_base_url": a redirect URL, when there is one, takes precedence
        # over the URL of the response itself.
        url = http_response.get_url()
        redir_url = http_response.get_redir_url()
        if redir_url:
            url = redir_url

        self._base_url = url
        self._base_domain = url.get_domain()
        self._root_domain = url.get_root_domain()
        # Keep the exact value that was validated above instead of asking the
        # response for its charset a second time.
        self._encoding = encoding

        # Store the http response, this shouldn't be so bad since we're only
        # storing ParserCache.LRU_LENGTH in memory and not storing responses
        # which have more than ParserCache.MAX_CACHEABLE_BODY_LEN in length
        self._http_response = http_response
Example #10
0
 def test_is_known_encoding_false(self):
     # 'andres-16' is expected to be reported as unknown.
     is_known = is_known_encoding('andres-16')
     self.assertFalse(is_known)
Example #11
0
 def test_is_known_encoding_true(self):
     # 'utf-8' is expected to be reported as known.
     is_known = is_known_encoding('utf-8')
     self.assertTrue(is_known)
Example #12
0
 def test_is_known_encoding_false(self):
     # The lookup must come back falsy for the 'andres-16' name.
     lookup_result = is_known_encoding('andres-16')
     self.assertFalse(lookup_result)
Example #13
0
 def test_is_known_encoding_true(self):
     # The lookup must come back truthy for the 'utf-8' name.
     lookup_result = is_known_encoding('utf-8')
     self.assertTrue(lookup_result)