Python BaseParserの例、core.data.parsers.baseparser.BaseParser Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_baseparser.py プロジェクト: Adastra-thw/w3af

    def test_decode_url_url_encoded(self):
        '''
        With the parser encoding set to latin1, the escape %E9 in the URL is
        expected to come back as the unicode character u'\\xe9'.
        '''
        base_url = URL('http://www.w3af.com/')
        http_resp = HTTPResponse(200, u'', Headers(), base_url, base_url,
                                 charset='latin1')
        parser = BaseParser(http_resp)
        # Force the encoding that _decode_url is exercised with.
        parser._encoding = 'latin1'

        self.assertEqual(
            parser._decode_url(u'http://www.w3af.com/ind%E9x.html'),
            u'http://www.w3af.com/ind\xe9x.html')

コード例 #2

0

ファイルを表示

ファイル: test_baseparser.py プロジェクト: Adastra-thw/w3af

    def test_decode_url_ignore_errors(self):
        '''
        With the parser encoding set to utf-8, the escape %FF is expected to
        be dropped from the decoded URL rather than raising an error.
        '''
        base_url = URL('http://www.w3af.com/')
        http_resp = HTTPResponse(200, u'', Headers(), base_url, base_url,
                                 charset='latin1')
        parser = BaseParser(http_resp)
        # The response is latin1 but the parser is switched to utf-8 here.
        parser._encoding = 'utf-8'

        encoded = u'http://w3af.com/blah.jsp?p=SQU-300&bgc=%FFAAAA'
        expected = u'http://w3af.com/blah.jsp?p=SQU-300&bgc=AAAA'
        self.assertEqual(parser._decode_url(encoded), expected)

コード例 #3

0

ファイルを表示

ファイル: test_baseparser.py プロジェクト: Adastra-thw/w3af

    def test_decode_url_skip_safe_chars(self):
        '''
        With the parser encoding set to latin1, the expected result keeps %00
        encoded while %20 becomes a space and %D1 becomes u'\\xd1'.
        '''
        base_url = URL('http://www.w3af.com/')
        http_resp = HTTPResponse(200, u'', Headers(), base_url, base_url,
                                 charset='latin1')
        parser = BaseParser(http_resp)
        parser._encoding = 'latin1'

        encoded = u'http://w3af.com/search.php?a=%00x&b=2%20c=3%D1'
        expected = u'http://w3af.com/search.php?a=%00x&b=2 c=3\xd1'
        self.assertEqual(parser._decode_url(encoded), expected)

コード例 #4

0

ファイルを表示

ファイル: test_baseparser.py プロジェクト: Adastra-thw/w3af

    def test_get_emails_filter(self):
        '''
        get_emails() returns the stored addresses, and the "domain" keyword
        argument narrows the result.
        '''
        # NOTE(review): the address literals below look masked/redacted in
        # this copy of the test; confirm them against the upstream source.
        parser = BaseParser(
            HTTPResponse(200, '', Headers(), self.url, self.url))
        parser._emails = ['*****@*****.**', '*****@*****.**']

        # No filter: everything that was stored comes back.
        unfiltered = parser.get_emails()
        self.assertEqual(unfiltered, ['*****@*****.**', '*****@*****.**'])

        # Filtered by domain.
        same_domain = parser.get_emails(domain='w3af.com')
        self.assertEqual(same_domain, ['*****@*****.**'])

        other_domain = parser.get_emails(domain='not-w3af.com')
        self.assertEqual(other_domain, ['*****@*****.**'])

コード例 #5

0

ファイルを表示

ファイル: test_baseparser.py プロジェクト: Adastra-thw/w3af

    def test_parse_blank(self):
        '''
        A BaseParser built from an empty response reports no emails, and its
        other getters raise NotImplementedError.
        '''
        parser = BaseParser(
            HTTPResponse(200, '', Headers(), self.url, self.url))

        self.assertEqual(parser.get_emails(), set())

        # Getters checked in the same order as always.
        unimplemented = ('get_comments', 'get_forms', 'get_meta_redir',
                         'get_meta_tags', 'get_references', 'get_scripts')
        for getter_name in unimplemented:
            self.assertRaises(NotImplementedError,
                              getattr(parser, getter_name))

コード例 #6

0

ファイルを表示

    def __init__(self, HTTPResponse):
        '''
        Initialise the base parser and parse the response body, inflating it
        first when self._is_compressed() says it is compressed.

        :param HTTPResponse: Response object; its get_body() result is the
                             data that gets parsed.
        '''
        BaseParser.__init__(self, HTTPResponse)

        body = HTTPResponse.get_body()

        if not self._is_compressed(body):
            self._parse(body)
            return

        try:
            inflated = self._inflate(body)
        except Exception:
            # Best effort: when inflating fails there is nothing to parse.
            return

        self._parse(inflated)

コード例 #7

0

ファイルを表示

    def __init__(self, http_resp):
        '''
        Set up the parser's state flags and result containers, then run the
        pre-parse step and the parse itself on http_resp.

        :param http_resp: Response object passed to BaseParser.__init__,
                          self._pre_parse and self._parse.
        '''
        BaseParser.__init__(self, http_resp)

        # State flags, all initially False (the names suggest they track the
        # tag currently being processed).
        self._inside_form = self._inside_select = False
        self._inside_textarea = self._inside_script = False

        # Containers, all initially empty; each gets its own object.
        self._tag_and_url, self._parsed_urls = set(), set()
        self._forms, self._comments_in_doc = [], []
        self._scripts_in_doc, self._meta_redirs, self._meta_tags = [], [], []

        # Preparation step first, then the real parsing.
        self._pre_parse(http_resp)
        self._parse(http_resp)

コード例 #8

0

ファイルを表示

ファイル: sgml.py プロジェクト: Adastra-thw/w3af

    def __init__(self, http_resp):
        '''
        Prepare the internal flags and containers and then parse http_resp.

        :param http_resp: Response object forwarded to BaseParser.__init__,
                          self._pre_parse and self._parse.
        '''
        BaseParser.__init__(self, http_resp)

        # Internal state flags, every one starting out as False.
        self._inside_form = self._inside_select = False
        self._inside_textarea = self._inside_script = False

        # Internal containers: two sets followed by five lists, all empty.
        self._tag_and_url, self._parsed_urls = set(), set()
        self._forms, self._comments_in_doc = [], []
        self._scripts_in_doc, self._meta_redirs, self._meta_tags = [], [], []

        # Run the pre-parse hook before the actual parse.
        self._pre_parse(http_resp)
        self._parse(http_resp)

コード例 #9

0

ファイルを表示

ファイル: test_baseparser.py プロジェクト: Adastra-thw/w3af

 def setUp(self):
     '''Store the base URL and a BaseParser built from an empty 200 response.'''
     base_url = URL('http://www.w3af.com/')
     self.url = base_url
     self.bp_inst = BaseParser(
         HTTPResponse(200, '', Headers(), base_url, base_url))

コード例 #10

0

ファイルを表示

ファイル: test_baseparser.py プロジェクト: Adastra-thw/w3af

class TestBaseParser(unittest.TestCase):
    '''
    Unit tests for BaseParser: parsing an empty response, email handling,
    regular expression based URL discovery and URL decoding.
    '''

    def setUp(self):
        '''Store the base URL and a BaseParser built from an empty response.'''
        self.url = URL('http://www.w3af.com/')
        self.bp_inst = BaseParser(self._empty_response())

    # -- private helpers ---------------------------------------------------

    def _empty_response(self):
        '''Return a 200 response with an empty body for self.url.'''
        return HTTPResponse(200, '', Headers(), self.url, self.url)

    def _decoding_parser(self, encoding):
        '''
        Return a BaseParser built from an empty latin1 response whose
        _encoding attribute has been overridden with `encoding`.
        '''
        base_url = URL('http://www.w3af.com/')
        http_resp = HTTPResponse(200, u'', Headers(), base_url, base_url,
                                 charset='latin1')
        parser = BaseParser(http_resp)
        parser._encoding = encoding
        return parser

    def _check_extracted_emails(self, text, expected):
        '''Assert that the shared parser extracts `expected` from `text`.'''
        self.assertEqual(self.bp_inst._extract_emails(text), expected)

    def _check_regex_urls(self, text, expected):
        '''Feed `text` to _regex_url_parse and compare the collected URLs.'''
        self.bp_inst._regex_url_parse(text)
        self.assertEqual(expected, self.bp_inst._re_urls)

    # -- blank input / email filtering ---------------------------------------

    def test_parse_blank(self):
        '''
        A parser over an empty response has no emails, and its other getters
        raise NotImplementedError.
        '''
        parser = BaseParser(self._empty_response())

        self.assertEqual(parser.get_emails(), set())

        unimplemented = ('get_comments', 'get_forms', 'get_meta_redir',
                         'get_meta_tags', 'get_references', 'get_scripts')
        for getter_name in unimplemented:
            self.assertRaises(NotImplementedError,
                              getattr(parser, getter_name))

    def test_get_emails_filter(self):
        '''get_emails() returns stored addresses, optionally per domain.'''
        # NOTE(review): the address literals in this class look
        # masked/redacted in this copy; confirm against the upstream source.
        parser = BaseParser(self._empty_response())
        parser._emails = ['*****@*****.**', '*****@*****.**']

        unfiltered = parser.get_emails()
        self.assertEqual(unfiltered, ['*****@*****.**', '*****@*****.**'])

        same_domain = parser.get_emails(domain='w3af.com')
        self.assertEqual(same_domain, ['*****@*****.**'])

        other_domain = parser.get_emails(domain='not-w3af.com')
        self.assertEqual(other_domain, ['*****@*****.**'])

    # -- _extract_emails -----------------------------------------------------

    def test_extract_emails_blank(self):
        '''An empty string yields an empty set of emails.'''
        self._check_extracted_emails('', set())

    def test_extract_emails_simple(self):
        '''An address surrounded by spaces is extracted.'''
        self._check_extracted_emails(u' [email protected] ',
                                     set([u'*****@*****.**']))

    def test_extract_emails_mailto(self):
        '''An address inside a mailto: href is extracted.'''
        self._check_extracted_emails(
            u'<a href="mailto:[email protected]">test</a>',
            set([u'*****@*****.**']))

    def test_extract_emails_mailto_dup(self):
        '''Address in both the href and the link text; one entry expected.'''
        self._check_extracted_emails(
            u'<a href="mailto:[email protected]">[email protected]</a>',
            set([u'*****@*****.**']))

    def test_extract_emails_mailto_not_dup(self):
        '''Addresses in the href and the link text, both expected back.'''
        self._check_extracted_emails(
            u'<a href="mailto:[email protected]">[email protected]</a>',
            set([u'*****@*****.**', u'*****@*****.**']))

    def test_extract_emails_dash(self):
        '''Address embedded between other words (the "dash" case).'''
        self._check_extracted_emails(u'header [email protected] footer',
                                     set([u'*****@*****.**']))

    def test_extract_emails_number(self):
        '''Address embedded between other words (the "number" case).'''
        self._check_extracted_emails(u'header [email protected] footer',
                                     set([u'*****@*****.**']))

    # -- _regex_url_parse ----------------------------------------------------

    def test_regex_url_parse_blank(self):
        '''Parsing an empty string leaves _re_urls as an empty set.'''
        self.bp_inst._regex_url_parse('')
        self.assertEqual(self.bp_inst._re_urls, set())

    def test_regex_url_parse_full_url(self):
        '''An absolute URL inside plain text is collected.'''
        self._check_regex_urls(
            u'header http://www.w3af.com/foo/bar/index.html footer',
            set([URL('http://www.w3af.com/foo/bar/index.html'), ]))

    def test_regex_url_parse_relative_url_paths(self):
        '''A rooted relative path is expected as a www.w3af.com URL.'''
        self._check_regex_urls(
            u'header /foo/bar/index.html footer',
            set([URL('http://www.w3af.com/foo/bar/index.html'), ]))

    def test_regex_url_parse_relative_url_slash_file(self):
        '''A "/file" reference is expected as a www.w3af.com URL.'''
        self._check_regex_urls(
            u'header /subscribe.jsp footer',
            set([URL('http://www.w3af.com/subscribe.jsp'), ]))

    def test_regex_url_parse_relative_url_file_only(self):
        '''
        The expected result here is an empty set: bare file names could be
        supported, but matching them is too greedy and produces many parser
        false positives.
        '''
        self._check_regex_urls(u'header subscribe.jsp footer', set())

    def test_regex_url_parse_relative_url_a_tag(self):
        '''A rooted path inside an <a href> attribute is collected.'''
        self._check_regex_urls(
            u'header <a href="/foo/bar/index.html">foo</a> footer',
            set([URL('http://www.w3af.com/foo/bar/index.html'), ]))

    def test_regex_url_parse_relative_no_slash(self):
        '''An href without any slash is not collected.'''
        self._check_regex_urls(u'header <a href="index">foo</a> footer',
                               set())

    # -- _decode_url ---------------------------------------------------------

    def test_decode_url_simple(self):
        '''A URL without escapes is returned unchanged.'''
        parser = self._decoding_parser('latin1')

        self.assertEqual(
            parser._decode_url(u'http://www.w3af.com/index.html'),
            u'http://www.w3af.com/index.html')

    def test_decode_url_url_encoded(self):
        '''With latin1, %E9 is expected to decode to u'\\xe9'.'''
        parser = self._decoding_parser('latin1')

        self.assertEqual(
            parser._decode_url(u'http://www.w3af.com/ind%E9x.html'),
            u'http://www.w3af.com/ind\xe9x.html')

    def test_decode_url_skip_safe_chars(self):
        '''
        With latin1, the expected result keeps %00 encoded while %20 becomes
        a space and %D1 becomes u'\\xd1'.
        '''
        parser = self._decoding_parser('latin1')

        encoded = u'http://w3af.com/search.php?a=%00x&b=2%20c=3%D1'
        expected = u'http://w3af.com/search.php?a=%00x&b=2 c=3\xd1'
        self.assertEqual(parser._decode_url(encoded), expected)

    def test_decode_url_ignore_errors(self):
        '''With utf-8, %FF is expected to be dropped from the result.'''
        parser = self._decoding_parser('utf-8')

        encoded = u'http://w3af.com/blah.jsp?p=SQU-300&bgc=%FFAAAA'
        expected = u'http://w3af.com/blah.jsp?p=SQU-300&bgc=AAAA'
        self.assertEqual(parser._decode_url(encoded), expected)