def test_inputs_in_out_form(self):
    # Inputs declared outside the <form> tag (both before and after it) are
    # expected to be attached to the only form in the document. The result
    # must also be equal to the form parsed from a document where the very
    # same inputs are declared inside the <form> tag.

    # 1st document: an empty form in the middle, inputs all around it
    empty_form = FORM_WITHOUT_METHOD % {'form_content': ''}
    scattered_inputs = (INPUT_TEXT_WITH_NAME + INPUT_TEXT_WITH_ID +
                        INPUT_FILE_WITH_NAME + INPUT_SUBMIT_WITH_NAME +
                        empty_form +
                        INPUT_RADIO_WITH_NAME + INPUT_CHECKBOX_WITH_NAME +
                        INPUT_HIDDEN)
    body = HTML_DOC % {'head': '', 'body': scattered_inputs}
    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    # Only one form. assertEqual (instead of assertTrue on a comparison)
    # reports the actual number of forms when the check fails.
    self.assertEqual(1, len(parser.forms))

    # Ensure that the parsed inputs actually belong to the form and have
    # the expected values
    form = parser.forms[0]
    self.assertEqual(['bar'], form['foo1'])  # text input
    self.assertEqual(['bar'], form['foo2'])  # text input
    self.assertEqual([''], form['foo5'])  # radio input
    self.assertEqual([''], form['foo6'])  # checkbox input
    self.assertEqual(['bar'], form['foo7'])  # hidden input
    self.assertEqual([''], form['foo4'])  # submit input
    self.assertEqual([''], form['foo3'])  # file input

    # 2nd document: the same inputs, all declared inside the form
    enclosed_inputs = (INPUT_TEXT_WITH_NAME + INPUT_TEXT_WITH_ID +
                       INPUT_FILE_WITH_NAME + INPUT_SUBMIT_WITH_NAME +
                       INPUT_RADIO_WITH_NAME + INPUT_CHECKBOX_WITH_NAME +
                       INPUT_HIDDEN)
    full_form = FORM_WITHOUT_METHOD % {'form_content': enclosed_inputs}
    body2 = HTML_DOC % {'head': '', 'body': full_form}
    parser2 = RaiseHTMLParser(build_http_response(self.url, body2))
    parser2.parse()

    # Finally assert that both parsed forms are equal
    self.assertEqual(form, parser2.forms[0])
def test_inputs_in_out_form(self):
    # Inputs declared outside the <form> tag (both before and after it) are
    # expected to be attached to the only form in the document. The result
    # must also be equal to the form parsed from a document where the very
    # same inputs are declared inside the <form> tag.

    # 1st document: an empty form in the middle, inputs all around it
    empty_form = FORM_WITHOUT_METHOD % {'form_content': ''}
    scattered_inputs = (INPUT_TEXT_WITH_NAME + INPUT_TEXT_WITH_ID +
                        INPUT_FILE_WITH_NAME + INPUT_SUBMIT_WITH_NAME +
                        empty_form +
                        INPUT_RADIO_WITH_NAME + INPUT_CHECKBOX_WITH_NAME +
                        INPUT_HIDDEN)
    body = HTML_DOC % {'head': '', 'body': scattered_inputs}
    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    # Only one form. assertEqual (instead of assertTrue on a comparison)
    # reports the actual number of forms when the check fails.
    self.assertEqual(1, len(parser.forms))

    # Ensure that the parsed inputs actually belong to the form and have
    # the expected values
    form = parser.forms[0]
    self.assertEqual(['bar'], form['foo1'])  # text input
    self.assertEqual(['bar'], form['foo2'])  # text input
    self.assertEqual([''], form['foo5'])  # radio input
    self.assertEqual([''], form['foo6'])  # checkbox input
    self.assertEqual(['bar'], form['foo7'])  # hidden input
    self.assertEqual([''], form['foo4'])  # submit input
    self.assertEqual(['bar'], form['foo3'])  # file input

    # 2nd document: the same inputs, all declared inside the form
    enclosed_inputs = (INPUT_TEXT_WITH_NAME + INPUT_TEXT_WITH_ID +
                       INPUT_FILE_WITH_NAME + INPUT_SUBMIT_WITH_NAME +
                       INPUT_RADIO_WITH_NAME + INPUT_CHECKBOX_WITH_NAME +
                       INPUT_HIDDEN)
    full_form = FORM_WITHOUT_METHOD % {'form_content': enclosed_inputs}
    body2 = HTML_DOC % {'head': '', 'body': full_form}
    parser2 = RaiseHTMLParser(build_http_response(self.url, body2))
    parser2.parse()

    # Finally assert that both parsed forms are equal
    self.assertEqual(form, parser2.forms[0])
def test_script_tag_link_extraction_relative(self):
    # A relative path assigned to window.location inside a <script> tag is
    # expected to show up as the absolute URL http://w3af.com/foo.php in the
    # second element of parser.references.
    # NOTE(review): presumably index 1 holds the references that were not
    # extracted from tag attributes -- confirm against the parser.
    body = '''<script>window.location = "/foo.php";</script>'''
    response = build_http_response(self.url, body)
    parser = RaiseHTMLParser(response)
    parser.parse()

    expected_refs = [URL('http://w3af.com/foo.php')]
    self.assertEqual(expected_refs, parser.references[1])
def test_selects_in_out_form(self):
    # Both named <select> tags are expected to be parsed inside the form.
    # Because they have the same name/id the same entry will be used in the
    # form, although the values will be duplicated when that applies.
    form_with_select = FORM_WITHOUT_METHOD % {'form_content': SELECT_WITH_ID}
    nameless_select = '<select><option value="xxx"/><option value="yyy"/></select>'
    body = HTML_DOC % {
        'head': '',
        'body': SELECT_WITH_NAME + form_with_select + nameless_select,
    }
    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    # No pending parsed selects
    self.assertEqual(0, len(parser._select_option_values))

    # Only 1 select (2 have the same name); the last one is not parsed as
    # it has no name/id
    form = parser.forms[0]

    # meta has all the values
    select_values = form.meta['vehicle'][0].values
    for expected_option in ('car', 'plane', 'bike'):
        self.assertIn(expected_option, select_values)

    # The "current" value is the first that was found
    self.assertEqual(form['vehicle'], ['car'])

    # "xxx" and "yyy" options were not parsed because they are outside the
    # form tag and don't have a name attribute
    self.assertNotIn('xxx', form.get_option_names())
    self.assertNotIn('yyy', form.get_option_names())
def test_a_link_absolute(self):
    # The document in A_LINK_ABSOLUTE, served as text/html, is expected to
    # yield http://w3af.com/home.php as the only entry in the first element
    # of parser.references.
    headers = Headers([('content-type', 'text/html')])
    response = build_http_response(self.url, A_LINK_ABSOLUTE,
                                   headers=headers)
    parser = RaiseHTMLParser(response)
    parser.parse()

    expected_refs = [URL('http://w3af.com/home.php')]
    self.assertEqual(expected_refs, parser.references[0])
def test_script_tag_link_extraction_relative(self):
    # A relative path assigned to window.location inside a <script> tag is
    # expected to show up as the absolute URL http://w3af.com/foo.php in the
    # second element of parser.references.
    # NOTE(review): presumably index 1 holds the references that were not
    # extracted from tag attributes -- confirm against the parser.
    body = '''<script>window.location = "/foo.php";</script>'''
    response = build_http_response(self.url, body)
    parser = RaiseHTMLParser(response)
    parser.parse()

    expected_refs = [URL('http://w3af.com/foo.php')]
    self.assertEqual(expected_refs, parser.references[1])
def test_selects_in_out_form(self):
    # Both named <select> tags are expected to be parsed inside the form.
    # Because they have the same name/id the same entry will be used in the
    # form, although the values will be duplicated when that applies.
    form_with_select = FORM_WITHOUT_METHOD % {'form_content': SELECT_WITH_ID}
    nameless_select = '<select><option value="xxx"/><option value="yyy"/></select>'
    body = HTML_DOC % {
        'head': '',
        'body': SELECT_WITH_NAME + form_with_select + nameless_select,
    }
    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    # No pending parsed selects
    self.assertEqual(0, len(parser._select_option_values))

    # Only 1 select (2 have the same name); the last one is not parsed as
    # it has no name/id
    form = parser.forms[0]

    # meta has all the values
    select_values = form.meta['vehicle'][0].values
    for expected_option in ('car', 'plane', 'bike'):
        self.assertIn(expected_option, select_values)

    # The "current" value is the first that was found
    self.assertEqual(form['vehicle'], ['car'])

    # "xxx" and "yyy" options were not parsed because they are outside the
    # form tag and don't have a name attribute
    self.assertNotIn('xxx', form.get_option_names())
    self.assertNotIn('yyy', form.get_option_names())
def test_a_link_absolute(self):
    # The document in A_LINK_ABSOLUTE, served as text/html, is expected to
    # yield http://w3af.com/home.php as the only entry in the first element
    # of parser.references.
    headers = Headers([('content-type', 'text/html')])
    response = build_http_response(self.url, A_LINK_ABSOLUTE,
                                   headers=headers)
    parser = RaiseHTMLParser(response)
    parser.parse()

    expected_refs = [URL('http://w3af.com/home.php')]
    self.assertEqual(expected_refs, parser.references[0])
def test_forms(self):
    # A document with two empty <form> tags must produce two parsed forms
    get_form = FORM_METHOD_GET % {'form_content': ''}
    no_action_form = FORM_WITHOUT_ACTION % {'form_content': ''}
    body = HTML_DOC % {'head': '', 'body': get_form + no_action_form}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual(2, len(parser.forms))
def test_forms(self):
    # A document with two empty <form> tags must produce two parsed forms
    get_form = FORM_METHOD_GET % {'form_content': ''}
    no_action_form = FORM_WITHOUT_ACTION % {'form_content': ''}
    body = HTML_DOC % {'head': '', 'body': get_form + no_action_form}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual(2, len(parser.forms))
def test_unicodedecoreerror_ascii_url(self):
    # Smoke test: parsing the se.html fixture served with a utf-8 charset
    # must complete without raising. There are no further assertions.
    html_path = os.path.join(ROOT_PATH, 'core', 'data', 'parsers', 'doc',
                             'tests', 'data', 'se.html')

    # Context manager so the file handle is always closed, instead of being
    # left open until garbage collection
    with open(html_path) as html_file:
        body = html_file.read()

    headers = Headers()
    headers['content-type'] = 'text/html; charset=utf-8'
    response = build_http_response(self.url, body, headers=headers)

    parser = RaiseHTMLParser(response)
    parser.parse()
def test_form_without_method(self):
    """
    A form that declares no 'method' attribute must default to 'GET'
    """
    form_html = FORM_WITHOUT_METHOD % {'form_content': ''}
    body = HTML_DOC % {'head': '', 'body': form_html}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual('GET', parser.forms[0].get_method())
def test_form_without_method(self):
    """
    A form that declares no 'method' attribute must default to 'GET'
    """
    form_html = FORM_WITHOUT_METHOD % {'form_content': ''}
    body = HTML_DOC % {'head': '', 'body': form_html}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual('GET', parser.forms[0].get_method())
def test_no_forms(self):
    # Form-related tags without any <form> tag in the document: no form
    # should be parsed
    orphan_tags = (INPUT_TEXT_WITH_NAME + INPUT_HIDDEN + SELECT_WITH_ID +
                   TEXTAREA_WITH_ID_AND_DATA + INPUT_FILE_WITH_NAME)
    body = HTML_DOC % {'head': '', 'body': orphan_tags}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual(0, len(parser.forms))
def test_no_forms(self):
    # Form-related tags without any <form> tag in the document: no form
    # should be parsed
    orphan_tags = (INPUT_TEXT_WITH_NAME + INPUT_HIDDEN + SELECT_WITH_ID +
                   TEXTAREA_WITH_ID_AND_DATA + INPUT_FILE_WITH_NAME)
    body = HTML_DOC % {'head': '', 'body': orphan_tags}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual(0, len(parser.forms))
def test_form_without_action(self):
    """
    If the form has no 'action' => HTTPResponse's url will be used
    """
    form_html = FORM_WITHOUT_ACTION % {'form_content': ''}
    body = HTML_DOC % {'head': '', 'body': form_html}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual(self.url, parser.forms[0].get_action())
def test_form_without_action(self):
    """
    If the form has no 'action' => HTTPResponse's url will be used
    """
    form_html = FORM_WITHOUT_ACTION % {'form_content': ''}
    body = HTML_DOC % {'head': '', 'body': form_html}

    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    self.assertEqual(self.url, parser.forms[0].get_action())
def test_form_with_invalid_url_in_action(self):
    """
    When the form's action holds an invalid URL (here a javascript: one)
    the action must fall back to the response URL
    """
    body = """ <html> <form action="javascript:history.back(1)"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(self.url, parser.forms[0].get_action())
def test_form_with_invalid_url_in_action(self):
    """
    When the form's action holds an invalid URL (here a javascript: one)
    the action must fall back to the response URL
    """
    body = """ <html> <form action="javascript:history.back(1)"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(self.url, parser.forms[0].get_action())
def test_form_multiline_tags(self):
    """
    Regression test for a form found in the wild which the parser was
    unable to handle.
    """
    response = build_http_response(self.url, FORM_MULTILINE_TAGS)
    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(1, len(parser.forms))

    parsed_form = parser.forms[0]
    self.assertEqual(self.url, parsed_form.get_action())
    self.assertEqual('POST', parsed_form.get_method())

    # Both parameters must be present in the parsed form
    self.assertIn('input', parsed_form)
    self.assertIn('csrfmiddlewaretoken', parsed_form)
def test_form_multiline_tags(self):
    """
    Regression test for a form found in the wild which the parser was
    unable to handle.
    """
    response = build_http_response(self.url, FORM_MULTILINE_TAGS)
    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(1, len(parser.forms))

    parsed_form = parser.forms[0]
    self.assertEqual(self.url, parsed_form.get_action())
    self.assertEqual('POST', parsed_form.get_method())

    # Both parameters must be present in the parsed form
    self.assertIn('input', parsed_form)
    self.assertIn('csrfmiddlewaretoken', parsed_form)
def test_complex_form_parse_and_variants(self):
    """
    Regression test reported by one of our partners: too many variants
    seemed to be generated for a complex form.
    """
    # Context manager so the fixture's file handle is always closed
    with open(self.COMPLEX_FORM) as complex_form_file:
        body = complex_form_file.read()

    response = build_http_response(self.url, body)
    parser = RaiseHTMLParser(response)
    parser.parse()

    mode = cf.cf.get('form_fuzzing_mode')
    form_params = parser.forms[0]

    # The number of generated variants is expected to be TOP_VARIANTS + 1
    variants = list(form_params.get_variants(mode))
    self.assertEqual(len(variants), form_params.TOP_VARIANTS + 1)

    self.assertEqual(len(form_params.meta.keys()), 31)
    self.assertEqual(form_params.meta.keys(), self.EXPECTED_PARAMS)
def test_tricky_multipart_get_form_11997(self):
    # A GET form which declares a multipart enctype and carries a non-ascii
    # value: it must be parsed as a single GET form and report the default
    # form encoding.
    body = """ <html> <form action="" method="get" enctype="multipart/form-data"> <input type="text" name="test" value="тест"> <input type="submit" name="submit"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(len(parser.forms), 1)

    parsed_form = parser.forms[0]
    self.assertEqual(parsed_form.get_method(), 'GET')
    self.assertIsInstance(parsed_form, FormParameters)
    self.assertEqual(parsed_form.get_form_encoding(), DEFAULT_FORM_ENCODING)
def test_complex_form_parse_and_variants(self):
    """
    Regression test reported by one of our partners: too many variants
    seemed to be generated for a complex form.
    """
    # Context manager so the fixture's file handle is always closed
    with open(self.COMPLEX_FORM) as complex_form_file:
        body = complex_form_file.read()

    response = build_http_response(self.url, body)
    parser = RaiseHTMLParser(response)
    parser.parse()

    mode = cf.cf.get('form_fuzzing_mode')
    form_params = parser.forms[0]

    # The number of generated variants is expected to be TOP_VARIANTS + 1
    variants = list(form_params.get_variants(mode))
    self.assertEqual(len(variants), form_params.TOP_VARIANTS + 1)

    self.assertEqual(len(form_params.meta.keys()), 31)
    self.assertEqual(form_params.meta.keys(), self.EXPECTED_PARAMS)
def test_form_with_repeated_parameter_names(self):
    # Setup: a POST form holding the same named textarea twice
    form_html = FORM_METHOD_POST % {
        'form_content': TEXTAREA_WITH_NAME_AND_DATA * 2,
    }
    body = HTML_DOC % {'head': '', 'body': form_html}
    response = build_http_response(self.url, body)
    parser = RaiseHTMLParser(response)

    # Run the parser
    parser.parse()

    # Asserts: one form, with both values kept under the repeated name
    self.assertEqual(1, len(parser.forms))

    parsed_form = parser.forms[0]
    self.assertIsInstance(parsed_form, FormParameters)
    self.assertEqual(parsed_form['sample_name'],
                     ['sample_value', 'sample_value'])
def test_tricky_multipart_get_form_11997(self):
    # A GET form which declares a multipart enctype and carries a non-ascii
    # value: it must be parsed as a single GET form and report the default
    # form encoding.
    body = """ <html> <form action="" method="get" enctype="multipart/form-data"> <input type="text" name="test" value="тест"> <input type="submit" name="submit"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(len(parser.forms), 1)

    parsed_form = parser.forms[0]
    self.assertEqual(parsed_form.get_method(), 'GET')
    self.assertIsInstance(parsed_form, FormParameters)
    self.assertEqual(parsed_form.get_form_encoding(), DEFAULT_FORM_ENCODING)
def test_form_with_repeated_parameter_names(self):
    # Setup: a POST form holding the same named textarea twice
    form_html = FORM_METHOD_POST % {
        'form_content': TEXTAREA_WITH_NAME_AND_DATA * 2,
    }
    body = HTML_DOC % {'head': '', 'body': form_html}
    response = build_http_response(self.url, body)
    parser = RaiseHTMLParser(response)

    # Run the parser
    parser.parse()

    # Asserts: one form, with both values kept under the repeated name
    self.assertEqual(1, len(parser.forms))

    parsed_form = parser.forms[0]
    self.assertIsInstance(parsed_form, FormParameters)
    self.assertEqual(parsed_form['sample_name'],
                     ['sample_value', 'sample_value'])
def test_textareas_in_out_form(self):
    # One <textarea> before the form, one inside it and one with an empty
    # name after it
    form_html = FORM_WITHOUT_METHOD % {
        'form_content': TEXTAREA_WITH_NAME_AND_DATA,
    }
    body = HTML_DOC % {
        'head': '',
        'body': (TEXTAREA_WITH_ID_AND_DATA + form_html +
                 TEXTAREA_WITH_NAME_EMPTY),
    }
    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    # textareas are parsed as regular inputs
    form = parser.forms[0]
    self.assertEqual(form.get('sample_id'), form.get('sample_name'))
    self.assertEqual(form.get('sample_id'), ['sample_value'])

    # Last <textarea> with empty name wasn't parsed
    self.assertEqual(2, len(form))
def test_textareas_in_out_form(self):
    # One <textarea> before the form, one inside it and one with an empty
    # name after it
    form_html = FORM_WITHOUT_METHOD % {
        'form_content': TEXTAREA_WITH_NAME_AND_DATA,
    }
    body = HTML_DOC % {
        'head': '',
        'body': (TEXTAREA_WITH_ID_AND_DATA + form_html +
                 TEXTAREA_WITH_NAME_EMPTY),
    }
    parser = RaiseHTMLParser(build_http_response(self.url, body))
    parser.parse()

    # textareas are parsed as regular inputs
    form = parser.forms[0]
    self.assertEqual(form.get('sample_id'), form.get('sample_name'))
    self.assertEqual(form.get('sample_id'), ['sample_value'])

    # Last <textarea> with empty name wasn't parsed
    self.assertEqual(2, len(form))
def test_form_exclude_zero_of_two(self):
    # Store a form-id list in the configuration, then parse a document with
    # two forms and expect both of them to be returned.
    # NOTE(review): presumably 'form_id_list' works as an exclusion list and
    # neither form matches an entry on both action and method -- confirm
    # against FormIDMatcherList.
    user_value = '[{"action": "/foo", "method": "post"}, {"action": "/nomatch", "method": "post"}]'
    form_id_list = FormIDMatcherList(user_value)
    cf.cf.save('form_id_list', form_id_list)

    body = """ <html> <form action="/foo" method="get"> <input type="text" name="test" value="hello"> <input type="submit" name="submit"> </form> <form action="/bar" method="post"> <input type="text" name="test" value="hello"> <input type="submit" name="submit"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(len(parser.forms), 2)
def test_form_exclude_zero_of_two(self):
    # Store a form-id list in the configuration, then parse a document with
    # two forms and expect both of them to be returned.
    # NOTE(review): presumably 'form_id_list' works as an exclusion list and
    # neither form matches an entry on both action and method -- confirm
    # against FormIDMatcherList.
    user_value = '[{"action": "/foo", "method": "post"}, {"action": "/nomatch", "method": "post"}]'
    form_id_list = FormIDMatcherList(user_value)
    cf.cf.save('form_id_list', form_id_list)

    body = """ <html> <form action="/foo" method="get"> <input type="text" name="test" value="hello"> <input type="submit" name="submit"> </form> <form action="/bar" method="post"> <input type="text" name="test" value="hello"> <input type="submit" name="submit"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(len(parser.forms), 2)
def test_form_with_invalid_enctype(self):
    # A form declaring an unknown enctype is still parsed: the form keeps
    # the declared encoding, but the data container built from it is a
    # url-encoded form.
    body = """ <html> <form action="" method="get" enctype="ilove/bugs"> <input type="text" name="test" value="hello"> <input type="submit" name="submit"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(len(parser.forms), 1)

    parsed_form = parser.forms[0]
    self.assertEqual(parsed_form.get_method(), 'GET')
    self.assertIsInstance(parsed_form, FormParameters)
    self.assertEqual(parsed_form.get_form_encoding(), 'ilove/bugs')

    # But it translates to url-encoded form afterwards
    data_container = dc_from_form_params(parsed_form)
    self.assertIsInstance(data_container, URLEncodedForm)
def test_form_with_invalid_enctype(self):
    # A form declaring an unknown enctype is still parsed: the form keeps
    # the declared encoding, but the data container built from it is a
    # url-encoded form.
    body = """ <html> <form action="" method="get" enctype="ilove/bugs"> <input type="text" name="test" value="hello"> <input type="submit" name="submit"> </form> </html>"""
    response = build_http_response(self.url, body)

    parser = RaiseHTMLParser(response)
    parser.parse()

    self.assertEqual(len(parser.forms), 1)

    parsed_form = parser.forms[0]
    self.assertEqual(parsed_form.get_method(), 'GET')
    self.assertIsInstance(parsed_form, FormParameters)
    self.assertEqual(parsed_form.get_form_encoding(), 'ilove/bugs')

    # But it translates to url-encoded form afterwards
    data_container = dc_from_form_params(parsed_form)
    self.assertIsInstance(data_container, URLEncodedForm)