예제 #1
0
 def _pre_parse(self, http_resp):
     '''
     Run the parent SGMLParser pre-parse step, then check that a base URL
     is available.

     @parameter http_resp: The HTTP response document that contains the
     HTML document inside its body.
     @raise AssertionError: When self._baseUrl is falsy after the parent
     hook has returned. The check is skipped when Python runs with -O.
     '''
     # Delegate to the parent implementation first.
     # NOTE(review): presumably this call is what populates self._baseUrl;
     # confirm against SGMLParser._pre_parse.
     SGMLParser._pre_parse(self, http_resp)
     # Truthiness check: None and empty values are both rejected.
     assert self._baseUrl, 'The base URL must be set.'
예제 #2
0
 def _handle_textarea_tag_end(self, tag):
     """
     Close a textarea element and record it as a form input.

     The textarea's collected name and text are packed into a name/value
     mapping. When at least one form exists, the mapping's items are added
     to the last form in self._forms; otherwise the mapping itself is
     appended to self._saved_inputs.

     @parameter tag: Tag name, passed on to the SGMLParser handler.
     """
     SGMLParser._handle_textarea_tag_end(self, tag)

     textarea_attrs = {
         'name': self._textarea_tag_name,
         'value': self._textarea_data,
     }

     if self._forms:
         # Attach the textarea to the form that was added last.
         self._forms[-1].addInput(textarea_attrs.items())
         return

     # No form seen so far: keep the textarea in the saved-inputs list.
     self._saved_inputs.append(textarea_attrs)
예제 #3
0
    def _pre_parse(self, httpResponse):
        '''
        Run the parent SGMLParser pre-parse step, then check that a base URL
        is available.

        @parameter httpResponse: The HTTP response document that contains the WML
        document inside its body.
        @raise AssertionError: When self._baseUrl is None after the parent
        hook has returned. The check is skipped when Python runs with -O.

        The examples below are doctests that exercise wmlParser as a whole.

        Init,
        >>> from core.data.url.httpResponse import httpResponse as httpResponse
        >>> u = url_object('http://www.w3af.com/')
        
        Parse a simple form,
        >>> form = """
        ...    <go method="post" href="dataReceptor.php">
        ...        <postfield name="clave" value="$(clave)"/>
        ...        <postfield name="cuenta" value="$(cuenta)"/>
        ...        <postfield name="tipdat" value="D"/>
        ...    </go>"""
        >>> response = httpResponse( 200, form, {}, u, u )
        >>> w = wmlParser(response)
        >>> w.getForms()
        [Form({'clave': ['$(clave)'], 'cuenta': ['$(cuenta)'], 'tipdat': ['D']})]

        Get the simplest link
        >>> response = httpResponse( 200, '<a href="/index.aspx">ASP.NET</a>', {}, u, u )
        >>> w = wmlParser( response )
        >>> re, parsed = w.getReferences()
        
        #
        #    TODO:
        #        It is not clear why the results end up in the "re" list here.
        #        They should really be inside the "parsed" list.
        #        The checks below stay disabled until that is understood.
        #
        #    >>> re
        #    []
        #    >>> parsed[0].url_string
        #    u'http://www.w3af.com/index.aspx'

        Get a link by applying regular expressions
        >>> response = httpResponse(200, 'header /index.aspx footer', {}, u, u)
        >>> w = wmlParser( response )
        >>> re, parsed = w.getReferences()
        >>> #
        >>> # TODO: Shouldn't this be the other way around?!
        >>> #
        >>> re
        []
        >>> parsed[0].url_string
        u'http://www.w3af.com/index.aspx'
        '''
        # Delegate to the parent implementation first.
        # NOTE(review): presumably this call is what populates self._baseUrl;
        # confirm against SGMLParser._pre_parse.
        SGMLParser._pre_parse(self, httpResponse)
        # Only None is rejected here; other falsy values pass the check.
        assert self._baseUrl is not None, 'The base URL must be set.'
예제 #4
0
 def __init__(self, http_resp):
     """
     Set up the form-parsing state and then run the SGMLParser constructor.

     @parameter http_resp: HTTP response object. Its URL is stored in
     self._source_url and the object is then passed to SGMLParser.__init__.
     """
     # <textarea> parsing state: the element's name and its text
     self._textarea_tag_name = self._textarea_data = ""
     # Two separate lists: input tags found outside the scope of a form
     # tag, and the data used for <select> parsing
     self._saved_inputs, self._selects = [], []
     # URL of the response, saved for use in form parsing
     self._source_url = http_resp.getURL()
     # The parent constructor is called last, after the state above is set
     SGMLParser.__init__(self, http_resp)
예제 #5
0
 def _handle_select_tag_end(self, tag):
     """
     Close a select element and add the collected selects to the last form.

     When self._forms is empty nothing else happens, and self._selects is
     left as it is. Otherwise every (name, options) pair in self._selects
     is added to the last form and the container is emptied.

     @parameter tag: Tag name, passed on to the SGMLParser handler.
     """
     SGMLParser._handle_select_tag_end(self, tag)

     if not self._forms:
         return

     current_form = self._forms[-1]
     for select_name, options in self._selects:
         # addSelect receives each option as a tuple of (key, value) pairs
         option_tuples = [tuple(option.items()) for option in options]
         current_form.addSelect(select_name, option_tuples)

     # Start over with an empty selects container
     self._selects = []
예제 #6
0
    def _handle_form_tag_start(self, tag, attrs):
        '''
        Process an opening form tag.

        A form object is built from the tag's method and action and appended
        to self._forms. Every dict stored in self._saved_inputs is then
        handled as an input of this form, and the saved list is emptied.

        @parameter tag: Tag name, passed on to the SGMLParser handler.
        @parameter attrs: Attributes of the tag; 'method' and 'action' are
        read from it with get().
        '''
        SGMLParser._handle_form_tag_start(self, tag, attrs)

        # The method defaults to GET and is always upper-cased
        http_method = attrs.get('method', 'GET').upper()

        # Decode the action and join it with the base URL, when present
        target = attrs.get('action', None)
        have_target = target is not None
        if have_target:
            decoded = self._decode_url(target)
            try:
                joined = self._baseUrl.urlJoin(decoded)
            except ValueError:
                # The action could not be joined; treat it as missing
                have_target = False
            else:
                target = joined

        if not have_target:
            # Missing or unusable action: use the stored source URL instead
            om.out.debug(
                'HTMLParser found a form without an action attribute. '
                'Javascript may be used... but another option (mozilla does '
                'this) is that the form is expected to be  posted back to the'
                ' same URL (the one that returned the HTML that we are  parsing).'
            )
            target = self._source_url

        # Build the form object and keep it for later use
        new_form = form.Form(encoding=self._encoding)
        new_form.setMethod(http_method)
        new_form.setAction(target)
        self._forms.append(new_form)

        # Inputs found outside the scope of a form tag are parsed as if they
        # had appeared after this form tag was opened
        for pending in self._saved_inputs:
            if not isinstance(pending, dict):
                continue
            self._handle_input_tag_inside_form('input', pending)

        # Everything has been handled, so drop the saved inputs
        self._saved_inputs = []
예제 #7
0
 def __init__(self, httpResponse):
     '''
     Initialise self._select_tag_name and then run the SGMLParser
     constructor.

     @parameter httpResponse: HTTP response object, passed unchanged to
     SGMLParser.__init__.
     '''
     # Starts out as an empty string.
     # NOTE(review): presumably holds the name of the select tag being
     # parsed, and is set before the parent constructor because that
     # constructor may already trigger tag handlers that read it; confirm
     # against SGMLParser.__init__.
     self._select_tag_name = ""
     SGMLParser.__init__(self, httpResponse)