Ejemplo n.º 1
0
 def _re_extract(self, swf_body):
     """
     Run the regex-based extractor over the SWF body and merge the
     references it reports into self._re_urls.

     :param swf_body: Content handed to ReExtract as the text to scan
     """
     extractor = ReExtract(swf_body, self._base_url, self._encoding)
     extractor.parse()

     found_references = extractor.get_references()
     self._re_urls.update(found_references)
Ejemplo n.º 2
0
Archivo: pdf.py Proyecto: z0r1nga/w3af
 def _parse(self, resp_body):
     """
     Convert the response body to text with pdf_to_text and store the
     references ReExtract reports for that text in self._re_urls.

     :param resp_body: Body passed to pdf_to_text
     """
     pdf_text = pdf_to_text(resp_body)

     # NOTE(review): parse() is never called on the extractor here;
     # presumably this ReExtract version parses in its constructor - confirm.
     extractor = ReExtract(pdf_text, self._base_url, self._encoding)
     self._re_urls = extractor.get_references()
Ejemplo n.º 3
0
 def _re_extract(self, swf_body):
     """
     Feed the SWF body to ReExtract and add every reference it returns to
     the self._re_urls collection.

     :param swf_body: Text scanned by ReExtract
     """
     url_extractor = ReExtract(swf_body, self._base_url, self._encoding)
     url_extractor.parse()

     new_urls = url_extractor.get_references()
     self._re_urls.update(new_urls)
Ejemplo n.º 4
0
 def _parse(self, resp_body):
     """
     Turn the response body into text via pdf_to_text, then replace
     self._re_urls with the references ReExtract returns for that text.

     :param resp_body: Body passed to pdf_to_text
     """
     # NOTE(review): no explicit parse() call on the extractor; presumably
     # this ReExtract version does its work in the constructor - confirm.
     url_extractor = ReExtract(pdf_to_text(resp_body),
                               self._base_url,
                               self._encoding)
     self._re_urls = url_extractor.get_references()
Ejemplo n.º 5
0
 def _parse(self, http_response):
     """
     Scan the HTTP response body with ReExtract (require_quotes=True) and
     store the references it returns in self._re_urls.

     :param http_response: Object whose get_body() supplies the text to scan
     """
     body = http_response.get_body()

     # NOTE(review): parse() is not invoked here; presumably this ReExtract
     # version parses during construction - confirm.
     extractor = ReExtract(body,
                           self._base_url,
                           self._encoding,
                           require_quotes=True)
     self._re_urls = extractor.get_references()
Ejemplo n.º 6
0
 def parse(self):
     """
     Convert the body of this parser's HTTP response to text with
     pdf_to_text, run ReExtract over it and store the resulting references
     in self._re_urls.
     """
     response_body = self.get_http_response().get_body()
     pdf_text = pdf_to_text(response_body)

     extractor = ReExtract(pdf_text, self._base_url, self._encoding)
     extractor.parse()

     self._re_urls = extractor.get_references()
Ejemplo n.º 7
0
 def parse(self):
     """
     Extract references from the PDF response: the response body is passed
     through pdf_to_text, scanned by ReExtract, and the references found
     replace self._re_urls.
     """
     http_response = self.get_http_response()
     text = pdf_to_text(http_response.get_body())

     url_extractor = ReExtract(text, self._base_url, self._encoding)
     url_extractor.parse()

     self._re_urls = url_extractor.get_references()
Ejemplo n.º 8
0
 def parse(self):
     """
     Run ReExtract (require_quotes=True) over the body of this parser's
     HTTP response and store the references it returns in self._re_urls.
     """
     response_body = self.get_http_response().get_body()

     extractor = ReExtract(response_body, self._base_url, self._encoding,
                           require_quotes=True)
     extractor.parse()

     self._re_urls = extractor.get_references()
Ejemplo n.º 9
0
 def parse(self):
     """
     Scan the HTTP response body with ReExtract, passing
     require_quotes=True, and replace self._re_urls with the references
     found.
     """
     http_response = self.get_http_response()

     url_extractor = ReExtract(http_response.get_body(),
                               self._base_url,
                               self._encoding,
                               require_quotes=True)
     url_extractor.parse()

     self._re_urls = url_extractor.get_references()
Ejemplo n.º 10
0
 def _parse(self, http_response):
     """
     Build a ReExtract (require_quotes=True) for the HTTP response body and
     store the references it returns in self._re_urls.

     :param http_response: Object whose get_body() supplies the text to scan
     """
     # NOTE(review): the extractor's parse() is never called here;
     # presumably this ReExtract version parses in its constructor - confirm.
     url_extractor = ReExtract(http_response.get_body(), self._base_url,
                               self._encoding, require_quotes=True)
     self._re_urls = url_extractor.get_references()
Ejemplo n.º 11
0
    def test_relative_regex(self):
        # A relative path embedded in plain text must come back as a single
        # URL resolved against the base URL.
        base_url = URL('https://w3af.org/abc/def/')
        expected = [URL('https://w3af.org/foobar/uploads/foo.png')]

        extractor = ReExtract('123 ../../foobar/uploads/foo.png 465',
                              base_url,
                              'utf-8')
        extractor.parse()

        self.assertEqual(extractor.get_references(), expected)
Ejemplo n.º 12
0
Archivo: html.py Proyecto: zcr214/w3af
    def _handle_script_tag_start(self, tag, tag_name, attrs):
        """
        Handle a script tag: delegate to SGMLParser first, then, when the tag
        has text, run ReExtract over the stripped text and merge the
        references it returns into self._re_urls.
        """
        SGMLParser._handle_script_tag_start(self, tag, tag_name, attrs)

        script_text = tag.text
        if script_text is None:
            # Nothing to scan for this tag
            return

        extractor = ReExtract(script_text.strip(),
                              self._base_url,
                              self._encoding)
        extractor.parse()
        self._re_urls.update(extractor.get_references())
Ejemplo n.º 13
0
    def data(self, data):
        """
        Overrides the parent's method; called by the main parser when a text
        node is found.

        Inside a textarea the stripped text is kept in self._textarea_data.
        Otherwise, inside a script, the stripped text is scanned with
        ReExtract and the references are merged into self._re_urls.
        """
        if self._inside_textarea:
            self._textarea_data = data.strip()
            return

        if not self._inside_script:
            return

        # NOTE(review): parse() is not called on the extractor; presumably
        # this ReExtract version parses in its constructor - confirm.
        extractor = ReExtract(data.strip(), self._base_url, self._encoding)
        self._re_urls.update(extractor.get_references())
Ejemplo n.º 14
0
    def data(self, data):
        """
        Text-node callback overriding the parent's implementation.

        While inside a textarea, the stripped text is saved to
        self._textarea_data; while inside a script (and not a textarea), the
        stripped text is handed to ReExtract and its references are added to
        self._re_urls. Any other text is ignored.
        """
        if self._inside_textarea:
            self._textarea_data = data.strip()
            return

        if self._inside_script:
            # NOTE(review): no explicit parse() call; presumably this
            # ReExtract version parses during construction - confirm.
            url_extractor = ReExtract(data.strip(),
                                      self._base_url,
                                      self._encoding)
            found = url_extractor.get_references()
            self._re_urls.update(found)
Ejemplo n.º 15
0
    def _handle_script_tag_start(self, tag, tag_name, attrs):
        """
        Script tag handler. Calls the SGMLParser implementation and then, if
        the tag carries text, extracts references from the stripped text with
        ReExtract and adds them to self._re_urls.
        """
        SGMLParser._handle_script_tag_start(self, tag, tag_name, attrs)

        if tag.text is None:
            return

        stripped_text = tag.text.strip()
        url_extractor = ReExtract(stripped_text, self._base_url,
                                  self._encoding)
        url_extractor.parse()

        found = url_extractor.get_references()
        self._re_urls.update(found)
Ejemplo n.º 16
0
    def _get_references_regex(self, mutant, mutant_response):
        """
        Extract references from the HTTP response body using ReExtract.

        :param mutant: The request used to upload the file; only its
                       uploaded_file_name attribute is read here
        :param mutant_response: The HTTP response to parse
        :return: An empty list when the uploaded file name does not appear in
                 the response body; otherwise every reference ReExtract
                 returns for that body. No filtering by file name is done in
                 this method.
        """
        response_body = mutant_response.get_body()

        # Cheap substring check to skip the regex work when the file name is
        # not mentioned anywhere in the body
        if mutant.uploaded_file_name not in response_body:
            return []

        extractor = ReExtract(response_body,
                              mutant_response.get_uri(),
                              mutant_response.get_charset())
        extractor.parse()

        return extractor.get_references()