コード例 #1
0
ファイル: DFT.py プロジェクト: tomdaq/thug
    def handle_form(self, form):
        """
        Submit `form` to its action URL and analyze the response.

        Nothing is done when the form has no `action` attribute, when the
        action cannot be normalized, or when the normalized action is
        already recorded in `self.forms`. Otherwise the `name`/`value`
        pairs of the form's `input` children are sent as the request body.
        Unless the request fails, returns a 404 or a MIME handler claims
        the response, the response content is parsed into a new Window
        and a nested DFT is run on it.
        """
        from .Window import Window

        log.info(form)

        action = form.get('action', None)
        if action is None:
            return

        target = log.HTTPSession.normalize_url(self.window, action)
        if target is None or target in self.forms:
            return

        self.forms.append(target)
        method = form.get('method', 'get')

        # The payload stays None unless the form holds at least one input
        payload = None
        for element in form.find_all():
            tag_name = getattr(element, 'name', None)
            if tag_name.lower() != 'input':
                continue

            if payload is None:
                payload = {}

            attrs = element.attrs
            if 'name' in attrs and 'value' in attrs:
                payload[attrs['name']] = attrs['value']

        headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        try:
            response = self.window._navigator.fetch(action,
                                                    headers = headers,
                                                    method = method.upper(),
                                                    body = payload,
                                                    redirect_type = "form")
        except Exception:
            return

        if response is None or response.status_code == 404:
            return

        ctype = response.headers.get('content-type', None)
        if ctype:
            handler = log.MIMEHandler.get_handler(ctype)
            # A truthy handler result ends the processing of this response
            if handler and handler(action, response.content):
                return

        window = Window(target,
                        w3c.parseString(response.content),
                        personality = log.ThugOpts.useragent)

        DFT(window, forms = self.forms).run()
コード例 #2
0
    def set_href(self, url):
        """
        Navigate to `url` as the result of an assignment to the location.

        `data:` URLs are handed to `log.DFT._handle_data_uri`. A URL equal
        to the current window URL is logged and skipped. Any other URL is
        normalized, logged as an href redirect, opened in a new Window
        whose referer is the current window URL, and analyzed with a DFT.
        """
        from .Window import Window

        if url.startswith("data:"):
            log.DFT._handle_data_uri(url)
            return

        referer = self._window.url
        if referer == url:
            log.warning("Detected redirection from %s to %s... skipping", referer, url)
            return

        # Stop at the personality whose user agent matches the navigator.
        # When none matches, the loop variable keeps the last key iterated.
        personalities = log.ThugOpts.Personality
        for personality in personalities:
            if personalities[personality]['userAgent'] == self._window._navigator.userAgent:
                break

        url = log.HTTPSession.normalize_url(self._window, url)
        log.ThugLogging.log_href_redirect(referer, url)

        blank = w3c.parseString('')
        opened = Window(referer, blank, personality = personality).open(url)  # pylint:disable=undefined-loop-variable
        if not opened:
            return

        DFT(opened).run()
コード例 #3
0
ファイル: Location.py プロジェクト: buffer/thug
    def set_href(self, url):
        """
        Follow an assignment of `url` to the location's href.

        A `data:` URL is delegated to `log.DFT._handle_data_uri`; a URL
        identical to the current window URL only produces a warning.
        Otherwise the URL is normalized, recorded through
        `log.ThugLogging.log_href_redirect`, opened from a fresh Window
        and, if a window is returned, analyzed by running a DFT on it.
        """
        from .Window import Window

        if url.startswith("data:"):
            log.DFT._handle_data_uri(url)
            return

        referer = self._window.url
        if url == referer:
            log.warning("Detected redirection from %s to %s... skipping", referer, url)
            return

        # Pick the personality key matching the navigator user agent; if
        # no entry matches, the last key seen by the loop is the one used.
        for key in log.ThugOpts.Personality:
            entry = log.ThugOpts.Personality[key]
            if entry['userAgent'] == self._window._navigator.userAgent:
                break

        url = log.HTTPSession.normalize_url(self._window, url)
        log.ThugLogging.log_href_redirect(referer, url)

        empty_doc = w3c.parseString('')
        source    = Window(referer, empty_doc, personality = key)  # pylint:disable=undefined-loop-variable
        target    = source.open(url)
        if target:
            DFT(target).run()
コード例 #4
0
ファイル: DFT.py プロジェクト: ankitdobhal/thug
    def do_handle_form(self, form):
        """
        Submit `form` and run a nested DFT on the response.

        When the `action` attribute is missing or equal to 'self', the
        action falls back to `log.last_url` (if set and truthy) or to the
        current window URL. The normalized action is recorded in
        `self.forms` if not already there. The `name`/`value` pairs of
        the `input` children form the request body. Processing stops
        when the fetch raises, the response is missing or not OK, or the
        response carries a truthy `thug_mimehandler_hit` attribute.
        """
        from .Window import Window

        log.info(form)

        action = form.get('action', None)
        if action in (None, 'self', ): # pragma: no cover
            action = getattr(log, 'last_url', None) or self.window.url

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.increase_url_count()

        target = log.HTTPSession.normalize_url(self.window, action)
        if target is None: # pragma: no cover
            return

        if target not in self.forms:
            self.forms.append(target)

        method = form.get('method', 'get')

        # The payload stays None unless the form holds at least one input
        payload = None
        for element in form.find_all():
            tag_name = getattr(element, 'name', None)
            if tag_name.lower() != 'input':
                continue

            if payload is None:
                payload = {}

            attrs = element.attrs
            if 'name' in attrs and 'value' in attrs:
                payload[attrs['name']] = attrs['value']

        headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        try:
            response = self.window._navigator.fetch(action,
                                                    headers = headers,
                                                    method = method.upper(),
                                                    body = payload,
                                                    redirect_type = "form")
        except Exception as e: # pragma: no cover
            log.info("[ERROR][do_handle_form] %s", str(e))
            return

        if response is None or not response.ok:
            return

        if getattr(response, 'thug_mimehandler_hit', False): # pragma: no cover
            return

        window = Window(target,
                        w3c.parseString(response.content),
                        personality = log.ThugOpts.useragent)

        DFT(window, forms = self.forms).run()
コード例 #5
0
ファイル: DFT.py プロジェクト: Nikhilesh2317/thug
    def handle_meta_refresh(self, http_equiv, content):
        """
        Follow the URL of a `<meta http-equiv="refresh">` element.

        The URL is taken from the `url=` part of the semicolon-separated
        `content` value. Each URL is followed at most three times (the
        counter lives in `self.meta`). `data:` URLs are handed to
        `_handle_data_uri`; other URLs are fetched and, unless the fetch
        fails or returns 404, the response is parsed into a new Window
        on which a DFT is run.
        """
        from .Window import Window

        if http_equiv.lower() != 'refresh' or 'url' not in content.lower():
            return

        url = None
        data_uri = 'data:' in content

        for part in content.split(';'):
            # A data URI contains ';' itself: once the URL has started,
            # re-attach the following pieces that the split took apart.
            if data_uri and url is not None:
                url = "{};{}".format(url, part)

            stripped = part.strip()
            if stripped.lower().startswith('url='):
                url = stripped[4:]

        if not url:
            return

        # Drop surrounding single quotes
        if url.startswith("'") and url.endswith("'"):
            url = url[1:-1]

        if url in self.meta and self.meta[url] >= 3:
            return

        if data_uri:
            self._handle_data_uri(url)
            return

        try:
            response = self.window._navigator.fetch(url, redirect_type="meta")
        except Exception:
            return

        if response is None or response.status_code == 404:
            return

        self.meta[url] = self.meta[url] + 1 if url in self.meta else 1

        window = Window(self.window.url,
                        w3c.parseString(response.content),
                        personality=log.ThugOpts.useragent)

        DFT(window).run()
コード例 #6
0
ファイル: DFT.py プロジェクト: tomdaq/thug
    def _handle_data_uri(self, uri):
        """
        Data URI Scheme
        data:[<MIME-type>][;charset=<encoding>][;base64],<data>

        The encoding is indicated by ;base64. If it is present the data is
        encoded as base64. Without it the data (as a sequence of octets) is
        represented using ASCII encoding for octets inside the range of safe
        URL characters and using the standard %xx hex encoding of URLs for
        octets outside that range. If <MIME-type> is omitted, it defaults to
        text/plain;charset=US-ASCII. (As a shorthand, the type can be omitted
        but the charset parameter supplied.)

        Some browsers (Chrome, Opera, Safari, Firefox) accept a non-standard
        ordering if both ;base64 and ;charset are supplied, while Internet
        Explorer requires that the charset's specification must precede the
        base64 token.
        """
        if not uri.lower().startswith("data:"):
            return False

        log.URLClassifier.classify(uri)

        h = uri.split(",")
        if len(h) < 2:
            return False

        data = h[1]
        opts = h[0][len("data:"):].split(";")

        if 'base64' in opts:
            data = base64.b64decode(h[1])
            opts.remove('base64')

        if not opts:
            opts = ["text/plain", "charset=US-ASCII"]

        mimetype = opts[0]

        if mimetype in ('text/html', ):
            from .Window import Window

            doc    = w3c.parseString(data)
            window = Window(self.window.url, doc, personality = log.ThugOpts.useragent)
            # window.open(uri)

            dft = DFT(window)
            dft.run()
            return True

        handler = log.MIMEHandler.get_handler(mimetype)
        if handler:
            handler(self.window.url, data)
            return True

        return False
コード例 #7
0
ファイル: DFT.py プロジェクト: ankitdobhal/thug
    def follow_href(self, href):
        """
        Open `href` from a fresh Window whose referer is the current
        window URL and, if a window is returned, run a DFT on it.
        """
        from .Window import Window

        blank  = w3c.parseString('')
        source = Window(self.window.url, blank, personality = log.ThugOpts.useragent)
        opened = source.open(href)
        if not opened:
            return

        DFT(opened).run()
コード例 #8
0
    def run_local(self, url):
        """
        Analyze the local file at path `url`.

        The file is read as UTF-8 text. A `.js`/`.jse` file is wrapped in
        a minimal HTML document: either its raw content, or (when the
        content starts with a `<script` tag) the text of its first script
        element. Any other file is used as is. The resulting markup is
        parsed and analyzed in a Window opened on 'about:blank'.
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Use a context manager so the file handle is always released,
        # even when reading or decoding fails.
        with open(url, 'r', encoding="utf-8") as fd:
            content = fd.read()

        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse', ):
            if not content.lstrip().startswith('<script'):
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(content))))
            else:
                soup = bs4.BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers the parser may have found;
                # a missing tag is None, hence the AttributeError.
                for tag_name in ('html', 'head', 'body', ):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                code = soup.script.get_text(types=(NavigableString, CData,
                                                   Script))
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(code))))
        else:
            html = content

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.add_characters_count(len(html))

            # Items that are not strings (e.g. when iterating over bytes)
            # are not counted.
            whitespaces_count = sum(
                1 for a in html
                if isinstance(a, six.string_types) and a.isspace()
            )
            log.ThugLogging.Features.add_whitespaces_count(whitespaces_count)

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.__run(window)
コード例 #9
0
ファイル: ThugAPI.py プロジェクト: netwrkspider/thug
    def run_local(self, url):
        """
        Analyze the local file at path `url`.

        A `.js`/`.jse` file is wrapped in a minimal HTML document: either
        its decoded raw content, or (when the content starts with a
        `<script` tag) the text of its first script element. Any other
        file is used as is. The resulting markup is parsed and analyzed
        in a Window opened on 'about:blank'.
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Use a context manager so the file handle is always released,
        # even when reading fails.
        with open(url, 'r') as fd:
            content = fd.read()

        extension = os.path.splitext(url)[1].lower()
        encoding = cchardet.detect(content)

        if extension in ('.js', '.jse', ):
            if not content.lstrip().startswith('<script'):
                # NOTE(review): presumably detect() can report no encoding
                # (e.g. for empty input), which would make decode() fail;
                # confirm and add a fallback if so.
                script = content.decode(encoding['encoding'])
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(script))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers the parser may have found;
                # a missing tag is None, hence the AttributeError.
                for tag_name in ('html', 'head', 'body', ):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(
                    E.HTML(E.HEAD(), E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.add_characters_count(len(html))
            log.ThugLogging.Features.add_whitespaces_count(
                sum(1 for a in html if a.isspace()))

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.__run(window)
コード例 #10
0
ファイル: DFT.py プロジェクト: ankitdobhal/thug
    def handle_meta_refresh(self, http_equiv, content):
        """
        Follow the URL of a `<meta http-equiv="refresh">` element.

        The URL is taken from the `url=` part of the semicolon-separated
        `content` value. Each URL is followed at most three times (the
        counter lives in `log.ThugLogging.meta`). `data:` URLs are handed
        to `_handle_data_uri`; other URLs are fetched and, when the
        response is OK, parsed into a new Window on which a DFT is run.
        """
        from .Window import Window

        if http_equiv.lower() != 'refresh' or 'url' not in content.lower():
            return

        if log.ThugOpts.features_logging:
            features = log.ThugLogging.Features
            features.increase_meta_refresh_count()
            features.increase_url_count()

        url = None
        data_uri = 'data:' in content

        for part in content.split(';'):
            # A data URI contains ';' itself: once the URL has started,
            # re-attach the following pieces that the split took apart.
            if data_uri and url is not None:
                url = "{};{}".format(url, part)

            stripped = part.strip()
            if stripped.lower().startswith('url='):
                url = stripped[4:]

        if not url: # pragma: no cover
            return

        # Drop surrounding single quotes
        if url.startswith("'") and url.endswith("'"):
            url = url[1:-1]

        if url in log.ThugLogging.meta and log.ThugLogging.meta[url] >= 3: # pragma: no cover
            return

        if data_uri:
            self._handle_data_uri(url)
            return

        try:
            response = self.window._navigator.fetch(url, redirect_type = "meta")
        except Exception as e:
            log.info("[ERROR][handle_meta_refresh] %s", str(e))
            return

        if response is None or not response.ok:
            return

        seen = log.ThugLogging.meta[url] if url in log.ThugLogging.meta else 0
        log.ThugLogging.meta[url] = seen + 1

        window = Window(self.window.url,
                        w3c.parseString(response.content),
                        personality = log.ThugOpts.useragent)

        DFT(window).run()
コード例 #11
0
ファイル: MicrosoftXMLDOM.py プロジェクト: buffer/thug
def loadXML(self, bstrXML):
    """
    Parse `bstrXML` into `self.xml` and report `res://` references.

    Every double-quote delimited piece of the input starting with
    `res://` is classified, logged as a behavior warning and logged as
    a CVE-2017-0022 exploit. When such a piece contains (case
    insensitively) one of the names in `security_sys`, the parse error
    code is reset to 0.
    """
    self.xml = w3c.parseString(bstrXML)

    if "res://" not in bstrXML:
        return

    for chunk in bstrXML.split('"'):
        if not chunk.startswith("res://"):
            continue

        log.URLClassifier.classify(chunk)
        log.ThugLogging.add_behavior_warn("[Microsoft XMLDOM ActiveX] Attempting to load %s" % (chunk, ))
        log.ThugLogging.log_classifier("exploit", log.ThugLogging.url, "CVE-2017-0022", None)

        lowered = chunk.lower()
        if any(name.lower() in lowered for name in security_sys):
            self.parseError._errorCode = 0
コード例 #12
0
    def frames(self):
        """
        Return an HTMLCollection with all the frames (including iframes)
        in the current window.

        Each frame/iframe element found is serialized; markup not seen
        before is remembered in `self._inner_frames` and wrapped in a new
        Window that is added to `self._frames`.
        """
        from thug.DOM.W3C.HTML.HTMLCollection import HTMLCollection

        for element in self._findAll(['frame', 'iframe']):
            markup = unicode(element)

            if markup not in self._inner_frames:
                self._inner_frames.add(markup)

                inner = Window(self.url,
                               w3c.parseString(markup),
                               personality = log.ThugOpts.useragent)
                self._frames.add(inner)

        return HTMLCollection(self.doc, list(self._frames))
コード例 #13
0
ファイル: ThugAPI.py プロジェクト: tweemeterjop/thug
    def run_remote(self, url):
        """
        Analyze the remote resource at `url`.

        A URL whose scheme is missing or does not start with 'http' gets
        'http://' prepended. The URL is then opened from a Window whose
        referer is `log.ThugOpts.referer` and, if a window is returned,
        the analysis is run on it.
        """
        scheme = urlparse.urlparse(url).scheme

        if not (scheme and scheme.startswith('http')):
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)

        log.HTTPSession = HTTPSession()

        blank  = w3c.parseString('')
        source = Window(log.ThugOpts.referer, blank, personality = log.ThugOpts.useragent)
        opened = source.open(url)
        if not opened:
            return

        self.__run(opened)
コード例 #14
0
ファイル: MicrosoftXMLDOM.py プロジェクト: Ru0ch3n/thug
def loadXML(self, bstrXML):
    """
    Parse `bstrXML` into `self.xml` and report `res://` references.

    Each piece of the input delimited by double quotes and starting
    with `res://` is classified, logged as a behavior warning and
    logged as a CVE-2017-0022 exploit. If the piece mentions (case
    insensitively) any name listed in `security_sys`, the parse error
    code is set to 0.
    """
    self.xml = w3c.parseString(bstrXML)

    if "res://" not in bstrXML:
        return

    resources = [piece for piece in bstrXML.split('"')
                 if piece.startswith("res://")]

    for resource in resources:
        log.URLClassifier.classify(resource)
        log.ThugLogging.add_behavior_warn(
            "[Microsoft XMLDOM ActiveX] Attempting to load %s" % (resource, ))
        log.ThugLogging.log_classifier("exploit", log.ThugLogging.url,
                                       "CVE-2017-0022", None)

        lowered = resource.lower()
        if any(entry.lower() in lowered for entry in security_sys):
            self.parseError._errorCode = 0
コード例 #15
0
ファイル: ThugAPI.py プロジェクト: Josquin95/thug
    def run_remote(self, url):
        """
        Analyze the remote resource at `url`.

        'http://' is prepended when the URL has no scheme or a scheme
        not starting with 'http'. The URL is opened from a Window built
        on an empty document with `log.ThugOpts.referer` as its URL; the
        analysis runs only if opening returns a window.
        """
        parsed = urlparse.urlparse(url)
        scheme = parsed.scheme

        needs_prefix = not scheme or not scheme.startswith('http')
        if needs_prefix:
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)

        log.HTTPSession = HTTPSession()

        window = Window(log.ThugOpts.referer,
                        w3c.parseString(''),
                        personality = log.ThugOpts.useragent).open(url)
        if window:
            self.__run(window)
コード例 #16
0
ファイル: ThugAPI.py プロジェクト: buffer/thug
    def run_local(self, url):
        """
        Analyze the local file at path `url`.

        A `.js`/`.jse` file is wrapped in a minimal HTML document: either
        its decoded raw content, or (when the content starts with a
        `<script` tag) the text of its first script element. Any other
        file is used as is. The resulting markup is parsed and analyzed
        in a Window opened on 'about:blank'.
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Use a context manager so the file handle is always released,
        # even when reading fails.
        with open(url, 'r') as fd:
            content = fd.read()

        extension = os.path.splitext(url)[1].lower()
        encoding  = cchardet.detect(content)

        if extension in ('.js', '.jse', ):
            if not content.lstrip().startswith('<script'):
                # NOTE(review): presumably detect() can report no encoding
                # (e.g. for empty input), which would make decode() fail;
                # confirm and add a fallback if so.
                script = content.decode(encoding['encoding'])
                html   = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(script))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers the parser may have found;
                # a missing tag is None, hence the AttributeError.
                for tag_name in ('html', 'head', 'body', ):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.add_characters_count(len(html))
            log.ThugLogging.Features.add_whitespaces_count(sum(1 for a in html if a.isspace()))

        doc    = w3c.parseString(html)
        window = Window('about:blank', doc, personality = log.ThugOpts.useragent)
        window.open()
        self.__run(window)
コード例 #17
0
    def run_local(self, url):
        """
        Analyze the local file at path `url`.

        The content of a `.js`/`.jse` file is wrapped in a script element
        of a minimal HTML document; any other file is used as is. The
        resulting markup is parsed and analyzed in a Window opened on
        'about:blank'.
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Use a context manager so the file handle is always released,
        # even when reading fails.
        with open(url, 'r') as fd:
            content = fd.read()

        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse', ):
            html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
        else:
            html = content

        doc    = w3c.parseString(html)
        window = Window('about:blank', doc, personality = log.ThugOpts.useragent)
        window.open()
        self.__run(window)
コード例 #18
0
ファイル: MicrosoftXMLDOM.py プロジェクト: pdelsante/thug
def loadXML(self, bstrXML):
    """
    Parse `bstrXML` into `self.xml` and report `res://` references.

    The input is scanned twice, first split on double quotes and then
    on single quotes. Every piece starting with `res://` is logged as a
    behavior warning; when it contains (case insensitively) one of the
    names in `security_sys`, the parse error code is reset to 0.
    """
    self.xml = w3c.parseString(bstrXML)
    #self.attributes = NamedNodeMap(self.xml._node)

    if "res://" not in bstrXML:
        return

    # Double-quoted references are processed before single-quoted ones
    for quote in ('"', "'"):
        for chunk in bstrXML.split(quote):
            if not chunk.startswith("res://"):
                continue

            log.ThugLogging.add_behavior_warn("[Microsoft XMLDOM ActiveX] Attempting to load %s" % (chunk, ))

            lowered = chunk.lower()
            if any(name.lower() in lowered for name in security_sys):
                self.parseError._errorCode = 0
コード例 #19
0
ファイル: ThugAPI.py プロジェクト: PoeBlu/thug
    def run_local(self, url):
        """
        Analyze the local file at path `url`.

        The content of a `.js` file is wrapped in a script element of a
        minimal HTML document; any other file is used as is. The
        resulting markup is parsed and analyzed in a Window opened on
        'about:blank'.
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Use a context manager so the file handle is always released,
        # even when reading fails.
        with open(url, 'r') as fd:
            content = fd.read()

        extension = os.path.splitext(url)[1].lower()

        # Compare against a real tuple: `in ('.js')` is a substring test
        # on the string '.js', which also matches an empty extension.
        if extension in ('.js', ):
            html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
        else:
            html = content

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.run(window)
コード例 #20
0
ファイル: DFT.py プロジェクト: Nikhilesh2317/thug
    def handle_frame(self, frame, redirect_type='frame'):
        """
        Fetch and analyze the document referenced by a frame element.

        Nothing is done when the frame has no `src`, when the source is
        a data URI handled by `_handle_data_uri`, when the fetch fails or
        returns 404, or when a MIME handler claims the response.
        Otherwise the response is parsed into a new Window (registered
        in `log.ThugLogging.windows` under the frame id, if any) on
        which a DFT is run.
        """
        from .Window import Window

        log.warning(frame)

        src = frame.get('src', None)
        if not src or self._handle_data_uri(src):
            return

        try:
            response = self.window._navigator.fetch(
                src, redirect_type=redirect_type)
        except Exception:
            return

        if response is None or response.status_code == 404:
            return

        ctype = response.headers.get('content-type', None)
        if ctype:
            handler = log.MIMEHandler.get_handler(ctype)
            # A truthy handler result ends the processing of this response
            if handler and handler(src, response.content):
                return

        normalized = log.HTTPSession.normalize_url(self.window, src)
        src = normalized or src

        window = Window(response.url,
                        w3c.parseString(response.content),
                        personality=log.ThugOpts.useragent)

        frame_id = frame.get('id', None)
        if frame_id:
            log.ThugLogging.windows[frame_id] = window

        DFT(window).run()
コード例 #21
0
ファイル: DFT.py プロジェクト: ankitdobhal/thug
    def handle_frame(self, frame, redirect_type = 'frame'):
        """
        Fetch and analyze the document referenced by a frame element.

        Nothing is done when the frame has no `src`, when the source is
        a data URI handled by `_handle_data_uri`, or when the fetch fails
        or is not OK. Each final response URL is processed at most three
        times (the counter lives in `log.ThugLogging.frames`). Unless the
        response carries a truthy `thug_mimehandler_hit` attribute, it is
        parsed into a new Window (registered in `log.ThugLogging.windows`
        under the frame id, if any) on which a DFT is run.
        """
        from .Window import Window

        log.warning(frame)

        src = frame.get('src', None)
        if not src or self._handle_data_uri(src):
            return

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.increase_url_count()

        try:
            response = self.window._navigator.fetch(src, redirect_type = redirect_type)
        except Exception as e:
            log.info("[ERROR][handle_frame] %s", str(e))
            return

        if response is None or not response.ok: # pragma: no cover
            return # pragma: no cover

        final_url = response.url
        visits    = log.ThugLogging.frames[final_url] if final_url in log.ThugLogging.frames else 0
        if visits >= 3:
            return # pragma: no cover

        log.ThugLogging.frames[final_url] = visits + 1

        if getattr(response, 'thug_mimehandler_hit', False):
            return # pragma: no cover

        window = Window(response.url,
                        w3c.parseString(response.content),
                        personality = log.ThugOpts.useragent)

        frame_id = frame.get('id', None)
        if frame_id:
            log.ThugLogging.windows[frame_id] = window

        DFT(window).run()
コード例 #22
0
    def run_local(self, url):
        """
        Analyze the local file at path `url`.

        A `.js`/`.jse` file is wrapped in a minimal HTML document: either
        its raw content, or (when the content starts with a `<script`
        tag) the text of its first script element. Any other file is
        used as is. The resulting markup is parsed and analyzed in a
        Window opened on 'about:blank'.
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Use a context manager so the file handle is always released,
        # even when reading fails.
        with open(url, 'r') as fd:
            content = fd.read()

        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse', ):
            if not content.lstrip().startswith('<script'):
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(content))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers the parser may have found;
                # a missing tag is None, hence the AttributeError.
                for tag_name in ('html', 'head', 'body', ):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(
                    E.HTML(E.HEAD(), E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.__run(window)
コード例 #23
0
def loadXML(self, bstrXML):
    """
    Parse `bstrXML` into `self.xml` and report `res://` references.

    The input is scanned twice, first split on double quotes and then
    on single quotes. Every piece starting with `res://` is classified
    and logged as a behavior warning; when it contains (case
    insensitively) one of the names in `security_sys`, the parse error
    code is reset to 0.
    """
    self.xml = w3c.parseString(bstrXML)
    # self.attributes = NamedNodeMap(self.xml._node)

    if "res://" not in bstrXML:
        return

    # Double-quoted references are processed before single-quoted ones
    for quote in ('"', "'"):
        for chunk in bstrXML.split(quote):
            if not chunk.startswith("res://"):
                continue

            log.URLClassifier.classify(chunk)
            log.ThugLogging.add_behavior_warn("[Microsoft XMLDOM ActiveX] Attempting to load %s" % (chunk, ))

            lowered = chunk.lower()
            if any(name.lower() in lowered for name in security_sys):
                self.parseError._errorCode = 0
コード例 #24
0
    def run_remote(self, url):
        """
        Analyze the remote resource at `url`.

        A URL that cannot be parsed is reported with a warning and not
        analyzed. 'http://' is prepended when the URL has no scheme or a
        scheme not starting with 'http'. The URL is then opened from a
        Window whose URL is `log.ThugOpts.referer` and, if a window is
        returned, the analysis is run on it.
        """
        log.last_url = None

        try:
            scheme = urlparse.urlparse(url).scheme
        except ValueError as e:
            # Exceptions have no `message` attribute on Python 3, so
            # format the exception itself to keep this path from raising.
            log.warning("[WARNING] Analysis not performed (%s)", str(e))
            return

        if not scheme or not scheme.startswith('http'):
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)

        log.HTTPSession = HTTPSession()

        doc    = w3c.parseString('')
        window = Window(log.ThugOpts.referer, doc, personality = log.ThugOpts.useragent)
        window = window.open(url)
        if window:
            self.__run(window)
コード例 #25
0
ファイル: Shellcode.py プロジェクト: kartikeyap/thug
    def search_url(self, sc):
        """
        Look for a URL inside the shellcode string `sc` and analyze it.

        The URL is the whitespace-delimited token starting at the first
        'http' found at an offset greater than 0, with one trailing
        quote removed. URLs already present in
        `log.ThugLogging.shellcode_urls` or `retrieved_urls` are skipped.
        The URL is fetched and, when the response is OK, parsed into a
        new Window on which a DFT is run.
        """
        from thug.DOM.W3C import w3c
        from thug.DOM.Window import Window
        from thug.DOM.DFT import DFT

        offset = sc.find('http')
        if offset <= 0:
            return

        url = sc[offset:].split()[0]
        if url.endswith(("'", '"')):
            url = url[:-1]

        if url in log.ThugLogging.shellcode_urls or url in log.ThugLogging.retrieved_urls:
            return

        log.info('[Shellcode Analysis] URL Detected: %s', url)

        try:
            response = self.window._navigator.fetch(
                url, redirect_type="URL found")
            # Recorded only once the fetch did not raise
            log.ThugLogging.shellcode_urls.add(url)
        except Exception:
            return

        if response is None or not response.ok:
            return

        window = Window(url,
                        w3c.parseString(response.content),
                        personality=log.ThugOpts.useragent)

        DFT(window).run()
コード例 #26
0
ファイル: ThugAPI.py プロジェクト: tweemeterjop/thug
    def run_local(self, url):
        """
        Analyze the local file at path `url`.

        A `.js`/`.jse` file is wrapped in a minimal HTML document: either
        its raw content, or (when the content starts with a `<script`
        tag) the text of its first script element. Any other file is
        used as is. The resulting markup is parsed and analyzed in a
        Window opened on 'about:blank'.
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Use a context manager so the file handle is always released,
        # even when reading fails.
        with open(url, 'r') as fd:
            content = fd.read()

        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse', ):
            if not content.lstrip().startswith('<script'):
                html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers the parser may have found;
                # a missing tag is None, hence the AttributeError.
                for tag_name in ('html', 'head', 'body', ):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(E.HTML(E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        doc    = w3c.parseString(html)
        window = Window('about:blank', doc, personality = log.ThugOpts.useragent)
        window.open()
        self.__run(window)
コード例 #27
0
ファイル: Shellcode.py プロジェクト: buffer/thug
    def search_url(self, sc):
        """
        Look for a URL inside the shellcode string `sc` and analyze it.

        Only an 'http' occurrence at an offset greater than 0 is
        considered. The URL is the first whitespace-delimited token from
        that offset, minus one trailing quote. It is ignored when already
        listed in `log.ThugLogging.shellcode_urls` or `retrieved_urls`;
        otherwise it is fetched and, when the response is OK, parsed
        into a new Window on which a DFT is run.
        """
        from thug.DOM.W3C import w3c
        from thug.DOM.Window import Window
        from thug.DOM.DFT import DFT

        offset = sc.find('http')
        if not offset > 0:
            return

        candidate = sc[offset:].split()[0]
        url = candidate[:-1] if candidate.endswith(("'", '"')) else candidate

        known = url in log.ThugLogging.shellcode_urls or url in log.ThugLogging.retrieved_urls
        if known:
            return

        log.info('[Shellcode Analysis] URL Detected: %s', url)

        try:
            response = self.window._navigator.fetch(url, redirect_type = "URL found")
            # Recorded only once the fetch did not raise
            log.ThugLogging.shellcode_urls.add(url)
        except Exception:
            return

        if response is None or not response.ok:
            return

        doc    = w3c.parseString(response.content)
        window = Window(url, doc, personality = log.ThugOpts.useragent)

        DFT(window).run()
コード例 #28
0
ファイル: DFT.py プロジェクト: ankitdobhal/thug
    def handle_a(self, anchor):
        """
        Record an anchor element and, in extensive mode, follow its href.

        The anchor is always appended to `self.anchors`. The link is
        followed only when `log.ThugOpts.extensive` is set and the anchor
        has an href that is not a data URI handled by `_handle_data_uri`.
        A response that is OK and whose content type starts with
        'text/html' is parsed into a new Window on which a DFT is run.
        """
        log.info(anchor)

        self.anchors.append(anchor)

        if not log.ThugOpts.extensive:
            return

        href = anchor.get('href', None)
        if not href: # pragma: no cover
            return

        if self._handle_data_uri(href):
            return

        try:
            response = self.window._navigator.fetch(href, redirect_type = "anchor")
        except Exception as e: # pragma: no cover
            log.info("[ERROR][handle_a] %s", str(e))
            return

        if response is None or not response.ok: # pragma: no cover
            return

        content_type = response.headers.get('content-type', None)
        if not content_type: # pragma: no cover
            return

        if not content_type.startswith('text/html'):
            return

        from .Window import Window

        window = Window(self.window.url,
                        w3c.parseString(response.content),
                        personality = log.ThugOpts.useragent)

        DFT(window).run()
コード例 #29
0
ファイル: ThugAPI.py プロジェクト: buffer/thug
    def run_remote(self, url):
        """
        Analyze the remote resource at `url`.

        A URL that cannot be parsed is reported with a warning and not
        analyzed. 'http://' is prepended when the URL has no scheme or a
        scheme not starting with 'http'. The URL is then opened from a
        Window whose URL is `log.ThugOpts.referer` and, if a window is
        returned, the analysis is run on it.
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugOpts.local = False

        try:
            scheme = urlparse.urlparse(url).scheme
        except ValueError as e:
            # Exceptions have no `message` attribute on Python 3, so
            # format the exception itself to keep this path from raising.
            log.warning("[WARNING] Analysis not performed (%s)", str(e))
            return

        if not scheme or not scheme.startswith('http'):
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)

        log.HTTPSession = HTTPSession()

        doc    = w3c.parseString('')
        window = Window(log.ThugOpts.referer, doc, personality = log.ThugOpts.useragent)
        window = window.open(url)
        if window:
            self.__run(window)
コード例 #30
0
ファイル: Element.py プロジェクト: buffer/thug
    def setAttribute(self, name, value):
        """
        Set attribute `name` to `value` on the wrapped tag.

        Under the Firefox personality the `style` and `type` attributes are
        special-cased: the attribute is stored only if `value` contains an
        entry of FF_STYLES (resp. FF_INPUTS) whose version bound is met by
        the emulated browser major version, and the method returns right
        after that check. For `style` the stored value is the dash-separated
        `value` converted to camel case.

        In every other case the attribute is stored as is. Setting `src` or
        `archive` additionally retrieves the referenced resource: a scheme
        handler matching the URL scheme takes precedence; otherwise the
        resource is fetched and handed to the MIME handler registered for
        its content type or, for text/html content, analyzed through a new
        Window/DFT pair.
        """
        from thug.DOM.W3C import w3c
        from thug.DOM.Window import Window
        from thug.DOM.DFT import DFT

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.increase_setattribute_count()

        if not isinstance(name, six.string_types):
            name = str(name)

        if log.ThugOpts.Personality.isFirefox():
            if name == 'style':
                parts = value.split('-')

                # foo-bar-baz -> fooBarBaz
                camel = parts[0]
                if len(parts) >= 2:
                    camel = '{}{}'.format(camel, ''.join(p.capitalize() for p in parts[1:]))

                major = log.ThugOpts.Personality.browserMajorVersion
                supported = [entry[1] for entry in FF_STYLES if major >= entry[0]]
                if any(token in value for token in supported):
                    self.tag.attrs[name] = camel

                return

            if name == 'type':
                major = log.ThugOpts.Personality.browserMajorVersion
                supported = [entry[1] for entry in FF_INPUTS if major > entry[0]]
                if any(token in value for token in supported):
                    self.tag.attrs[name] = value

                return

        self.tag.attrs[name] = value

        # Only attributes referencing an external resource need more work
        if name.lower() not in ('src', 'archive'):
            return

        scheme = urlparse.urlsplit(value).scheme

        scheme_handler = getattr(log.SchemeHandler, 'handle_%s' % (scheme, ), None)
        if scheme_handler:
            scheme_handler(self.doc.window, value)
            return

        try:
            response = self.doc.window._navigator.fetch(value, redirect_type = "element workaround")
        except Exception:
            return

        if not (response is not None and response.ok):
            return

        ctype = response.headers.get('content-type', None)
        if ctype is None:
            return

        mime_handler = log.MIMEHandler.get_handler(ctype)
        if mime_handler:
            mime_handler(self.doc.window.url, response.content)
            return

        if not ctype.startswith('text/html'):
            return

        fetched_doc = w3c.parseString(response.content)
        fetched_win = Window(response.url, fetched_doc, personality = log.ThugOpts.useragent)

        DFT(fetched_win).run()
コード例 #31
0
    def setAttribute(self, name, value):
        """
        Store `value` under attribute `name` on the wrapped tag.

        With the Firefox personality, `style` and `type` are stored only
        when `value` contains an FF_STYLES / FF_INPUTS entry whose version
        bound is satisfied by the emulated browser major version (`style`
        is stored in camel case); the method then returns immediately.

        Otherwise the attribute is stored unchanged and, for `src` and
        `archive`, the referenced resource is processed: through a scheme
        handler if one matches the URL scheme, else by fetching it and
        dispatching on its content type (registered MIME handler first,
        then a new Window/DFT analysis for text/html content).
        """
        from thug.DOM.W3C import w3c
        from thug.DOM.Window import Window
        from thug.DOM.DFT import DFT

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.increase_setattribute_count()

        if not isinstance(name, six.string_types):  # pragma: no cover
            name = str(name)

        if log.ThugOpts.Personality.isFirefox():
            if name == 'style':
                chunks = value.split('-')

                # Dash-separated value is stored as camel case
                camel_cased = chunks[0]
                if len(chunks) >= 2:
                    capitalized = [chunk.capitalize() for chunk in chunks[1:]]
                    camel_cased = '{}{}'.format(camel_cased, ''.join(capitalized))

                browser_major = log.ThugOpts.Personality.browserMajorVersion
                for entry in FF_STYLES:
                    if browser_major >= entry[0] and entry[1] in value:
                        self.tag.attrs[name] = camel_cased

                return

            if name == 'type':
                browser_major = log.ThugOpts.Personality.browserMajorVersion
                for entry in FF_INPUTS:
                    if browser_major > entry[0] and entry[1] in value:
                        self.tag.attrs[name] = value

                return

        self.tag.attrs[name] = value

        # Nothing to retrieve unless the attribute points to a resource
        if name.lower() not in ('src', 'archive'):
            return

        split_url = urlparse.urlsplit(value)
        handler_name = 'handle_%s' % (split_url.scheme, )

        by_scheme = getattr(log.SchemeHandler, handler_name, None)
        if by_scheme:
            by_scheme(self.doc.window, value)
            return

        try:
            response = self.doc.window._navigator.fetch(
                value, redirect_type="element workaround")
        except Exception:
            return

        if not (response is not None and response.ok):
            return

        ctype = response.headers.get('content-type', None)
        if ctype is None:  # pragma: no cover
            return

        by_mime = log.MIMEHandler.get_handler(ctype)
        if by_mime:
            by_mime(self.doc.window.url, response.content)
            return

        if not ctype.startswith('text/html'):
            return

        html_doc = w3c.parseString(response.content)
        html_window = Window(response.url,
                             html_doc,
                             personality=log.ThugOpts.useragent)

        DFT(html_window).run()
コード例 #32
0
ファイル: DFT.py プロジェクト: ankitdobhal/thug
    def _handle_data_uri(self, uri):
        """
        Handle a data URI.

        Data URI Scheme
        data:[<MIME-type>][;charset=<encoding>][;base64],<data>

        The encoding is indicated by ;base64. If it is present the data is
        encoded as base64. Without it the data (as a sequence of octets) is
        represented using ASCII encoding for octets inside the range of safe
        URL characters and using the standard %xx hex encoding of URLs for
        octets outside that range. If <MIME-type> is omitted, it defaults to
        text/plain;charset=US-ASCII. (As a shorthand, the type can be omitted
        but the charset parameter supplied.)

        Some browsers (Chrome, Opera, Safari, Firefox) accept a non-standard
        ordering if both ;base64 and ;charset are supplied, while Internet
        Explorer requires that the charset's specification must precede the
        base64 token.

        The URI is classified and counted, then its payload is dispatched on
        the MIME type: a registered MIME handler is preferred, text/html
        payloads are parsed and analyzed by a new DFT.

        Returns the payload (base64-decoded when the ;base64 token is
        present), or None if `uri` is not a data URI, carries no payload,
        cannot be base64-decoded, or was consumed by a MIME handler.
        """
        uri = uri if isinstance(uri, six.string_types) else str(uri)
        if not uri.lower().startswith("data:"):
            return None

        log.URLClassifier.classify(uri)

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.increase_data_uri_count()

        # Split on the first comma only: it separates the header from the
        # payload, and the payload itself may legitimately contain commas
        # (e.g. inline HTML or script). Splitting on every comma would
        # silently truncate such a payload.
        h = uri.split(",", 1)
        if len(h) < 2 or not h[1]: # pragma: no cover
            return None

        data = h[1]
        opts = h[0][len("data:"):].split(";")

        # NOTE(review): payloads without the ;base64 token are passed on as
        # they appear in the URI, i.e. without %xx decoding.
        if 'base64' in opts:
            try:
                data = base64.b64decode(h[1])
            except Exception: # pragma: no cover
                # The payload may itself be URL-encoded; retry after unquoting
                try:
                    data = base64.b64decode(urlparse.unquote(h[1]))
                except Exception:
                    log.warning("[WARNING] Error while handling data URI: %s", data)
                    return None

            opts.remove('base64')

        # No MIME type supplied: fall back to the scheme defaults
        if not opts or not opts[0]:
            opts = ["text/plain", "charset=US-ASCII"]

        mimetype = opts[0]

        handler = log.MIMEHandler.get_handler(mimetype)
        if handler:
            handler(self.window.url, data)
            return None

        if mimetype.startswith(('text/html', )):
            from .Window import Window

            doc    = w3c.parseString(data)
            window = Window(self.window.url, doc, personality = log.ThugOpts.useragent)

            dft = DFT(window)
            dft.run()

        return data