def run_remote(self, url):
    """Analyze a remote resource by opening it in an emulated window.

    The URL is prefixed with ``http://`` when it carries no scheme or a
    scheme that does not start with ``http``. The (possibly rewritten) URL
    is recorded through ``log.ThugLogging.set_url`` and a fresh
    ``HTTPSession`` is installed on ``log`` before the window is opened.

    Nothing is run when opening the URL yields a falsy window.
    """
    parsed_scheme = urlparse.urlparse(url).scheme
    is_http_like = bool(parsed_scheme) and parsed_scheme.startswith('http')
    if not is_http_like:
        url = 'http://%s' % (url, )

    log.ThugLogging.set_url(url)
    log.HTTPSession = HTTPSession()

    # Start from an empty document; the real content is loaded by open().
    empty_doc = w3c.parseString('')
    opener = Window(log.ThugOpts.referer,
                    empty_doc,
                    personality=log.ThugOpts.useragent)

    target = opener.open(url)
    if not target:
        return

    self.__run(target)
def run_local(self, url):
    """Analyze a local file inside an emulated window at ``about:blank``.

    Resets ``log.last_url`` / ``log.last_url_fetched``, records ``url``
    through ``log.ThugLogging.set_url``, flags the run as local and installs
    a fresh ``HTTPSession`` on ``log``.

    Files whose extension is ``.js`` or ``.jse`` are wrapped in a minimal
    HTML document (``<html><head/><body><script>...``). If such a file
    already starts with a ``<script`` tag, the text of its first script
    element is extracted and wrapped instead. Any other file is parsed as is.

    When ``log.ThugOpts.features_logging`` is set, the character and
    whitespace counts of the resulting markup are reported.

    :param url: path of the local file to analyze
    """
    log.last_url = None
    log.last_url_fetched = None

    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True

    log.HTTPSession = HTTPSession()

    # Use a context manager so the file handle is always released, even if
    # read() raises.
    with open(url, 'r') as fd:
        content = fd.read()

    extension = os.path.splitext(url)
    encoding = cchardet.detect(content)

    if len(extension) > 1 and extension[1].lower() in ('.js', '.jse', ):
        if not content.lstrip().startswith('<script'):
            # Bare script source: decode it with the detected encoding and
            # embed it in a script element.
            html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(content.decode(encoding['encoding'])))))
        else:
            soup = BeautifulSoup(content, "html.parser")

            # Strip any html/head/body wrappers. A missing tag is looked up
            # as None, so unwrap() raises AttributeError, which is ignored
            # on purpose (best effort).
            for tag_name in ('html', 'head', 'body'):
                try:
                    getattr(soup, tag_name).unwrap()
                except AttributeError:
                    pass

            html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(soup.script.get_text()))))
    else:
        html = content

    if log.ThugOpts.features_logging:
        log.ThugLogging.Features.add_characters_count(len(html))
        # Count lazily instead of materializing a throwaway list.
        log.ThugLogging.Features.add_whitespaces_count(sum(1 for a in html if a.isspace()))

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_local(self, url):
    """Analyze a local file inside an emulated window at ``about:blank``.

    Records ``url`` through ``log.ThugLogging.set_url``, flags the run as
    local and installs a fresh ``HTTPSession`` on ``log``. Files whose
    extension is ``.js`` or ``.jse`` are wrapped in a minimal
    ``<html><body><script>`` document; any other file is parsed as is.

    :param url: path of the local file to analyze
    """
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True

    log.HTTPSession = HTTPSession()

    # Use a context manager so the file handle is always released, even if
    # read() raises.
    with open(url, 'r') as fd:
        content = fd.read()

    extension = os.path.splitext(url)

    if len(extension) > 1 and extension[1].lower() in ('.js', '.jse', ):
        # Script source: embed it in a script element so it can be parsed
        # as a document.
        html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
    else:
        html = content

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_local(self, url):
    """Analyze a local file inside an emulated window at ``about:blank``.

    Records ``url`` through ``log.ThugLogging.set_url``, flags the run as
    local and installs a fresh ``HTTPSession`` on ``log``.

    Files whose extension is ``.js`` or ``.jse`` are wrapped in a minimal
    ``<html><body><script>`` document. If such a file already starts with a
    ``<script`` tag, the text of its first script element is extracted and
    wrapped instead. Any other file is parsed as is.

    :param url: path of the local file to analyze
    """
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True

    log.HTTPSession = HTTPSession()

    # Use a context manager so the file handle is always released, even if
    # read() raises.
    with open(url, 'r') as fd:
        content = fd.read()

    extension = os.path.splitext(url)

    if len(extension) > 1 and extension[1].lower() in ('.js', '.jse', ):
        if not content.lstrip().startswith('<script'):
            # Bare script source: embed it directly in a script element.
            html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
        else:
            soup = BeautifulSoup(content, "html.parser")

            # Strip any html/head/body wrappers. A missing tag is looked up
            # as None, so unwrap() raises AttributeError, which is ignored
            # on purpose (best effort).
            for tag_name in ('html', 'head', 'body'):
                try:
                    getattr(soup, tag_name).unwrap()
                except AttributeError:
                    pass

            html = tostring(E.HTML(E.BODY(E.SCRIPT(soup.script.get_text()))))
    else:
        html = content

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_remote(self, url):
    """Analyze a remote resource by opening it in an emulated window.

    Resets ``log.last_url`` / ``log.last_url_fetched`` and flags the run as
    non-local. If the URL cannot be parsed (``urlparse`` raises
    ``ValueError``), a warning is logged and the analysis is skipped.

    The URL is prefixed with ``http://`` when it carries no scheme or a
    scheme that does not start with ``http``. The (possibly rewritten) URL
    is recorded through ``log.ThugLogging.set_url`` and a fresh
    ``HTTPSession`` is installed on ``log`` before the window is opened.

    Nothing is run when opening the URL yields a falsy window.

    :param url: URL (with or without scheme) to analyze
    """
    log.last_url = None
    log.last_url_fetched = None

    log.ThugOpts.local = False

    try:
        scheme = urlparse.urlparse(url).scheme
    except ValueError as e:
        # str(e) rather than e.message: exceptions have no `message`
        # attribute on Python 3, so the handler itself would raise
        # AttributeError there. The text is the same on Python 2.
        log.warning("[WARNING] Analysis not performed (%s)", str(e))
        return

    if not scheme or not scheme.startswith('http'):
        url = 'http://%s' % (url, )

    log.ThugLogging.set_url(url)

    log.HTTPSession = HTTPSession()

    # Start from an empty document; the real content is loaded by open().
    doc = w3c.parseString('')
    window = Window(log.ThugOpts.referer, doc, personality=log.ThugOpts.useragent)
    window = window.open(url)

    if window:
        self.__run(window)
def setAttribute(self, name, value):
    """Set attribute ``name`` to ``value`` on the wrapped tag.

    Besides storing the value in ``self.tag.attrs``, this method:

    * bumps the setAttribute feature counter when
      ``log.ThugOpts.features_logging`` is enabled;
    * coerces a non-string ``name`` with ``str()``;
    * under a Firefox personality, only stores ``style`` / ``type`` values
      that match an entry of ``FF_STYLES`` / ``FF_INPUTS`` selected by the
      emulated browser major version;
    * for ``src`` / ``archive`` attributes, hands the value to a scheme
      handler if one exists, otherwise fetches it and dispatches the
      response to a MIME handler or, for ``text/html``, parses and runs it
      in a new window.

    NOTE(review): this block was reconstructed from flattened source; the
    nesting of the two ``return`` statements in the Firefox branch is the
    most plausible reading and should be confirmed.
    """
    # Imported locally rather than at module level (presumably to avoid
    # circular imports -- TODO confirm).
    from thug.DOM.W3C import w3c
    from thug.DOM.Window import Window
    from thug.DOM.DFT import DFT

    if log.ThugOpts.features_logging:
        log.ThugLogging.Features.increase_setattribute_count()

    if not isinstance(name, six.string_types): # pragma: no cover
        name = str(name)

    if log.ThugOpts.Personality.isFirefox():
        if name in ('style', ):
            # Turn a hyphenated value into camelCase form: the first piece
            # is kept as is, every following piece is capitalized.
            svalue = value.split('-')

            _value = svalue[0]
            if len(svalue) > 1:
                _value = '{}{}'.format(_value, ''.join([s.capitalize() for s in svalue[1:]]))

            # Store the converted value only if an FF_STYLES entry whose
            # first element is <= the browser major version has its second
            # element contained in the original value.
            for css in [p for p in FF_STYLES if log.ThugOpts.Personality.browserMajorVersion >= p[0]]:
                if css[1] in value:
                    self.tag.attrs[name] = _value
            return

        if name in ('type', ):
            # Same idea for input types; note the strict `>` comparison
            # here versus `>=` for styles.
            for _input in [p for p in FF_INPUTS if log.ThugOpts.Personality.browserMajorVersion > p[0]]:
                if _input[1] in value:
                    self.tag.attrs[name] = value
            return

    self.tag.attrs[name] = value

    if name.lower() in ('src', 'archive'):
        s = urlparse.urlsplit(value)

        # Look up a handler named handle_<scheme> on log.SchemeHandler; if
        # one exists it takes over and no fetch is performed.
        handler = getattr(log.SchemeHandler, 'handle_%s' % (s.scheme, ), None)
        if handler:
            handler(self.doc.window, value)
            return

        # Best effort: any failure while fetching silently ends processing.
        try:
            response = self.doc.window._navigator.fetch(value, redirect_type = "element workaround")
        except Exception:
            return

        if response is None or not response.ok:
            return

        ctype = response.headers.get('content-type', None)
        if ctype is None:
            return

        # A MIME handler registered for this content type, if any, consumes
        # the response body.
        handler = log.MIMEHandler.get_handler(ctype)
        if handler:
            handler(self.doc.window.url, response.content)
            return

        # HTML responses are parsed into a new window and run through DFT.
        if ctype.startswith(('text/html', )):
            doc = w3c.parseString(response.content)
            window = Window(response.url, doc, personality = log.ThugOpts.useragent)
            dft = DFT(window)
            dft.run()