def run_local(self, url):
    """Analyze a file stored on the local filesystem.

    Resets the last-URL trackers on ``log``, registers ``url`` with
    ``log.ThugLogging``, flags the run as local and installs a fresh
    ``HTTPSession``. The file content is turned into an HTML document,
    parsed with ``w3c.parseString`` and opened in a ``Window`` on
    ``about:blank`` which is then handed to ``self.__run``.

    Files with a ``.js``/``.jse`` extension are wrapped in a generated
    HTML/HEAD/BODY/SCRIPT tree. If the content already starts with a
    ``<script`` tag, only the text of the first script element is kept.

    :param url: path of the local file to analyze
    """
    log.last_url = None
    log.last_url_fetched = None
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True
    log.HTTPSession = HTTPSession()

    # Close the file deterministically instead of leaking the handle.
    with open(url, 'r') as fd:
        content = fd.read()

    # os.path.splitext always returns a (root, ext) pair.
    _, ext = os.path.splitext(url)

    if ext.lower() in ('.js', '.jse', ):
        if not content.lstrip().startswith('<script'):
            # Detect the encoding only on the path that needs it. The
            # detector may report no encoding at all (e.g. empty input);
            # fall back to utf-8 rather than calling decode(None).
            encoding = cchardet.detect(content)['encoding'] or 'utf-8'
            script = E.SCRIPT(content.decode(encoding))
        else:
            soup = BeautifulSoup(content, "html.parser")

            # Strip the html/head/body wrappers when present. An
            # AttributeError (e.g. the tag lookup yielded None) is ignored.
            for tag_name in ('html', 'head', 'body'):
                try:
                    getattr(soup, tag_name).unwrap()
                except AttributeError:
                    pass

            script = E.SCRIPT(soup.script.get_text())

        html = tostring(E.HTML(E.HEAD(), E.BODY(script)))
    else:
        html = content

    if log.ThugOpts.features_logging:
        log.ThugLogging.Features.add_characters_count(len(html))
        log.ThugLogging.Features.add_whitespaces_count(
            sum(1 for a in html if a.isspace()))

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_local(self, url):
    """Analyze a file stored on the local filesystem.

    Resets the last-URL trackers on ``log``, registers ``url`` with
    ``log.ThugLogging``, flags the run as local and installs a fresh
    ``HTTPSession``. The file is read as UTF-8 text, turned into an HTML
    document, parsed with ``w3c.parseString`` and opened in a ``Window``
    on ``about:blank`` which is then handed to ``self.__run``.

    Files with a ``.js``/``.jse`` extension are wrapped in a generated
    HTML/HEAD/BODY/SCRIPT tree. If the content already starts with a
    ``<script`` tag, only the text of the first script element is kept.

    :param url: path of the local file to analyze
    """
    log.last_url = None
    log.last_url_fetched = None
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True
    log.HTTPSession = HTTPSession()

    # Close the file deterministically instead of leaking the handle.
    with open(url, 'r', encoding="utf-8") as fd:
        content = fd.read()

    # os.path.splitext always returns a (root, ext) pair.
    _, ext = os.path.splitext(url)

    if ext.lower() in ('.js', '.jse', ):
        if not content.lstrip().startswith('<script'):
            code = content
        else:
            soup = bs4.BeautifulSoup(content, "html.parser")

            # Strip the html/head/body wrappers when present. An
            # AttributeError (e.g. the tag lookup yielded None) is ignored.
            for tag_name in ('html', 'head', 'body'):
                try:
                    getattr(soup, tag_name).unwrap()
                except AttributeError:
                    pass

            code = soup.script.get_text(
                types=(NavigableString, CData, Script))

        html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(code))))
    else:
        html = content

    if log.ThugOpts.features_logging:
        log.ThugLogging.Features.add_characters_count(len(html))

        # Only string items are tested; non-string items (e.g. the ints
        # produced when iterating a bytes object) are skipped.
        whitespaces_count = sum(
            1 for a in html
            if isinstance(a, six.string_types) and a.isspace()
        )
        log.ThugLogging.Features.add_whitespaces_count(whitespaces_count)

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_remote(self, url):
    """Analyze a remote URL.

    Resets the last-URL trackers on ``log`` and flags the run as
    non-local. When the URL has no scheme, or a scheme that does not
    start with ``http``, ``http://`` is prepended. The URL is then
    registered with ``log.ThugLogging``, a fresh ``HTTPSession`` is
    installed and the URL is opened from a ``Window`` whose location is
    the configured referer. ``self.__run`` is invoked only if opening the
    URL yields a truthy window.

    If ``urlparse`` rejects the URL with ``ValueError``, a warning is
    logged and the method returns without performing the analysis.

    :param url: URL to analyze
    """
    log.last_url = None
    log.last_url_fetched = None
    log.ThugOpts.local = False

    try:
        scheme = urlparse.urlparse(url).scheme
    except ValueError as e:
        # ``e.message`` is deprecated since Python 2.6 and missing in
        # Python 3; ``str(e)`` carries the same text on both.
        log.warning("[WARNING] Analysis not performed (%s)", str(e))
        return

    if not scheme or not scheme.startswith('http'):
        url = 'http://%s' % (url, )

    log.ThugLogging.set_url(url)
    log.HTTPSession = HTTPSession()

    doc = w3c.parseString('')
    window = Window(log.ThugOpts.referer, doc,
                    personality=log.ThugOpts.useragent)
    window = window.open(url)
    if window:
        self.__run(window)
def run_local(self, url):
    """Analyze a file stored on the local filesystem.

    Clears ``log.last_url`` and ``log.last_url_fetched``, records ``url``
    through ``log.ThugLogging.set_url``, marks the options as local and
    creates a new ``HTTPSession``. The file is read, converted to HTML,
    parsed by ``w3c.parseString`` and loaded into a ``Window`` located at
    ``about:blank``; that window is finally passed to ``self.__run``.

    ``.js`` and ``.jse`` files are embedded in a generated
    HTML/HEAD/BODY/SCRIPT tree. When such a file already begins with a
    ``<script`` tag, the text of its first script element is used as the
    script body instead of the raw content.

    :param url: path of the local file to analyze
    """
    log.last_url = None
    log.last_url_fetched = None
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True
    log.HTTPSession = HTTPSession()

    # Use a context manager so the file handle is always released.
    with open(url, 'r') as source:
        content = source.read()

    # os.path.splitext always returns a (root, ext) pair.
    suffix = os.path.splitext(url)[1].lower()

    if suffix in ('.js', '.jse', ):
        if content.lstrip().startswith('<script'):
            soup = BeautifulSoup(content, "html.parser")

            # Drop the html/head/body wrappers if they exist. An
            # AttributeError (e.g. the tag lookup yielded None) is ignored.
            for wrapper in ('html', 'head', 'body'):
                try:
                    getattr(soup, wrapper).unwrap()
                except AttributeError:
                    pass

            script_text = soup.script.get_text()
        else:
            # Encoding detection is needed on this path only. The detector
            # may report no encoding (e.g. for empty input), in which case
            # utf-8 is assumed instead of calling decode(None).
            detected = cchardet.detect(content)['encoding'] or 'utf-8'
            script_text = content.decode(detected)

        html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(script_text))))
    else:
        html = content

    if log.ThugOpts.features_logging:
        features = log.ThugLogging.Features
        features.add_characters_count(len(html))
        features.add_whitespaces_count(sum(1 for a in html if a.isspace()))

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_local(self, url):
    """Analyze a file stored on the local filesystem.

    Registers ``url`` with ``log.ThugLogging``, flags the run as local
    and installs a fresh ``HTTPSession``. The file content is parsed with
    ``w3c.parseString`` and opened in a ``Window`` on ``about:blank``
    which is then handed to ``self.run``.

    Files whose extension is ``.js`` (case-insensitive) are wrapped in a
    generated HTML/BODY/SCRIPT tree; any other file is parsed as is.

    :param url: path of the local file to analyze
    """
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True
    log.HTTPSession = HTTPSession()

    # Close the file deterministically instead of leaking the handle.
    with open(url, 'r') as fd:
        content = fd.read()

    # os.path.splitext always returns a (root, ext) pair.
    _, ext = os.path.splitext(url)

    # Compare for equality: the previous ``in ('.js')`` was a substring
    # test against the string '.js', so files with no extension (ext == '')
    # were wrongly treated as JavaScript.
    if ext.lower() == '.js':
        html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
    else:
        html = content

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.run(window)
def run_local(self, url):
    """Analyze a file stored on the local filesystem.

    Registers ``url`` with ``log.ThugLogging``, flags the run as local
    and installs a fresh ``HTTPSession``. The file content is parsed with
    ``w3c.parseString`` and opened in a ``Window`` on ``about:blank``
    which is then handed to ``self.__run``.

    Files with a ``.js``/``.jse`` extension (case-insensitive) are
    wrapped in a generated HTML/BODY/SCRIPT tree; any other file is
    parsed as is.

    :param url: path of the local file to analyze
    """
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True
    log.HTTPSession = HTTPSession()

    # Close the file deterministically instead of leaking the handle.
    with open(url, 'r') as fd:
        content = fd.read()

    # os.path.splitext always returns a (root, ext) pair.
    _, ext = os.path.splitext(url)

    if ext.lower() in ('.js', '.jse', ):
        html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
    else:
        html = content

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_local(self, url):
    """Analyze a file stored on the local filesystem.

    Registers ``url`` with ``log.ThugLogging``, flags the run as local
    and installs a fresh ``HTTPSession``. The file content is turned into
    an HTML document, parsed with ``w3c.parseString`` and opened in a
    ``Window`` on ``about:blank`` which is then handed to ``self.__run``.

    Files with a ``.js``/``.jse`` extension are wrapped in a generated
    HTML/HEAD/BODY/SCRIPT tree. If the content already starts with a
    ``<script`` tag, only the text of the first script element is kept.

    :param url: path of the local file to analyze
    """
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True
    log.HTTPSession = HTTPSession()

    # Close the file deterministically instead of leaking the handle.
    with open(url, 'r') as fd:
        content = fd.read()

    # os.path.splitext always returns a (root, ext) pair.
    _, ext = os.path.splitext(url)

    if ext.lower() in ('.js', '.jse', ):
        if not content.lstrip().startswith('<script'):
            code = content
        else:
            soup = BeautifulSoup(content, "html.parser")

            # Strip the html/head/body wrappers when present. An
            # AttributeError (e.g. the tag lookup yielded None) is ignored.
            for tag_name in ('html', 'head', 'body'):
                try:
                    getattr(soup, tag_name).unwrap()
                except AttributeError:
                    pass

            code = soup.script.get_text()

        html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(code))))
    else:
        html = content

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_local(self, url):
    """Analyze a file stored on the local filesystem.

    Records ``url`` through ``log.ThugLogging.set_url``, marks the
    options as local and creates a new ``HTTPSession``. The file is read,
    converted to HTML, parsed by ``w3c.parseString`` and loaded into a
    ``Window`` located at ``about:blank``; that window is finally passed
    to ``self.__run``.

    ``.js`` and ``.jse`` files are embedded in a generated
    HTML/BODY/SCRIPT tree. When such a file already begins with a
    ``<script`` tag, the text of its first script element is used as the
    script body instead of the raw content.

    :param url: path of the local file to analyze
    """
    log.ThugLogging.set_url(url)
    log.ThugOpts.local = True
    log.HTTPSession = HTTPSession()

    # Use a context manager so the file handle is always released.
    with open(url, 'r') as source:
        content = source.read()

    # os.path.splitext always returns a (root, ext) pair.
    suffix = os.path.splitext(url)[1].lower()

    if suffix in ('.js', '.jse', ):
        if content.lstrip().startswith('<script'):
            soup = BeautifulSoup(content, "html.parser")

            # Drop the html/head/body wrappers if they exist. An
            # AttributeError (e.g. the tag lookup yielded None) is ignored.
            for wrapper in ('html', 'head', 'body'):
                try:
                    getattr(soup, wrapper).unwrap()
                except AttributeError:
                    pass

            script_text = soup.script.get_text()
        else:
            script_text = content

        html = tostring(E.HTML(E.BODY(E.SCRIPT(script_text))))
    else:
        html = content

    doc = w3c.parseString(html)
    window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
    window.open()
    self.__run(window)
def run_remote(self, url):
    """Analyze a remote URL.

    A URL without a scheme, or with a scheme that does not start with
    ``http``, gets ``http://`` prepended. The resulting URL is registered
    with ``log.ThugLogging``, a fresh ``HTTPSession`` is installed, and
    the URL is opened from a ``Window`` built on an empty document with
    the configured referer. ``self.__run`` is called only when opening
    the URL returns a truthy window.

    :param url: URL to analyze
    """
    parsed_scheme = urlparse.urlparse(url).scheme
    has_http_scheme = bool(parsed_scheme) and parsed_scheme.startswith('http')
    if not has_http_scheme:
        url = 'http://%s' % (url, )

    log.ThugLogging.set_url(url)
    log.HTTPSession = HTTPSession()

    empty_doc = w3c.parseString('')
    referer_window = Window(log.ThugOpts.referer,
                            empty_doc,
                            personality=log.ThugOpts.useragent)

    opened = referer_window.open(url)
    if not opened:
        return

    self.__run(opened)
def run_remote(self, url):
    """Analyze a remote URL.

    Clears ``log.last_url`` and ``log.last_url_fetched`` and marks the
    options as non-local. A URL without a scheme, or with a scheme that
    does not start with ``http``, gets ``http://`` prepended before it is
    registered through ``log.ThugLogging.set_url``. A new ``HTTPSession``
    is created and the URL is opened from a ``Window`` built on an empty
    document with the configured referer. ``self.__run`` is called only
    when opening the URL returns a truthy window.

    A ``ValueError`` raised while parsing the URL is logged as a warning
    and ends the method without any analysis.

    :param url: URL to analyze
    """
    log.last_url = None
    log.last_url_fetched = None
    log.ThugOpts.local = False

    try:
        scheme = urlparse.urlparse(url).scheme
    except ValueError as e:
        # ``e.message`` is deprecated since Python 2.6 and missing in
        # Python 3; ``str(e)`` carries the same text on both.
        log.warning("[WARNING] Analysis not performed (%s)", str(e))
        return

    if not scheme or not scheme.startswith('http'):
        url = 'http://%s' % (url, )

    log.ThugLogging.set_url(url)
    log.HTTPSession = HTTPSession()

    doc = w3c.parseString('')
    window = Window(log.ThugOpts.referer, doc,
                    personality=log.ThugOpts.useragent)
    window = window.open(url)
    if window:
        self.__run(window)