def log_redirect(self, response, window):
    self.log_cookies()

    if not response.history:
        if 'Set-Cookie' in response.headers:
            log.CookieClassifier.classify(response.url, response.headers['Set-Cookie'])

        if response.url:
            log.URLClassifier.classify(response.url)
            log.HTTPSession.fetch_ssl_certificate(response.url)

        return None

    final = response.url

    while final is None:  # pragma: no cover
        for h in reversed(response.history):
            final = h.url

    for h in response.history:
        if 'Set-Cookie' in h.headers:
            log.CookieClassifier.classify(h.url, h.headers['Set-Cookie'])

        location = h.headers.get('location', None)

        self.add_behavior_warn(
            "[HTTP Redirection (Status: %s)] Content-Location: %s --> Location: %s" % (
                h.status_code, h.url, location))

        location = log.HTTPSession.normalize_url(window, location)
        self.log_connection(h.url, location, "http-redirect")

        log.URLClassifier.classify(h.url)
        log.HTTPSession.fetch_ssl_certificate(h.url)

        ctype = h.headers.get('content-type', 'unknown')

        md5 = hashlib.md5()  # nosec
        md5.update(h.content)
        sha256 = hashlib.sha256()
        sha256.update(h.content)
        mtype = Magic(h.content).get_mime()

        data = {
            "content": h.content,
            "status": h.status_code,
            "md5": md5.hexdigest(),
            "sha256": sha256.hexdigest(),
            "fsize": len(h.content),
            "ctype": ctype,
            "mtype": mtype
        }

        self.log_location(h.url, data)

    log.URLClassifier.classify(final)
    log.HTTPSession.fetch_ssl_certificate(final)
    return final
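A minimal, self-contained sketch of the redirect-chain walk performed by log_redirect() above, using the requests library directly instead of Thug's HTTPSession; print_redirect_chain is a hypothetical helper name introduced only for illustration.

# Illustrative only: walk a requests redirect chain the way log_redirect() does,
# recording each hop's status, Location header and content hash.
import hashlib
import requests

def print_redirect_chain(response):
    # response.history holds one Response per intermediate redirect hop.
    for hop in response.history:
        location = hop.headers.get('location', None)
        digest = hashlib.md5(hop.content).hexdigest()  # nosec
        print("[%s] %s --> %s (md5: %s)" % (hop.status_code, hop.url, location, digest))
    # The final URL reached after all redirections.
    return response.url

final = print_redirect_chain(requests.get("http://example.com"))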
def Close(self):
    content = self.stream.getvalue()
    log.info(content)

    data = {
        'content' : content,
        'status'  : 200,
        'md5'     : hashlib.md5(content).hexdigest(),
        'sha256'  : hashlib.sha256(content).hexdigest(),
        'fsize'   : len(content),
        'ctype'   : 'textstream',
        'mtype'   : Magic(content).get_mime(),
    }

    log.ThugLogging.log_location(log.ThugLogging.url, data)
    log.TextClassifier.classify(log.ThugLogging.url, content)

    if not log.ThugOpts.file_logging:
        return

    log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "textstream")

    try:
        os.makedirs(log_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            pass
        else:
            raise

    filename = self._filename.split('\\')[-1] if '\\' in self._filename else self._filename
    log_file = os.path.join(log_dir, filename)

    with open(log_file, 'wb') as fd:
        fd.write(content)
def is_doc(self, data):
    if isinstance(data, str):
        data = data.encode()

    doc_mime_types = (
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    )

    return Magic(data).get_mime() in doc_mime_types
def Close(self):
    content = '\n'.join(self.stream)
    log.info(content)

    _content = content.encode() if isinstance(content, str) else content

    data = {
        'content': content,
        'status': 200,
        'md5': hashlib.md5(_content).hexdigest(),  # nosec
        'sha256': hashlib.sha256(_content).hexdigest(),
        'fsize': len(content),
        'ctype': 'textstream',
        'mtype': Magic(_content).get_mime(),
    }

    log.ThugLogging.log_location(log.ThugLogging.url, data)
    log.TextClassifier.classify(log.ThugLogging.url, content)

    if not log.ThugOpts.file_logging:
        return

    log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "textstream")

    try:
        os.makedirs(log_dir)
    except OSError as e:  # pragma: no cover
        if e.errno == errno.EEXIST:
            pass
        else:
            raise

    filename = self._filename.split('\\')[-1] if '\\' in self._filename else self._filename

    if not filename:  # pragma: no cover
        filename = ''.join(random.choice(string.ascii_lowercase) for i in range(8))

    log_file = os.path.join(log_dir, filename)

    with open(log_file, 'w') as fd:
        fd.write(content)
def fetch(self, url, method="GET", headers=None, body=None, redirect_type=None, params=None, snippet=None): log.URLClassifier.classify(url) # The command-line option -x (--local-nofetch) prevents remote # content fetching so raise an exception and exit the method. if log.HTTPSession.no_fetch: raise FetchForbidden # Do not attempt to fetch content if the URL is "about:blank". if log.HTTPSession.about_blank(url): raise AboutBlank # URL normalization and fixing (if broken and the option is # enabled). url = log.HTTPSession.normalize_url(self._window, url) if url is None: raise InvalidUrl last_url = getattr(log, 'last_url', None) if last_url is None: last_url = self._window.url if redirect_type: log.ThugLogging.add_behavior_warn( "[{} redirection] {} -> {}".format(redirect_type, last_url, url), snippet=snippet) log.ThugLogging.log_connection(last_url, url, redirect_type) else: log.ThugLogging.log_connection(last_url, url, "unknown") # The command-line option -t (--threshold) defines the maximum # number of pages to fetch. If the threshold is reached avoid # fetching the contents. if log.HTTPSession.threshold_expired(url): raise ThresholdExpired if headers is None: headers = dict() response = log.HTTPSession.fetch(url, method, self._window, self.userAgent, headers, body) if response is None: return None _url = log.ThugLogging.log_redirect(response, self._window) if _url: url = _url referer = response.request.headers.get('referer', 'None') log.ThugLogging.add_behavior_warn( "[HTTP] URL: {} (Status: {}, Referer: {})".format( url, response.status_code, referer), snippet=snippet) ctype = response.headers.get('content-type', 'unknown') mime_base = os.path.join(log.ThugLogging.baseDir, ctype) md5 = hashlib.md5() md5.update(response.content) sha256 = hashlib.sha256() sha256.update(response.content) mtype = Magic(response.content).get_mime() data = { "content": response.content, "status": response.status_code, "md5": md5.hexdigest(), "sha256": sha256.hexdigest(), "fsize": len(response.content), "ctype": ctype, "mtype": mtype } log.ThugLogging.add_behavior_warn( "[HTTP] URL: {} (Content-type: {}, MD5: {})".format( response.url, ctype, data["md5"]), snippet=snippet) log.ThugLogging.log_location(url, data) if response.history: location = response.headers.get('location', None) if location and redirect_type not in ( "URL found", "JNLP", "iframe", ): self._window.url = location if redirect_type in ("meta", ): self._window.url = url log.ThugLogging.store_content(mime_base, data["md5"], response.content) log.ThugLogging.log_file(response.content, response.url, params) if redirect_type in ( None, 'window open', 'iframe', 'http-redirect', 'meta', ): log.last_url = response.url handler = log.MIMEHandler.get_handler(mtype) if handler: handler(response.url, response.content) return response
def is_doc(self, data):
    data = data.encode() if isinstance(data, str) else data
    return Magic(data).get_mime() in self.doc_mime_types
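A standalone sketch of the same MIME-based document check, assuming the python-magic package as a stand-in for Thug's Magic wrapper; detect_doc and DOC_MIME_TYPES are hypothetical names introduced only for illustration.

# Illustrative only: detect Word documents by sniffing the buffer's MIME type.
import magic

DOC_MIME_TYPES = (
    'application/msword',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
)

def detect_doc(data):
    # Accept str or bytes, exactly as is_doc() does above.
    if isinstance(data, str):
        data = data.encode()
    return magic.from_buffer(data, mime=True) in DOC_MIME_TYPES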
def _doRun(self, p, stage):
    try:
        pefile.PE(data = p, fast_load = True)
        return
    except Exception:
        pass

    if not isinstance(p, six.string_types):
        return  # pragma: no cover

    if log.ThugOpts.code_logging:
        log.ThugLogging.add_code_snippet(p, 'VBScript', 'Contained_Inside')

    log.ThugLogging.add_behavior_warn("[WScript.Shell ActiveX] Run (Stage %d) Code:\n%s" % (stage, p))
    log.ThugLogging.log_exploit_event(self._window.url,
                                      "WScript.Shell ActiveX",
                                      "Run",
                                      data = {
                                          "stage" : stage,
                                          "code"  : p,
                                      },
                                      forward = False)

    s = None

    while True:
        if s is not None and len(s) < 2:
            break

        try:
            index = p.index('http')
        except ValueError:
            break

        p = p[index:]
        s = p.split()
        p = p[1:]

        url = s[0]
        url = url[:-1] if url.endswith(("'", '"')) else url
        url = url.split('"')[0]
        url = url.split("'")[0]

        log.ThugLogging.add_behavior_warn("[WScript.Shell ActiveX] Run (Stage %d) Downloading from URL %s" % (stage, url))

        try:
            response = self._window._navigator.fetch(url, redirect_type = "doRun")
        except Exception:  # pragma: no cover
            continue

        if response is None or not response.ok:
            continue  # pragma: no cover

        md5 = hashlib.md5()  # nosec
        md5.update(response.content)
        md5sum = md5.hexdigest()
        sha256 = hashlib.sha256()
        sha256.update(response.content)
        sha256sum = sha256.hexdigest()

        log.ThugLogging.add_behavior_warn("[WScript.Shell ActiveX] Run (Stage %d) Saving file %s" % (stage, md5sum, ))

        p = " ".join(s[1:])

        data = {
            'status'  : response.status_code,
            'content' : response.content,
            'md5'     : md5sum,
            'sha256'  : sha256sum,
            'fsize'   : len(response.content),
            'ctype'   : response.headers.get('content-type', 'unknown'),
            'mtype'   : Magic(response.content).get_mime(),
        }

        log.ThugLogging.log_location(url, data)
        log.TextClassifier.classify(url, response.content)
        self._doRun(response.content, stage + 1)
def fetch(self, url, method="GET", headers=None, body=None, redirect_type=None, params=None, snippet=None): if url and not isinstance(url, six.string_types): # pragma: no cover url = str(url) log.URLClassifier.classify(url) # The command-line option -x (--local-nofetch) prevents remote # content fetching so raise an exception and exit the method. if log.HTTPSession.no_fetch: raise FetchForbidden # Do not attempt to fetch content if the URL is "about:blank". if log.HTTPSession.about_blank(url): raise AboutBlank # URL normalization and fixing (if broken and the option is # enabled). url = log.HTTPSession.normalize_url(self._window, url) if url is None: raise InvalidUrl last_url = getattr(log, 'last_url', None) if last_url is None: last_url = self._window.url if redirect_type in ( 'frame', 'iframe', 'http-redirect', 'meta', ): if log.HTTPSession.check_equal_urls(url, last_url): # pragma: no cover log.ThugLogging.add_behavior_warn( "[Skipping {} redirection] {} -> {}".format( redirect_type, last_url, url), snippet=snippet) return None if redirect_type: log.ThugLogging.add_behavior_warn( "[{} redirection] {} -> {}".format(redirect_type, last_url, url), snippet=snippet) log.ThugLogging.log_connection(last_url, url, redirect_type) else: log.ThugLogging.log_connection(last_url, url, "unknown") # The command-line option -t (--threshold) defines the maximum # number of pages to fetch. If the threshold is reached avoid # fetching the contents. if log.HTTPSession.threshold_expired(url): raise ThresholdExpired if headers is None: headers = dict() response = log.HTTPSession.fetch(url, method, self._window, self.userAgent, headers, body) if response is None: return None _url = log.ThugLogging.log_redirect(response, self._window) if _url: url = _url referer = response.request.headers.get('referer', 'None') log.ThugLogging.add_behavior_warn( "[HTTP] URL: {} (Status: {}, Referer: {})".format( url, response.status_code, referer), snippet=snippet) ctype = response.headers.get('content-type', 'unknown') mime_base = os.path.join(log.ThugLogging.baseDir, ctype) md5 = hashlib.md5() # nosec md5.update(response.content) sha256 = hashlib.sha256() sha256.update(response.content) ssdeep_hash = ssdeep.hash(response.content) mtype = Magic(response.content).get_mime() data = { "content": response.content, "status": response.status_code, "md5": md5.hexdigest(), "sha256": sha256.hexdigest(), "ssdeep": ssdeep_hash, "fsize": len(response.content), "ctype": ctype, "mtype": mtype } log.ThugLogging.add_behavior_warn( "[HTTP] URL: {} (Content-type: {}, MD5: {})".format( response.url, ctype, data["md5"]), snippet=snippet) log.ThugLogging.log_location(url, data) log.ThugLogging.store_content(mime_base, data["md5"], response.content) log.ThugLogging.log_file(response.content, response.url, params) if redirect_type in ( 'window open', 'http-redirect', 'meta', ): self._window.url = log.HTTPSession.normalize_url( self._window, response.url) if redirect_type in ( None, 'window open', 'iframe', 'http-redirect', 'meta', ): log.last_url = response.url log.last_url_fetched = response.url log.ThugLogging.Screenshot.run(self._window, url, response, ctype) handler = log.MIMEHandler.get_handler(ctype) if handler: handler(response.url, response.content) response.thug_mimehandler_hit = True else: if log.ThugOpts.features_logging: log.ThugLogging.Features.add_characters_count( len(response.text)) log.ThugLogging.Features.add_whitespaces_count( len([a for a in response.text if a.isspace()])) return response
def _doRun(self, p, stage):
    if not isinstance(p, six.string_types):
        return

    try:
        pefile.PE(data=p, fast_load=True)
        return
    except:  # pylint:disable=bare-except
        pass

    if log.ThugOpts.code_logging:
        log.ThugLogging.add_code_snippet(p, 'VBScript', 'Contained_Inside')

    log.ThugLogging.add_behavior_warn(
        "[Wscript.Shell ActiveX] Run (Stage %d) Code:\n%s" % (stage, p))
    log.ThugLogging.log_exploit_event(self._window.url,
                                      "WScript.Shell ActiveX",
                                      "Run",
                                      data={
                                          "stage": stage,
                                          "code": p,
                                      },
                                      forward=False)

    while True:
        try:
            index = p.index('"http')
        except ValueError:
            break

        p = p[index + 1:]
        s = p.split('"')
        if len(s) < 2:
            break

        url = s[0]
        log.ThugLogging.add_behavior_warn(
            "[Wscript.Shell ActiveX] Run (Stage %d) Downloading from URL %s" % (stage, url))

        try:
            response = self._window._navigator.fetch(url, redirect_type="doRun")
        except:  # pylint:disable=bare-except
            continue

        if response is None:
            continue

        if response.status_code == 404:
            continue

        md5 = hashlib.md5()
        md5.update(response.content)
        md5sum = md5.hexdigest()
        sha256 = hashlib.sha256()
        sha256.update(response.content)
        sha256sum = sha256.hexdigest()

        log.ThugLogging.add_behavior_warn(
            "[Wscript.Shell ActiveX] Run (Stage %d) Saving file %s" % (stage, md5sum, ))

        p = '"'.join(s[1:])

        data = {
            'status': response.status_code,
            'content': response.content,
            'md5': md5sum,
            'sha256': sha256sum,
            'fsize': len(response.content),
            'ctype': response.headers.get('content-type', 'unknown'),
            'mtype': Magic(response.content).get_mime(),
        }

        log.ThugLogging.log_location(url, data)
        log.TextClassifier.classify(url, response.content)
        self._doRun(response.content, stage + 1)
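A standalone sketch of the URL-extraction loop used by _doRun() above: it scans a decoded script payload for double-quoted http URLs and collects them in order; extract_quoted_urls is a hypothetical helper name, not part of Thug.

# Illustrative only: pull double-quoted http URLs out of a VBScript-like payload.
def extract_quoted_urls(payload):
    urls = []
    p = payload
    while True:
        try:
            index = p.index('"http')
        except ValueError:
            break

        # Skip past the opening quote, then take everything up to the closing quote.
        p = p[index + 1:]
        s = p.split('"')
        if len(s) < 2:
            break

        urls.append(s[0])
        # Continue scanning the remainder of the payload.
        p = '"'.join(s[1:])
    return urls

# Example: two quoted URLs embedded in a VBScript-like payload.
print(extract_quoted_urls('x.Open "GET", "http://a.example/1.exe": y = "http://b.example/2"'))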
def fetch(self, url, method = "GET", headers = None, body = None, redirect_type = None, params = None): # The command-line option -x (--local-nofetch) prevents remote # content fetching so raise an exception and exit the method. if log.HTTPSession.no_fetch: raise FetchForbidden # Do not attempt to fetch content if the URL is "about:blank". if log.HTTPSession.about_blank(url): raise AboutBlank # URL normalization and fixing (if broken and the option is # enabled). url = log.HTTPSession.normalize_url(self._window, url) if url is None: raise InvalidUrl if redirect_type: log.ThugLogging.add_behavior_warn(("[%s redirection] %s -> %s" % (redirect_type, self._window.url, url, ))) log.ThugLogging.log_connection(self._window.url, url, redirect_type) else: log.ThugLogging.log_connection(self._window.url, url, "unknown") # The command-line option -t (--threshold) defines the maximum # number of pages to fetch. If the threshold is reached avoid # fetching the contents. if log.HTTPSession.threshold_expired(url): raise ThresholdExpired # The command-line option -T (--timeout) set the analysis timeout # (in seconds). If the analysis lasts more than this value avoid # fetching the contents. if log.HTTPSession.timeout_expired(url): raise TimeoutExpired if headers is None: headers = dict() response = log.HTTPSession.fetch(url, method, self._window, self.userAgent, headers, body) if response is None: return None _url = log.ThugLogging.log_redirect(response) if _url: url = _url referer = response.request.headers.get('referer', 'None') log.ThugLogging.add_behavior_warn("[HTTP] URL: %s (Status: %s, Referer: %s)" % (url, response.status_code, referer, )) if log.HTTPSession.handle_status_code_error(response): #response.raise_for_status() return response ctype = response.headers.get('content-type', 'unknown') mime_base = os.path.join(log.ThugLogging.baseDir, ctype) md5 = hashlib.md5() md5.update(response.content) sha256 = hashlib.sha256() sha256.update(response.content) mtype = Magic(response.content).get_mime() data = { "content" : response.content, "md5" : md5.hexdigest(), "sha256" : sha256.hexdigest(), "fsize" : len(response.content), "ctype" : ctype, "mtype" : mtype } log.ThugLogging.add_behavior_warn("[HTTP] URL: %s (Content-type: %s, MD5: %s)" % (response.url, ctype, data["md5"])) log.ThugLogging.log_location(url, data) if response.history: location = response.headers.get('location', None) if location and redirect_type not in ("URL found", "JNLP", "iframe", ): self._window.url = location if redirect_type in ("meta", ): self._window.url = url log.ThugLogging.store_content(mime_base, data["md5"], response.content) log.ThugLogging.log_file(response.content, response.url, params) return response