def get_urls(self):
    """Extract all URLs embedded in this file through a simple regex."""
    if not os.path.getsize(self.file_path):
        return []

    # Memory-map the file rather than reading it in at once.
    # http://stackoverflow.com/a/454589
    urls = set()
    f = open(self.file_path, "rb")
    m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

    for url in re.findall(URL_REGEX, m):
        url = list(url)
        if not url[2]:
            # No path component: unless the host is a raw IP address, strip
            # trailing characters until the hostname ends in a known TLD.
            if re.match(IP_REGEX, url[1]) is None:
                tld = "".join(url[1].split(".")[-1:])
                while not is_whitelisted_tld(tld) and tld:
                    url[1] = url[1][:-1]
                    tld = tld[:-1]

                if not tld:
                    continue

        if not is_whitelisted_domain(url[1]):
            if not is_whitelisted_url("".join(url)):
                urls.add("".join(url))

    return list(urls)
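This version leans on several names defined elsewhere: URL_REGEX must capture three groups (scheme, hostname, path) so that "".join(url) reassembles the full URL, IP_REGEX matches raw IP hostnames, and the is_whitelisted_* helpers consult whitelists. A minimal sketch of what these might look like, with hypothetical definitions and an illustrative TLD list only:

import re

# Hypothetical reconstructions -- the real definitions may differ.
# Three capture groups, matching the url[0]/url[1]/url[2] indexing above.
URL_REGEX = (
    r"((?:https?|ftp)://)"      # url[0]: scheme
    r"([a-zA-Z0-9.\-]+)"        # url[1]: hostname or raw IP
    r"((?:/[\w.\-%?=&+]*)?)"    # url[2]: optional path, may be empty
)

# Dotted-quad IPv4 address; no octet range validation.
IP_REGEX = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"

# Illustrative whitelist only; the real list would be far longer.
WHITELISTED_TLDS = set(["com", "net", "org", "info"])

def is_whitelisted_tld(tld):
    return tld.lower() in WHITELISTED_TLDS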
def run(self):
    """Run analysis.
    @return: structured results.
    """
    self.key = "procmemory"
    results = []

    if os.path.exists(self.pmemory_path):
        for dmp in os.listdir(self.pmemory_path):
            dmp_path = os.path.join(self.pmemory_path, dmp)
            dmp_file = File(dmp_path)

            # Let's hope the file is not too big.
            buf = open(dmp_path, "rb").read()

            # Collect every URL in the dump whose domain is not whitelisted.
            urls = set()
            for url in re.findall(HTTP_REGEX, buf):
                if not is_whitelisted_domain(url[1]):
                    urls.add("".join(url))

            proc = dict(
                file=dmp_path,
                pid=int(os.path.basename(dmp_path).split("-")[0]),
                yara=dmp_file.get_yara(
                    os.path.join(CUCKOO_ROOT, "data", "yara",
                                 "index_memory.yar")),
                urls=list(urls),
            )

            results.append(proc)

    return results
def run(self):
    """Run analysis.
    @return: structured results.
    """
    self.key = "procmemory"
    results = []

    if os.path.exists(self.pmemory_path):
        for dmp in os.listdir(self.pmemory_path):
            dmp_path = os.path.join(self.pmemory_path, dmp)
            dmp_file = File(dmp_path)

            # Let's hope the file is not too big.
            buf = open(dmp_path, "rb").read()

            urls = set()
            for url in re.findall(HTTP_REGEX, buf):
                if not is_whitelisted_domain(url[1]):
                    urls.add("".join(url))

            proc = dict(
                file=dmp_path,
                pid=os.path.splitext(os.path.basename(dmp_path))[0],
                yara=dmp_file.get_yara(
                    os.path.join(CUCKOO_ROOT, "data", "yara",
                                 "index_memory.yar")),
                urls=list(urls),
            )

            results.append(proc)

    return results
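The two run() variants differ only in how they derive the process ID from the dump filename: the first assumes names like "<pid>-<counter>.dmp" and parses an integer, while this one assumes "<pid>.dmp" and keeps the whole filename stem as a string. A quick illustration under those assumed naming schemes:

import os

name = "2624-1.dmp"   # assumed "<pid>-<counter>.dmp" naming
pid = int(os.path.basename(name).split("-")[0])       # -> 2624 (int)

name = "2624.dmp"     # assumed "<pid>.dmp" naming
stem = os.path.splitext(os.path.basename(name))[0]    # -> "2624" (str)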
def extract_urls(self, filepath):
    """Extract URLs from a file via regex, skipping whitelisted domains."""
    # http://stackoverflow.com/a/454589
    urls = set()
    f = open(filepath, "rb")
    m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

    for url in re.findall(HTTP_REGEX, m):
        if not is_whitelisted_domain(url[1]):
            urls.add("".join(url))

    return urls
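The simpler variants rely on HTTP_REGEX and is_whitelisted_domain rather than the fuller URL machinery above. Judging from the indexing, HTTP_REGEX needs at least two capture groups with the hostname second, so "".join(url) rebuilds the matched URL. A hedged sketch, with hypothetical definitions and illustrative whitelist entries:

import re

# Hypothetical stand-ins -- the real definitions live elsewhere.
HTTP_REGEX = (
    r"((?:https?|ftp)://)"   # url[0]: scheme
    r"([a-zA-Z0-9.\-]+)"     # url[1]: hostname, checked against the whitelist
)

# Illustrative entries only.
WHITELISTED_DOMAINS = set(["microsoft.com", "windowsupdate.com"])

def is_whitelisted_domain(domain):
    # Accept the domain itself or any subdomain of a whitelisted entry.
    return any(domain == d or domain.endswith("." + d)
               for d in WHITELISTED_DOMAINS)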
def get_urls(self):
    """Extract all URLs embedded in this file through a simple regex."""
    if not os.path.getsize(self.file_path):
        return []

    # http://stackoverflow.com/a/454589
    urls = set()
    f = open(self.file_path, "rb")
    m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

    for url in re.findall(URL_REGEX, m):
        if not is_whitelisted_domain(url[1]):
            urls.add("".join(url))

    return list(urls)
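For a quick sanity check, the method-based variants would be called through the owning File wrapper. A minimal usage sketch; the sample path is an assumption:

f = File("/tmp/sample.bin")   # hypothetical sample path
for url in f.get_urls():
    print(url)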