def enumerate_files(path, pattern):
    """Yield filepaths found at `path`.

    If `path` is a regular file it is yielded as-is. If it is a
    directory, every regular file beneath it is walked recursively
    and, when `pattern` is given, only basenames matching the
    fnmatch-style `pattern` are yielded (as unicode). Any other kind
    of path (missing, broken symlink) yields nothing.
    """
    if os.path.isfile(path):
        # NOTE(review): an explicitly given file bypasses both the
        # pattern filter and to_unicode(), unlike the directory walk
        # below -- preserved as-is for backward compatibility.
        yield path
    elif os.path.isdir(path):
        for dirname, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                filepath = os.path.join(dirname, filename)
                # os.walk may report entries that are not regular
                # files (or that vanished meanwhile); re-check.
                if not os.path.isfile(filepath):
                    continue
                # Collapsed nested ifs: yield when there is no
                # filter, or when the basename matches it.
                if not pattern or fnmatch.fnmatch(filename, pattern):
                    yield to_unicode(filepath)
def enumerate_files(path, pattern):
    """Yields all filepaths from a directory."""
    if os.path.isfile(path):
        # A plain file is reported directly, without filtering.
        yield path
        return
    if not os.path.isdir(path):
        # Neither a file nor a directory: nothing to report.
        return
    for dirpath, _, names in os.walk(path):
        for name in names:
            full = os.path.join(dirpath, name)
            # Skip anything that is not (or no longer) a regular file.
            if not os.path.isfile(full):
                continue
            # Apply the optional fnmatch filter on the basename.
            if pattern and not fnmatch.fnmatch(name, pattern):
                continue
            yield to_unicode(full)
def run(self):
    """Extract every script chunk from self.filepath and return the
    (decoded) sources as a list of unicode strings.

    Chunks whose language attribute is jscript.encode or
    vbscript.encode are first run through self.decode().
    """
    ret = []
    # Read the raw bytes once and close the handle immediately
    # (the original leaked the open file object).
    with open(self.filepath, "rb") as fileobj:
        source = fileobj.read()

    # Get rid of superfluous comments.
    source = re.sub("/\\*.*?\\*/", "", source, flags=re.S)

    for script in re.findall(self.script_re, source, re.I | re.S):
        try:
            x = bs4.BeautifulSoup(script, "html.parser")
            language = x.script.attrs.get("language", "").lower()
        except Exception:
            # bs4 found no <script> element (or parsing failed);
            # narrowed from a bare except that also swallowed
            # KeyboardInterrupt/SystemExit.
            language = None

        # We can't rely on bs4 or any other HTML/XML parser to provide us
        # with the raw content of the xml tag as they decode html entities
        # and all that, leaving us with a corrupted string.
        # NOTE(review): assumes every chunk matched by script_re is
        # tag-wrapped; a non-matching chunk would raise AttributeError
        # here (unchanged from the original behavior).
        script = re.match("<.*>(.*)</.*>$", script, re.S).group(0)

        # Decode JScript.Encode encoding.
        if language in ("jscript.encode", "vbscript.encode"):
            script = self.decode(script)

        ret.append(to_unicode(script))
    return ret
def submit_tasks(target, options, package, custom, owner, timeout, priority, machine, platform, memory, enforce_timeout, clock, tags, remote, pattern, maxcount, is_unique, is_url, is_baseline, is_shuffle): db = Database() data = dict( package=package or "", timeout=timeout, options=options, priority=priority, machine=machine, platform=platform, custom=custom, owner=owner, tags=tags, memory="1" if memory else "0", enforce_timeout="1" if enforce_timeout else "0", clock=clock, unique="1" if is_unique else "0", ) if is_baseline: if remote: print "Remote baseline support has not yet been implemented." return task_id = db.add_baseline(timeout, owner, machine, memory) yield "Baseline", machine, task_id return if is_url and is_unique: print "URL doesn't have --unique support yet." return if is_url: for url in target: if not remote: data.pop("unique", None) task_id = db.add_url(to_unicode(url), **data) yield "URL", url, task_id continue data["url"] = to_unicode(url) try: r = requests.post("http://%s/tasks/create/url" % remote, data=data) yield "URL", url, r.json()["task_id"] except Exception as e: print "%s: unable to submit URL: %s" % (bold(red("Error")), e) else: files = [] for path in target: files.extend(enumerate_files(os.path.abspath(path), pattern)) if is_shuffle: random.shuffle(files) for filepath in files: if not os.path.getsize(filepath): print "%s: sample %s (skipping file)" % (bold( yellow("Empty")), filepath) continue if maxcount is not None: if not maxcount: break maxcount -= 1 if not remote: if is_unique: sha256 = File(filepath).get_sha256() if db.find_sample(sha256=sha256): yield "File", filepath, None continue data.pop("unique", None) task_id = db.add_path(file_path=filepath, **data) yield "File", filepath, task_id continue files = { "file": (os.path.basename(filepath), open(filepath, "rb")), } try: r = requests.post("http://%s/tasks/create/file" % remote, data=data, files=files) yield "File", filepath, r.json()["task_id"] except Exception as e: print "%s: 
unable to submit file: %s" % (bold(red("Error")), e) continue
def submit_tasks(target, options, package, custom, owner, timeout, priority, machine, platform, memory, enforce_timeout, clock, tags, remote, pattern, maxcount, is_unique, is_url, is_baseline, is_shuffle): db = Database() data = dict( package=package or "", timeout=timeout, options=options, priority=priority, machine=machine, platform=platform, custom=custom, owner=owner, tags=tags, memory="1" if memory else "0", enforce_timeout="1" if enforce_timeout else "0", clock=clock, unique="1" if is_unique else "0", ) if is_baseline: if remote: print "Remote baseline support has not yet been implemented." return task_id = db.add_baseline(timeout, owner, machine, memory) yield "Baseline", machine, task_id return if is_url and is_unique: print "URL doesn't have --unique support yet." return if is_url: for url in target: if not remote: data.pop("unique", None) task_id = db.add_url(to_unicode(url), **data) yield "URL", url, task_id continue data["url"] = to_unicode(url) try: r = requests.post( "http://%s/tasks/create/url" % remote, data=data ) yield "URL", url, r.json()["task_id"] except Exception as e: print "%s: unable to submit URL: %s" % ( bold(red("Error")), e ) else: files = [] for path in target: files.extend(enumerate_files(os.path.abspath(path), pattern)) if is_shuffle: random.shuffle(files) for filepath in files: if not os.path.getsize(filepath): print "%s: sample %s (skipping file)" % ( bold(yellow("Empty")), filepath ) continue if maxcount is not None: if not maxcount: break maxcount -= 1 if not remote: if is_unique: sha256 = File(filepath).get_sha256() if db.find_sample(sha256=sha256): yield "File", filepath, None continue data.pop("unique", None) task_id = db.add_path(file_path=filepath, **data) yield "File", filepath, task_id continue files = { "file": (os.path.basename(filepath), open(filepath, "rb")), } try: r = requests.post( "http://%s/tasks/create/file" % remote, data=data, files=files ) yield "File", filepath, r.json()["task_id"] except Exception as e: 
print "%s: unable to submit file: %s" % ( bold(red("Error")), e ) continue