def test_mongo_connect_store_file():
    set_cwd(tempfile.mkdtemp())
    cuckoo_create(cfg={
        "reporting": {
            "mongodb": {
                "enabled": True,
                "db": "cuckootest",
            },
        },
    })

    mongo.init()
    assert mongo.database == "cuckootest"

    fd, filepath = tempfile.mkstemp()
    os.write(fd, "hello world")
    os.close(fd)
    f = File(filepath)

    r = MongoDB()
    r.init_once()
    id1 = r.store_file(f, "foobar.txt")
    id2 = r.store_file(f, "foobar.txt")
    assert id1 == id2

    assert mongo.db.fs.files.find_one({
        "sha256": f.get_sha256(),
    })["_id"] == id1

    assert mongo.grid.get(id1).read() == "hello world"
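# The test above relies on store_file() being idempotent: storing the same
# File twice must return the same GridFS ObjectId. Below is a minimal sketch
# of how such deduplication can be done with pymongo/gridfs; the helper name
# store_file_deduplicated() and its arguments are assumptions made for
# illustration, not Cuckoo's actual implementation.
import gridfs

def store_file_deduplicated(db, sample, filename):
    """Store `sample` in GridFS once, keyed by its SHA256 (sketch)."""
    fs = gridfs.GridFS(db)
    existing = db.fs.files.find_one({"sha256": sample.get_sha256()})
    if existing:
        # Already stored: reuse the existing ObjectId.
        return existing["_id"]
    # Not stored yet: insert the contents along with the hash metadata.
    return fs.put(
        open(sample.file_path, "rb").read(),
        filename=filename, sha256=sample.get_sha256(),
    )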
def run(self):
    """Run analysis.
    @return: list of dropped files with related information.
    """
    self.key = "dropped"
    dropped_files, meta = [], {}

    if os.path.exists(self.dropped_meta_path):
        for line in open(self.dropped_meta_path, "rb"):
            entry = json.loads(line)
            filepath = os.path.join(self.analysis_path, entry["path"])
            meta[filepath] = {
                "pids": entry["pids"],
                "filepath": entry["filepath"],
            }

    for dir_name, dir_names, file_names in os.walk(self.dropped_path):
        for file_name in file_names:
            file_path = os.path.join(dir_name, file_name)
            file_info = File(file_path=file_path).get_all()
            file_info.update(meta.get(file_info["path"], {}))
            dropped_files.append(file_info)

    for dir_name, dir_names, file_names in os.walk(self.package_files):
        for file_name in file_names:
            file_path = os.path.join(dir_name, file_name)
            file_info = File(file_path=file_path).get_all()
            dropped_files.append(file_info)

    return dropped_files
def run(self):
    """Run androguard to extract static Android information.
    @return: dict of static features
    """
    self.key = "apkinfo"
    apkinfo = {}

    if "file" not in self.task["category"]:
        return

    from androguard.core.bytecodes.apk import APK
    from androguard.core.bytecodes.dvm import DalvikVMFormat
    from androguard.core.analysis.analysis import uVMAnalysis
    from androguard.core.analysis import analysis

    f = File(self.task["target"])
    if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError(
                "Sample file doesn't exist: \"%s\"" % self.file_path
            )

        try:
            a = APK(self.file_path)
            if a.is_valid_APK():
                manifest = {}

                apkinfo["files"] = self._apk_files(a)
                manifest["package"] = a.get_package()
                # manifest["permissions"] = a.get_details_permissions_new()
                manifest["main_activity"] = a.get_main_activity()
                manifest["activities"] = a.get_activities()
                manifest["services"] = a.get_services()
                manifest["receivers"] = a.get_receivers()
                # manifest["receivers_actions"] = a.get__extended_receivers()
                manifest["providers"] = a.get_providers()
                manifest["libraries"] = a.get_libraries()
                apkinfo["manifest"] = manifest

                # apkinfo["certificate"] = a.get_certificate()

                static_calls = {}
                if self.check_size(apkinfo["files"]):
                    vm = DalvikVMFormat(a.get_dex())
                    vmx = uVMAnalysis(vm)

                    static_calls["all_methods"] = self.get_methods(vmx)
                    static_calls["is_native_code"] = analysis.is_native_code(vmx)
                    static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx)
                    static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx)

                    # static_calls["dynamic_method_calls"] = analysis.get_show_DynCode(vmx)
                    # static_calls["reflection_method_calls"] = analysis.get_show_ReflectionCode(vmx)
                    # static_calls["permissions_method_calls"] = analysis.get_show_Permissions(vmx)
                    # static_calls["crypto_method_calls"] = analysis.get_show_CryptoCode(vmx)
                    # static_calls["native_method_calls"] = analysis.get_show_NativeMethods(vmx)
                else:
                    log.warning(
                        "Dex size bigger than: %s",
                        self.options.decompilation_threshold
                    )

                apkinfo["static_method_calls"] = static_calls
        except (IOError, OSError, zipfile.BadZipfile) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

    return apkinfo
def run(self):
    """Run the unofficial Google Play Python API to get the Google Play information.
    @return: dict of Google Play features
    """
    self.key = "googleplay"
    googleplay = {}

    if not HAVE_GOOGLEPLAY:
        log.error("Unable to import the GooglePlay library, has it been "
                  "installed properly?")
        return

    if "file" not in self.task["category"]:
        return

    f = File(self.task["target"])
    if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError(
                "Sample file doesn't exist: \"%s\"" % self.file_path
            )

        android_id = self.options.get("android_id")
        google_login = self.options.get("google_login")
        google_password = self.options.get("google_password")
        # auth_token = self.options.get("auth_token", None)

        if not android_id and not google_login and not google_password:
            raise CuckooProcessingError(
                "Google Play Credentials not configured, skip"
            )

        try:
            a = APK(self.file_path)
            if a.is_valid_APK():
                package = a.get_package()

                # Connect
                api = GooglePlayAPI(android_id)
                api.login(google_login, google_password, None)

                # Get the version code and the offer type from the app details
                app_data = api.details(package)
                app_detail = app_data.docV2.details.appDetails

                if not app_detail.installationSize:
                    return googleplay

                googleplay["title"] = app_detail.title
                googleplay["app_category"] = app_detail.appCategory._values
                googleplay["version_code"] = app_detail.versionCode
                googleplay["app_type"] = app_detail.appType
                googleplay["content_rating"] = app_detail.contentRating
                googleplay["developer_email"] = app_detail.developerEmail
                googleplay["developer_name"] = app_detail.developerName
                googleplay["developer_website"] = app_detail.developerWebsite
                googleplay["installation_size"] = app_detail.installationSize
                googleplay["num_downloads"] = app_detail.numDownloads
                googleplay["upload_date"] = app_detail.uploadDate
                googleplay["permissions"] = app_detail.permission._values
        except (IOError, OSError, zipfile.BadZipfile) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

    return googleplay
def run(self):
    """Run analysis.
    @return: results dict.
    """
    self.key = "static"
    static = {}

    if self.task["category"] == "file":
        if not os.path.exists(self.file_path):
            return

        f = File(self.file_path)
        filename = os.path.basename(self.task["target"])
    elif self.task["category"] == "archive":
        if not os.path.exists(self.file_path):
            return

        f = Archive(self.file_path).get_file(
            self.task["options"]["filename"]
        )
        filename = os.path.basename(self.task["options"]["filename"])
    else:
        return

    if filename:
        ext = filename.split(os.path.extsep)[-1].lower()
    else:
        ext = None

    package = self.task.get("package")

    if package == "exe" or ext == "exe" or "PE32" in f.get_type():
        static.update(PortableExecutable(f.file_path).run())
        static["keys"] = f.get_keys()

    if package == "wsf" or ext == "wsf":
        static["wsf"] = WindowsScriptFile(f.file_path).run()

    if package in ("doc", "ppt", "xls") or ext in self.office_ext:
        static["office"] = OfficeDocument(f.file_path).run()

    if package == "pdf" or ext == "pdf":
        static["pdf"] = dispatch(
            _pdf_worker, (f.file_path,),
            timeout=self.options.pdf_timeout
        )

    if package == "lnk" or ext == "lnk":
        static["lnk"] = LnkShortcut(f.file_path).run()

    return static
def run(self):
    """Run analysis.
    @return: structured results.
    """
    self.key = "procmemory"
    results = []

    if os.path.exists(self.pmemory_path):
        for dmp in os.listdir(self.pmemory_path):
            if not dmp.endswith(".dmp"):
                continue

            dump_path = os.path.join(self.pmemory_path, dmp)
            dump_file = File(dump_path)

            pid, num = map(int, re.findall("(\\d+)", dmp))

            regions = []
            for region in roach.procmem(dump_path).regions:
                regions.append(region.to_json())

            proc = dict(
                file=dump_path, pid=pid, num=num,
                yara=dump_file.get_yara("memory"),
                urls=list(dump_file.get_urls()),
                regions=regions,
            )

            ExtractManager.for_task(self.task["id"]).peek_procmem(proc)

            if self.options.get("idapro"):
                self.create_idapy(proc)

            if self.options.get("extract_img"):
                proc["extracted"] = list(self.dump_images(
                    proc, self.options.get("extract_dll")
                ))

            if self.options.get("dump_delete"):
                try:
                    os.remove(dump_path)
                except OSError:
                    log.error(
                        "Unable to delete memory dump file at path \"%s\"",
                        dump_path
                    )

            results.append(proc)

    results.sort(key=lambda x: (x["pid"], x["num"]))
    return results
def store_screenshots(request, task_id, body):
    if not body or not isinstance(body, list):
        return json_error_response("screenshots missing")

    report = ControlApi.get_report(int(task_id))
    if not report:
        return json_error_response("report missing")

    for scr in body:
        sid = scr.get("id", None)
        data = scr.get("data", None)

        try:
            if sid is None or not data:
                raise ValueError

            ftype, b64 = data.split(",")
            if ftype != "data:image/png;base64":
                raise ValueError

            f = base64.b64decode(b64)
            if f[:4] != "\x89PNG":
                raise ValueError
        except ValueError:
            return json_error_response("invalid format")

        shot_path = cwd(
            "shots", "remotecontrol_%d.png" % int(sid),
            analysis=int(task_id)
        )
        open(shot_path, "wb").write(f)

        shot_blob = {}
        shot = File(shot_path)
        if shot.valid():
            shot_id = mdb.store_file(shot)
            shot_blob["original"] = shot_id

        if shot_blob:
            report["shots"].append(shot_blob)

    mdb.db.analysis.save(report)
    return JsonResponse({
        "status": "success",
    })
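# For reference, store_screenshots() above expects a JSON list of objects
# with an "id" and a base64 PNG data-URI in "data". The payload below is a
# hedged example reconstructed from the validation logic in the view, not an
# excerpt from an official API document; "shot.png" is a placeholder path.
import base64

example_body = [
    {
        "id": 1,
        "data": "data:image/png;base64," + base64.b64encode(
            open("shot.png", "rb").read()
        ),
    },
]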
def push_script(self, process, command):
    filepath = self.write_extracted(
        command.ext, command.get_script().encode("utf8")
    )
    if not filepath:
        return

    process = process or {}
    yara_matches = File(filepath).get_yara("scripts")
    self.items.append({
        "category": "script",
        "program": command.program,
        "pid": process.get("pid"),
        "first_seen": process.get("first_seen"),
        "raw": filepath,
        "yara": yara_matches,
        "info": {},
    })

    for match in yara_matches:
        match = YaraMatch(match, "script")
        self.handle_yara(filepath, match)
def test_magic1(self):
    f = File("tests/files/foo.txt")
    assert "ASCII text" in f.get_type()
    assert f.get_content_type() == "text/plain"
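# get_type() and get_content_type() return libmagic's textual and MIME
# descriptions respectively. A minimal sketch of equivalent lookups with the
# python-magic package is shown below for illustration; the describe() helper
# is hypothetical and this is an assumption about comparable behaviour, not
# Cuckoo's own implementation.
import magic

def describe(filepath):
    file_type = magic.from_file(filepath)                  # e.g. "ASCII text"
    content_type = magic.from_file(filepath, mime=True)    # e.g. "text/plain"
    return file_type, content_type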
def setup(self):
    # File() will invoke cwd(), so any CWD is required.
    set_cwd(tempfile.mkdtemp())
    self.path = tempfile.mkstemp()[1]
    self.file = File(self.path)
class TestFile(object):
    def setup(self):
        # File() will invoke cwd(), so any CWD is required.
        set_cwd(tempfile.mkdtemp())
        self.path = tempfile.mkstemp()[1]
        self.file = File(self.path)

    def test_get_name(self):
        assert self.path.split(os.sep)[-1] == self.file.get_name()

    def test_get_data(self):
        assert "" == self.file.get_data()

    def test_get_size(self):
        assert 0 == self.file.get_size()

    def test_get_crc32(self):
        assert "00000000" == self.file.get_crc32()

    def test_get_md5(self):
        assert "d41d8cd98f00b204e9800998ecf8427e" == self.file.get_md5()

    def test_get_sha1(self):
        assert "da39a3ee5e6b4b0d3255bfef95601890afd80709" == self.file.get_sha1()

    def test_get_sha256(self):
        assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" == self.file.get_sha256()

    def test_get_sha512(self):
        assert "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" == self.file.get_sha512()

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert self.file.get_ssdeep() is not None
            pydeep  # Fake usage.
        except ImportError:
            assert self.file.get_ssdeep() is None

    def test_get_type(self):
        assert "empty" in self.file.get_type()

    def test_get_content_type(self):
        assert self.file.get_content_type() in [
            "inode/x-empty", "application/x-empty"
        ]

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in ["name", "size", "crc32", "md5", "sha1", "sha256",
                    "sha512", "ssdeep", "type"]:
            assert key in self.file.get_all()
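# The expected digests above are the well-known hashes of empty input (the
# test file created by tempfile.mkstemp() is zero bytes long) and can be
# double-checked directly with hashlib:
import hashlib

assert hashlib.md5("").hexdigest() == "d41d8cd98f00b204e9800998ecf8427e"
assert hashlib.sha1("").hexdigest() == "da39a3ee5e6b4b0d3255bfef95601890afd80709"
assert hashlib.sha256("").hexdigest() == (
    "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)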
def run(self):
    """Run analysis.
    @return: results dict.
    """
    self.key = "static"
    static = {}

    if self.task["category"] == "file":
        if not os.path.exists(self.file_path):
            return

        f = File(self.file_path)
        filename = os.path.basename(self.task["target"])
    elif self.task["category"] == "archive":
        if not os.path.exists(self.file_path):
            return

        f = Archive(self.file_path).get_file(
            self.task["options"]["filename"]
        )
        filename = os.path.basename(self.task["options"]["filename"])
    else:
        return

    if filename:
        ext = filename.split(os.path.extsep)[-1].lower()
    else:
        ext = None

    package = self.task.get("package")

    if package == "generic" and (ext == "elf" or "ELF" in f.get_type()):
        static["elf"] = ELF(f.file_path).run()
        static["keys"] = f.get_keys()

    if package == "exe" or ext == "exe" or "PE32" in f.get_type():
        static.update(PortableExecutable(f.file_path).run())
        static["keys"] = f.get_keys()

    if package == "wsf" or ext == "wsf":
        static["wsf"] = WindowsScriptFile(f.file_path).run()

    if package in ("doc", "ppt", "xls") or ext in self.office_ext:
        static["office"] = OfficeDocument(f.file_path, self.task["id"]).run()

    if package == "pdf" or ext == "pdf":
        if f.get_content_type() == "application/pdf":
            static["pdf"] = dispatch(
                _pdf_worker, (f.file_path,),
                timeout=self.options.pdf_timeout
            ) or []
        else:
            static["pdf"] = []

    if package == "generic" or ext == "lnk":
        static["lnk"] = LnkShortcut(f.file_path).run()

    if package == "apk" or ext == "apk" or any(t in f.get_type() for t in ("JAR", "Zip")):
        static["apkinfo"] = AndroidPackage(f.file_path).run()

    return static
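# A small illustration of the extension handling used above: the last
# extsep-separated component is taken, so a filename without any extension
# yields the whole name as `ext`.
import os

assert "invoice.pdf".split(os.path.extsep)[-1].lower() == "pdf"
assert "archive.tar.gz".split(os.path.extsep)[-1].lower() == "gz"
assert "README".split(os.path.extsep)[-1].lower() == "readme"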
def init(self):
    """Initialize the analysis."""
    self.storage = cwd(analysis=self.task.id)

    # If the analysis storage folder already exists, we need to abort the
    # analysis or previous results will be overwritten and lost.
    if os.path.exists(self.storage):
        log.error(
            "Analysis results folder already exists at path \"%s\", "
            "analysis aborted", self.storage
        )
        return False

    # If we're not able to create the analysis storage folder, we have to
    # abort the analysis. Also create all directories that the ResultServer
    # can use for file uploads.
    try:
        Folders.create(self.storage, RESULT_DIRECTORIES)
    except CuckooOperationalError:
        log.error("Unable to create analysis folder %s", self.storage)
        return False

    self.store_task_info()

    if self.task.category == "file" or self.task.category == "archive":
        # Check if we have permissions to access the file and fail this
        # analysis if we don't.
        if not os.access(self.task.target, os.R_OK):
            log.error(
                "Unable to access target file, please check if we have "
                "permissions to access the file: \"%s\"", self.task.target
            )
            return False

        # Check whether the file has been changed for some unknown reason
        # and fail this analysis if it has been modified.
        # TODO Absorb the file upon submission.
        sample = self.db.view_sample(self.task.sample_id)
        sha256 = File(self.task.target).get_sha256()
        if sha256 != sample.sha256:
            log.error(
                "Target file has been modified after submission: \"%s\"",
                self.task.target
            )
            return False

        # Store a copy of the original file if it does not exist already.
        # TODO This should be done at submission time.
        self.binary = cwd("storage", "binaries", sha256)
        if not os.path.exists(self.binary):
            try:
                shutil.copy(self.task.target, self.binary)
            except (IOError, shutil.Error):
                log.error(
                    "Unable to store file from \"%s\" to \"%s\", "
                    "analysis aborted", self.task.target, self.binary
                )
                return False

        # Each analysis directory contains a symlink/copy of the binary.
        try:
            self.storage_binary = os.path.join(self.storage, "binary")

            if hasattr(os, "symlink"):
                os.symlink(self.binary, self.storage_binary)
            else:
                shutil.copy(self.binary, self.storage_binary)
        except (AttributeError, OSError) as e:
            log.error(
                "Unable to create symlink/copy from \"%s\" to "
                "\"%s\": %s", self.binary, self.storage, e
            )
            return False

    # Initiates per-task logging.
    task_log_start(self.task.id)
    return True
def test_no_keys(self):
    assert File("tests/files/pdf0.pdf").get_keys() == []
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # This will likely hardcode the cuckoo.log to this point, but that
    # should be fine.
    if report.get("debug"):
        report["debug"]["cuckoo"] = list(report["debug"]["cuckoo"])

    # Store path of the analysis path.
    report["info"]["analysis_path"] = self.analysis_path

    # Store the sample in GridFS.
    if results.get("info", {}).get("category") == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
    mitmpr = File(mitmproxy_path)
    if mitmpr.valid():
        mitmpr_id = self.store_file(mitmpr)
        report["network"]["mitmproxy_id"] = mitmpr_id

    # Store the process memory dump file and extracted files in GridFS and
    # reference it back in the report.
    if "procmemory" in report and self.options.get("store_memdump", False):
        for idx, procmem in enumerate(report["procmemory"]):
            procmem_path = os.path.join(
                self.analysis_path, "memory", "%s.dmp" % procmem["pid"]
            )
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                procmem["procmem_id"] = procmem_id

            for extracted in procmem.get("extracted", []):
                f = File(extracted["path"])
                if f.valid():
                    extracted["extracted_id"] = self.store_file(f)

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id

            new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    new_extracted = []
    if "extracted" in report:
        for extracted in report["extracted"]:
            new_extr = dict(extracted)
            extr = File(extracted[extracted["category"]])
            if extr.valid():
                extr_id = self.store_file(extr)
                new_extr["object_id"] = extr_id

            new_extracted.append(new_extr)

    report["extracted"] = new_extracted

    # Add screenshots.
    report["shots"] = []
    if os.path.exists(self.shots_path):
        # Walk through the files and select the JPGs.
        for shot_file in sorted(os.listdir(self.shots_path)):
            if not shot_file.endswith(".jpg") or "_" in shot_file:
                continue

            shot_path = os.path.join(self.shots_path, shot_file)
            shot_path_dir = os.path.dirname(shot_path)
            shot_file_name, shot_file_ext = os.path.splitext(shot_file)
            shot_path_resized = os.path.join(
                shot_path_dir, "%s_small%s" % (shot_file_name, shot_file_ext)
            )

            shot_blob = {}

            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if os.path.isfile(shot_path):
                shot = File(shot_path)
                if shot.valid():
                    shot_id = self.store_file(shot)
                    shot_blob["original"] = shot_id

            # Try to get the alternative (small) size for this image,
            # store it and reference it back in the report.
            if os.path.isfile(shot_path_resized):
                shot_small = File(shot_path_resized)
                if shot_small.valid():
                    shot_id = self.store_file(shot_small)
                    shot_blob["small"] = shot_id

            if shot_blob:
                report["shots"].append(shot_blob)

    paginate = self.options.get("paginate", 100)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []

        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []

            # Loop on each process call.
            for call in process["calls"]:
                # If the chunk size is paginate or if the loop is
                # completed then store the chunk in MongoDB.
                if len(chunk) == paginate:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["pid"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    if report.get("procmon"):
        procmon, chunk = [], []
        for entry in report["procmon"]:
            if len(chunk) == paginate:
                procmon.append(self.db.procmon.insert(chunk))
                chunk = []
            chunk.append(entry)

        if chunk:
            procmon.append(self.db.procmon.insert(chunk))

        report["procmon"] = procmon

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
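# Quick check of the chunking logic above: with paginate == 100, a process
# with 250 calls ends up stored as three chunk documents of 100, 100 and 50
# calls (the last, partial chunk is flushed by the "store leftovers" branch).
calls = range(250)
chunks, chunk = [], []
for call in calls:
    if len(chunk) == 100:
        chunks.append(chunk)
        chunk = []
    chunk.append(call)
if chunk:
    chunks.append(chunk)
assert map(len, chunks) == [100, 100, 50]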
def dump_images(self, process, drop_dlls=False):
    """Dump executable images from this process memory dump."""
    buf = open(process["file"], "rb").read()

    images, capture, regions, end, pe = [], False, [], None, None
    for r in process["regions"]:
        off, size = r["offset"], r["size"]

        if capture:
            if int(r["end"], 16) > end:
                images.append((pe, regions))
                capture = False
            else:
                regions.append(r)
            continue

        # We're going to take a couple of assumptions for granted here.
        # Namely, the PE header is fully intact, has not been tampered
        # with, and the DOS header, the NT header, and the Optional header
        # all remain in the first page/chunk of this PE file.
        if buf[off:off + 2] != "MZ":
            continue

        try:
            pe = pefile.PE(data=buf[off:off + size], fast_load=True)
        except pefile.PEFormatError:
            continue

        # Enable the capture of memory regions.
        capture, regions = True, [r]
        end = int(r["addr"], 16) + pe.OPTIONAL_HEADER.SizeOfImage

    # If present, also process the last loaded executable.
    if capture and regions:
        images.append((pe, regions))

    for pe, regions in images:
        img = []

        # Skip DLLs if requested to do so (the default).
        if pe.is_dll() and not drop_dlls:
            continue

        hdrsz = self._fixup_pe_header(pe)
        if not hdrsz:
            continue

        img.append(str(pe.write())[:hdrsz])
        for idx, r in enumerate(regions):
            offset = r["offset"]
            if not idx:
                offset += hdrsz
            img.append(buf[offset:r["offset"] + r["size"]])

        sha1 = hashlib.sha1("".join(img)).hexdigest()

        if pe.is_dll():
            filename = "%s-%s.dll_" % (process["pid"], sha1[:16])
        elif pe.is_exe():
            filename = "%s-%s.exe_" % (process["pid"], sha1[:16])
        else:
            log.warning(
                "Unknown injected executable for pid=%s", process["pid"]
            )
            continue

        filepath = os.path.join(self.pmemory_path, filename)
        open(filepath, "wb").write("".join(img))

        yield File(filepath).get_all()
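# Each value yielded by dump_images() is the dict returned by File.get_all(),
# so every reconstructed image carries the usual name/size/hash/type fields
# (see the keys asserted in the TestFile class above). A hedged consumer
# sketch, assuming `extracted` is the list built as proc["extracted"] in the
# procmemory run() method earlier in this section:
for image in extracted:
    log.debug(
        "reconstructed %s (%s, %d bytes)",
        image["name"], image["sha256"], image["size"],
    )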
def run(self):
    """Runs TIE processing.
    @return: TIE results
    """
    log.info("Processing TIE reputation analysis.")

    self.key = "tie"
    timeout = int(self.options.get("timeout", 60))
    scan = int(self.options.get("scan", 0))

    # Evaluate the original sample against TIE reputation.
    if self.task["category"] == "file":
        # Create the client.
        with DxlClient(config) as client:
            # Connect to the fabric.
            client.connect()
            tie_client = TieClient(client)

            # Generate relevant hash information.
            md5_hex = File(self.file_path).get_md5()
            sha1_hex = File(self.file_path).get_sha1()
            sha256_hex = File(self.file_path).get_sha256()

            # Request raw json reputation results.
            reputations_dict = tie_client.get_file_reputation({
                HashType.MD5: md5_hex,
                HashType.SHA1: sha1_hex,
                HashType.SHA256: sha256_hex,
            })

            # Debug.
            log.info("Raw TIE results: " + json.dumps(
                reputations_dict, sort_keys=True, indent=4,
                separators=(',', ': ')
            ))

            # Initialize the result dict and the tiekey counter used to key
            # each result entry.
            proc_result = {}
            tiekey = 0
            strtiekey = str(tiekey)

            # Display the Global Threat Intelligence results.
            if FileProvider.GTI in reputations_dict:
                gti_rep = reputations_dict[FileProvider.GTI]

                proc_result[strtiekey] = {}
                proc_result[strtiekey]['title'] = "Global Threat Intelligence (GTI) Test Date:"
                proc_result[strtiekey]['value'] = EpochMixin.to_localtime_string(
                    gti_rep[ReputationProp.CREATE_DATE]
                )
                tiekey += 1
                strtiekey = str(tiekey)

                # Set GTI Trust Level.
                proc_result[strtiekey] = {}
                proc_result[strtiekey]['title'] = "Global Threat Intelligence (GTI) trust level:"
                trustValue = gti_rep[ReputationProp.TRUST_LEVEL]
                proc_result[strtiekey]['value'] = self.trustLevel(trustValue)
                tiekey += 1
                strtiekey = str(tiekey)

            # Display the Enterprise reputation information.
            if FileProvider.ENTERPRISE in reputations_dict:
                ent_rep = reputations_dict[FileProvider.ENTERPRISE]

                # Retrieve the enterprise reputation attributes.
                ent_rep_attribs = ent_rep[ReputationProp.ATTRIBUTES]

                # Display prevalence (if it exists).
                if FileEnterpriseAttrib.PREVALENCE in ent_rep_attribs:
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "Enterprise prevalence:"
                    proc_result[strtiekey]['value'] = ent_rep_attribs[
                        FileEnterpriseAttrib.PREVALENCE
                    ]
                    tiekey += 1
                    strtiekey = str(tiekey)

                # Display first contact date (if it exists).
                if FileEnterpriseAttrib.FIRST_CONTACT in ent_rep_attribs:
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "First contact: "
                    proc_result[strtiekey]['value'] = FileEnterpriseAttrib.to_localtime_string(
                        ent_rep_attribs[FileEnterpriseAttrib.FIRST_CONTACT]
                    )
                    tiekey += 1
                    strtiekey = str(tiekey)

            # These are lookup conversions for the ATD trust_score.
            valueDict = {}
            valueDict['-1'] = "Known Trusted"
            valueDict['0'] = "Most Likely Trusted"
            valueDict['1'] = "Might Be Trusted"
            valueDict['2'] = "Unknown"
            valueDict['3'] = "Might Be Malicious"
            valueDict['4'] = "Most Likely Malicious"
            valueDict['5'] = "Known Malicious"
            valueDict['-2'] = "Not Set"

            # Display the ATD reputation information.
            if FileProvider.ATD in reputations_dict:
                atd_rep = reputations_dict[FileProvider.ATD]

                # Retrieve the ATD reputation attributes.
                atd_rep_attribs = atd_rep[ReputationProp.ATTRIBUTES]

                proc_result[strtiekey] = {}
                proc_result[strtiekey]['title'] = "ATD Test Date: "
                proc_result[strtiekey]['value'] = EpochMixin.to_localtime_string(
                    atd_rep[ReputationProp.CREATE_DATE]
                )
                tiekey += 1
                strtiekey = str(tiekey)

                # Display GAM Score (if it exists).
                if AtdAttrib.GAM_SCORE in atd_rep_attribs:
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "ATD Gateway AntiMalware Score: "
                    proc_result[strtiekey]['value'] = valueDict[
                        atd_rep_attribs[AtdAttrib.GAM_SCORE]
                    ]
                    tiekey += 1
                    strtiekey = str(tiekey)

                # Display AV Engine Score (if it exists).
                if AtdAttrib.AV_ENGINE_SCORE in atd_rep_attribs:
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "ATD AV Engine Score: "
                    proc_result[strtiekey]['value'] = valueDict[
                        atd_rep_attribs[AtdAttrib.AV_ENGINE_SCORE]
                    ]
                    tiekey += 1
                    strtiekey = str(tiekey)

                # Display Sandbox Score (if it exists).
                if AtdAttrib.SANDBOX_SCORE in atd_rep_attribs:
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "ATD Sandbox Score: "
                    proc_result[strtiekey]['value'] = valueDict[
                        atd_rep_attribs[AtdAttrib.SANDBOX_SCORE]
                    ]
                    tiekey += 1
                    strtiekey = str(tiekey)

                # Display Verdict (if it exists).
                if AtdAttrib.VERDICT in atd_rep_attribs:
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "ATD Verdict: "
                    proc_result[strtiekey]['value'] = valueDict[
                        atd_rep_attribs[AtdAttrib.VERDICT]
                    ]
                    tiekey += 1
                    strtiekey = str(tiekey)

            results = proc_result
    elif self.task["category"] == "url":
        return
    elif self.task["category"] == "baseline":
        return
    elif self.task["category"] == "service":
        return
    else:
        raise CuckooProcessingError(
            "Unsupported task category: %s" % self.task["category"]
        )

    log.info("Finished processing TIE reputation analysis.")
    return results
def run(self):
    """Run the unofficial Google Play Python API to get the Google Play information.
    @return: dict of Google Play features
    """
    self.key = "googleplay"
    googleplay = {}

    if not HAVE_GOOGLEPLAY:
        log.error("Unable to import the GooglePlay library, has it been "
                  "installed properly?")
        return

    if "file" not in self.task["category"]:
        return

    from androguard.core.bytecodes.apk import APK

    f = File(self.task["target"])
    if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError(
                "Sample file doesn't exist: \"%s\"" % self.file_path
            )

        android_id = self.options.get("android_id")
        google_login = self.options.get("google_login")
        google_password = self.options.get("google_password")
        # auth_token = self.options.get("auth_token", None)

        if not android_id and not google_login and not google_password:
            raise CuckooProcessingError(
                "Google Play Credentials not configured, skip"
            )

        try:
            a = APK(self.file_path)
            if a.is_valid_APK():
                package = a.get_package()

                # Connect
                api = GooglePlayAPI(android_id)
                api.login(google_login, google_password, None)

                # Get the version code and the offer type from the app details
                app_data = api.details(package)
                app_detail = app_data.docV2.details.appDetails

                if not app_detail.installationSize:
                    return googleplay

                googleplay["title"] = app_detail.title
                googleplay["app_category"] = app_detail.appCategory._values
                googleplay["version_code"] = app_detail.versionCode
                googleplay["app_type"] = app_detail.appType
                googleplay["content_rating"] = app_detail.contentRating
                googleplay["developer_email"] = app_detail.developerEmail
                googleplay["developer_name"] = app_detail.developerName
                googleplay["developer_website"] = app_detail.developerWebsite
                googleplay["installation_size"] = app_detail.installationSize
                googleplay["num_downloads"] = app_detail.numDownloads
                googleplay["upload_date"] = app_detail.uploadDate
                googleplay["permissions"] = app_detail.permission._values
        except (IOError, OSError, zipfile.BadZipfile) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

    return googleplay
def run(self):
    """Run androguard to extract static Android information.
    @return: dict of static features
    """
    self.key = "apkinfo"
    apkinfo = {}

    if "file" not in self.task["category"]:
        return

    from androguard.core.bytecodes.apk import APK
    from androguard.core.bytecodes.dvm import DalvikVMFormat
    from androguard.core.analysis.analysis import uVMAnalysis
    from androguard.core.analysis import analysis

    f = File(self.task["target"])
    if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError(
                "Sample file doesn't exist: \"%s\"" % self.file_path
            )

        try:
            a = APK(self.file_path)
            if a.is_valid_APK():
                manifest = {}

                apkinfo["files"] = self._apk_files(a)
                manifest["package"] = a.get_package()
                # manifest["permissions"] = a.get_details_permissions_new()
                manifest["main_activity"] = a.get_main_activity()
                manifest["activities"] = a.get_activities()
                manifest["services"] = a.get_services()
                manifest["receivers"] = a.get_receivers()
                # manifest["receivers_actions"] = a.get__extended_receivers()
                manifest["providers"] = a.get_providers()
                manifest["libraries"] = a.get_libraries()
                apkinfo["manifest"] = manifest

                # apkinfo["certificate"] = a.get_certificate()

                static_calls = {}
                if self.check_size(apkinfo["files"]):
                    vm = DalvikVMFormat(a.get_dex())
                    vmx = uVMAnalysis(vm)

                    static_calls["all_methods"] = self.get_methods(vmx)
                    static_calls["is_native_code"] = analysis.is_native_code(vmx)
                    static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx)
                    static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx)

                    # static_calls["dynamic_method_calls"] = analysis.get_show_DynCode(vmx)
                    # static_calls["reflection_method_calls"] = analysis.get_show_ReflectionCode(vmx)
                    # static_calls["permissions_method_calls"] = analysis.get_show_Permissions(vmx)
                    # static_calls["crypto_method_calls"] = analysis.get_show_CryptoCode(vmx)
                    # static_calls["native_method_calls"] = analysis.get_show_NativeMethods(vmx)
                else:
                    log.warning(
                        "Dex size bigger than: %s",
                        self.options.decompilation_threshold
                    )

                apkinfo["static_method_calls"] = static_calls
        except (IOError, OSError, zipfile.BadZipfile) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

    return apkinfo
def run(self):
    results = {}

    # Include any results provided by the mitm script.
    results["mitm"] = []
    if os.path.exists(self.mitmout_path):
        for line in open(self.mitmout_path, "rb"):
            try:
                results["mitm"].append(json.loads(line))
            except:
                results["mitm"].append(line)

    if not os.path.exists(self.pcap_path):
        log.warning(
            "The PCAP file does not exist at path \"%s\".", self.pcap_path
        )
        return results

    if not os.path.getsize(self.pcap_path):
        log.error(
            "The PCAP file at path \"%s\" is empty." % self.pcap_path
        )
        return results

    log.debug("keys: {0}, values: {1}".format(
        self.options.keys(), self.options.values()
    ))

    filter_shark = self.options.get("filter_tshark")
    if filter_shark:
        log.debug("The tshark filter is {0}.".format(filter_shark))

        if not self.is_tool("tshark"):
            log.error(
                "tshark is not present on your system. Please install it!"
            )
        else:
            log.debug("Running tshark to filter the PCAP.")
            filtered_path = self.pcap_path.replace("dump.", "dump_filtered.")
            filtered_1_path = self.pcap_path.replace(
                "dump.", "dump_filtered_1."
            )

            cmd = (
                "tshark -F pcap -r " + self.pcap_path +
                " -Y \"" + filter_shark + "\" -w " + filtered_path
            )
            log.debug("{0}".format(cmd))

            p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
            output, err = p.communicate()
            if err:
                log.error(
                    "There was an error filtering the pcap: %s from %s" % (
                        err, self.pcap_path
                    )
                )
                return results

            os.renames(self.pcap_path, filtered_1_path)
            os.renames(filtered_path, self.pcap_path)
    else:
        log.debug("No tshark filter has been configured.")

    # PCAP file hash.
    results["pcap_sha256"] = File(self.pcap_path).get_sha256()

    sorted_path = self.pcap_path.replace("dump.", "dump_sorted.")
    if config("cuckoo:processing:sort_pcap"):
        sort_pcap(self.pcap_path, sorted_path)

        # Sorted PCAP file hash.
        if os.path.exists(sorted_path):
            results["sorted_pcap_sha256"] = File(sorted_path).get_sha256()
            pcap_path = sorted_path
        else:
            pcap_path = self.pcap_path
    else:
        pcap_path = self.pcap_path

    results.update(Pcap(pcap_path, self.options).run())

    if os.path.exists(pcap_path):
        try:
            p2 = Pcap2(pcap_path, self.get_tlsmaster(), self.network_path)
            results.update(p2.run())
        except:
            log.exception("Error running httpreplay-based PCAP analysis")

    return results
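# The tshark invocation above is built as a single shell string, which means
# the user-supplied display filter ends up on a shell command line. Below is
# a hedged alternative sketch using an argument list (no shell=True); the
# flags are standard tshark options, but the run_tshark_filter() helper is
# illustrative and not the project's actual code.
import subprocess

def run_tshark_filter(pcap_in, display_filter, pcap_out):
    p = subprocess.Popen(
        [
            "tshark", "-F", "pcap",
            "-r", pcap_in,            # input capture
            "-Y", display_filter,     # display filter to apply
            "-w", pcap_out,           # filtered output capture
        ],
        stderr=subprocess.PIPE,
    )
    _, err = p.communicate()
    return err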
def test_not_temporary_file(self):
    f = File("tests/files/pdf0.pdf")
    assert os.path.exists("tests/files/pdf0.pdf")
    del f
    assert os.path.exists("tests/files/pdf0.pdf")
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # This will likely hardcode the cuckoo.log to this point, but that
    # should be fine.
    if report.get("debug"):
        report["debug"]["cuckoo"] = list(report["debug"]["cuckoo"])

    # Store path of the analysis path.
    report["info"]["analysis_path"] = self.analysis_path

    # Store the sample in GridFS.
    if results.get("info", {}).get("category") == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
    mitmpr = File(mitmproxy_path)
    if mitmpr.valid():
        mitmpr_id = self.store_file(mitmpr)
        report["network"]["mitmproxy_id"] = mitmpr_id

    # Store the process memory dump file and extracted files in GridFS and
    # reference it back in the report.
    if "procmemory" in report and self.options.get("store_memdump", False):
        for idx, procmem in enumerate(report["procmemory"]):
            procmem_path = os.path.join(
                self.analysis_path, "memory", "%s.dmp" % procmem["pid"]
            )
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                procmem["procmem_id"] = procmem_id

            for extracted in procmem.get("extracted", []):
                f = File(extracted["path"])
                if f.valid():
                    extracted["extracted_id"] = self.store_file(f)

    # Store the scripts that Floss generated in GridFS and reference
    # them back in the report.
    if "strings" in report:
        if "idapro_sct_name" in report["strings"]:
            idapro_sct_path = os.path.join(
                self.analysis_path, "str_script",
                report["strings"]["idapro_sct_name"]
            )
            idapro_sct_file = File(idapro_sct_path)
            if idapro_sct_file.valid():
                report["strings"]["idapro_sct_id"] = self.store_file(idapro_sct_file)

        if "radare_sct_name" in report["strings"]:
            radare_sct_path = os.path.join(
                self.analysis_path, "str_script",
                report["strings"]["radare_sct_name"]
            )
            radare_sct_file = File(radare_sct_path)
            if radare_sct_file.valid():
                report["strings"]["radare_sct_id"] = self.store_file(radare_sct_file)

        if "x64dbg_sct_name" in report["strings"]:
            x64dbg_sct_path = os.path.join(
                self.analysis_path, "str_script",
                report["strings"]["x64dbg_sct_name"]
            )
            x64dbg_sct_file = File(x64dbg_sct_path)
            if x64dbg_sct_file.valid():
                report["strings"]["x64dbg_sct_id"] = self.store_file(x64dbg_sct_file)

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id

            new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    new_extracted = []
    if "extracted" in report:
        for extracted in report["extracted"]:
            new_extr = dict(extracted)
            extr = File(extracted["raw"])
            if extr.valid():
                extr_id = self.store_file(extr)
                new_extr["object_id"] = extr_id

            new_extracted.append(new_extr)

    report["extracted"] = new_extracted

    # Add screenshots.
    report["shots"] = []
    if os.path.exists(self.shots_path):
        # Walk through the files and select the JPGs.
        for shot_file in sorted(os.listdir(self.shots_path)):
            if not shot_file.endswith(".jpg") or "_" in shot_file:
                continue

            shot_path = os.path.join(self.shots_path, shot_file)
            shot_path_dir = os.path.dirname(shot_path)
            shot_file_name, shot_file_ext = os.path.splitext(shot_file)
            shot_path_resized = os.path.join(
                shot_path_dir, "%s_small%s" % (shot_file_name, shot_file_ext)
            )

            shot_blob = {}

            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if os.path.isfile(shot_path):
                shot = File(shot_path)
                if shot.valid():
                    shot_id = self.store_file(shot)
                    shot_blob["original"] = shot_id

            # Try to get the alternative (small) size for this image,
            # store it and reference it back in the report.
            if os.path.isfile(shot_path_resized):
                shot_small = File(shot_path_resized)
                if shot_small.valid():
                    shot_id = self.store_file(shot_small)
                    shot_blob["small"] = shot_id

            if shot_blob:
                report["shots"].append(shot_blob)

    paginate = self.options.get("paginate", 100)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []

        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []

            # Loop on each process call.
            for call in process["calls"]:
                # If the chunk size is paginate or if the loop is
                # completed then store the chunk in MongoDB.
                if len(chunk) == paginate:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["pid"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    if report.get("procmon"):
        procmon, chunk = [], []
        for entry in report["procmon"]:
            if len(chunk) == paginate:
                procmon.append(self.db.procmon.insert(chunk))
                chunk = []
            chunk.append(entry)

        if chunk:
            procmon.append(self.db.procmon.insert(chunk))

        report["procmon"] = procmon

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
class TestFile(object):
    def setup(self):
        # File() will invoke cwd(), so any CWD is required.
        set_cwd(tempfile.mkdtemp())
        self.path = tempfile.mkstemp()[1]
        self.file = File(self.path)

    def test_get_name(self):
        assert self.path.split(os.sep)[-1] == self.file.get_name()

    def test_get_data(self):
        assert "" == self.file.get_data()

    def test_get_size(self):
        assert 0 == self.file.get_size()

    def test_get_crc32(self):
        assert "00000000" == self.file.get_crc32()

    def test_get_md5(self):
        assert "d41d8cd98f00b204e9800998ecf8427e" == self.file.get_md5()

    def test_get_sha1(self):
        assert "da39a3ee5e6b4b0d3255bfef95601890afd80709" == self.file.get_sha1()

    def test_get_sha256(self):
        assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" == self.file.get_sha256()

    def test_get_sha512(self):
        assert "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" == self.file.get_sha512()

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert self.file.get_ssdeep() is not None
            pydeep  # Fake usage.
        except ImportError:
            assert self.file.get_ssdeep() is None

    def test_get_type(self):
        assert "empty" in self.file.get_type()

    def test_get_content_type(self):
        assert self.file.get_content_type() in [
            "inode/x-empty", "application/x-empty"
        ]

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in ["name", "size", "crc32", "md5", "sha1", "sha256",
                    "sha512", "ssdeep", "type"]:
            assert key in self.file.get_all()
def file_report(self, filepath, summary=False):
    """Get the report of an existing file scan.
    @param filepath: file path
    @param summary: if you want a summary report"""
    resource = File(filepath).get_md5()
    return self._get_report(self.FILE_REPORT, resource, summary)
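# file_report() keys the lookup on the sample's MD5. A hedged usage sketch:
# assume `client` is an already-configured instance of the API wrapper class
# that defines file_report(); its construction is intentionally omitted here
# because the constructor is not shown in this snippet.
report = client.file_report("tests/files/pdf0.pdf", summary=True)
if report:
    print report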
def test_symlink_magic(self):
    filepath = tempfile.mktemp()
    os.symlink(__file__, filepath)
    assert File(filepath).get_type().startswith("Python script")
    assert File(filepath).get_content_type() == "text/x-python"
def submit_tasks(target, options, package, custom, owner, timeout, priority,
                 machine, platform, memory, enforce_timeout, clock, tags,
                 remote, pattern, maxcount, is_unique, is_url, is_baseline,
                 is_shuffle):
    db = Database()

    data = dict(
        package=package or "",
        timeout=timeout,
        options=options,
        priority=priority,
        machine=machine,
        platform=platform,
        custom=custom,
        owner=owner,
        tags=tags,
        memory="1" if memory else "0",
        enforce_timeout="1" if enforce_timeout else "0",
        clock=clock,
        unique="1" if is_unique else "0",
    )

    if is_baseline:
        if remote:
            print "Remote baseline support has not yet been implemented."
            return

        task_id = db.add_baseline(timeout, owner, machine, memory)
        yield "Baseline", machine, task_id
        return

    if is_url and is_unique:
        print "URL doesn't have --unique support yet."
        return

    if is_url:
        for url in target:
            if not remote:
                data.pop("unique", None)
                task_id = db.add_url(to_unicode(url), **data)
                yield "URL", url, task_id
                continue

            data["url"] = to_unicode(url)
            try:
                r = requests.post(
                    "http://%s/tasks/create/url" % remote, data=data
                )
                yield "URL", url, r.json()["task_id"]
            except Exception as e:
                print "%s: unable to submit URL: %s" % (bold(red("Error")), e)
    else:
        files = []
        for path in target:
            files.extend(enumerate_files(os.path.abspath(path), pattern))

        if is_shuffle:
            random.shuffle(files)

        for filepath in files:
            if not os.path.getsize(filepath):
                print "%s: sample %s (skipping file)" % (
                    bold(yellow("Empty")), filepath
                )
                continue

            if maxcount is not None:
                if not maxcount:
                    break
                maxcount -= 1

            if not remote:
                if is_unique:
                    sha256 = File(filepath).get_sha256()
                    if db.find_sample(sha256=sha256):
                        yield "File", filepath, None
                        continue

                data.pop("unique", None)
                task_id = db.add_path(file_path=filepath, **data)
                yield "File", filepath, task_id
                continue

            files = {
                "file": (os.path.basename(filepath), open(filepath, "rb")),
            }

            try:
                r = requests.post(
                    "http://%s/tasks/create/file" % remote,
                    data=data, files=files
                )
                yield "File", filepath, r.json()["task_id"]
            except Exception as e:
                print "%s: unable to submit file: %s" % (
                    bold(red("Error")), e
                )
                continue
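# submit_tasks() is a generator yielding one (category, target, task_id)
# tuple per submission, with task_id set to None for duplicate unique
# samples. A hedged usage sketch, assuming a configured Cuckoo working
# directory and database; the argument values are illustrative placeholders,
# not recommended defaults.
for category, target, task_id in submit_tasks(
        ["samples/"], options="", package="", custom="", owner="",
        timeout=120, priority=1, machine="", platform="", memory=False,
        enforce_timeout=False, clock=None, tags=None, remote=None,
        pattern=None, maxcount=None, is_unique=True, is_url=False,
        is_baseline=False, is_shuffle=False):
    print "%s: %s -> task #%s" % (category, target, task_id)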
def run(self):
    """Run Floss on analyzed file.
    @return: Floss results dict.
    """
    self.key = "strings"
    self.floss = self.options.get("floss")
    self.MIN_STRINGLEN = int(self.options.get("min_str_len"))
    self.MAX_STRINGLEN = self.options.get("max_str_len")
    self.MAX_STRINGCNT = self.options.get("max_str_cnt")
    self.MAX_FILESIZE = 16*1024*1024

    STRING_TYPES = [
        "decoded",
        "stack",
        "static",
    ]

    strings = {}

    if self.task["category"] == "file":
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError(
                "Sample file doesn't exist: \"%s\"" % self.file_path
            )

        try:
            f = File(self.file_path)
            filename = os.path.basename(self.task["target"])
            base_name = os.path.splitext(filename)[0]
            ext = filename.split(os.path.extsep)[-1].lower()
            data = open(self.file_path, "r").read(self.MAX_FILESIZE)
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        # Extract static strings.
        static_strings = re.findall(
            "[\x1f-\x7e]{" + str(self.MIN_STRINGLEN) + ",}", data
        )
        for s in re.findall(
                "(?:[\x1f-\x7e][\x00]){" + str(self.MIN_STRINGLEN) + ",}", data):
            static_strings.append(s.decode("utf-16le"))

        if self.MAX_STRINGLEN != 0:
            for i, s in enumerate(static_strings):
                static_strings[i] = s[:self.MAX_STRINGLEN]

        if self.MAX_STRINGCNT != 0 and len(static_strings) > self.MAX_STRINGCNT:
            static_strings = static_strings[:self.MAX_STRINGCNT]
            static_strings.append("[snip]")

        package = self.task.get("package")

        if self.floss and (package == "exe" or ext == "exe" or "PE32" in f.get_type()):
            # Disable floss verbose logging.
            main.set_logging_levels()

            try:
                # Prepare Floss for extracting hidden & encoded strings.
                vw = vivisect.VivWorkspace()
                vw.loadFromFile(self.file_path)
                vw.analyze()

                selected_functions = main.select_functions(vw, None)
                decoding_functions_candidates = id_man.identify_decoding_functions(
                    vw, main.get_all_plugins(), selected_functions
                )
            except Exception as e:
                raise CuckooProcessingError(
                    "Error analyzing file with vivisect: %s" % e
                )

            try:
                # Decode & extract hidden & encoded strings.
                decoded_strings = main.decode_strings(
                    vw, decoding_functions_candidates, self.MIN_STRINGLEN
                )
                decoded_strs = main.filter_unique_decoded(decoded_strings)

                stack_strings = stackstrings.extract_stackstrings(
                    vw, selected_functions, self.MIN_STRINGLEN
                )
                stack_strings = list(stack_strings)

                decoded_strings = [
                    x for x in decoded_strs if x not in static_strings
                ]
            except Exception as e:
                raise CuckooProcessingError(
                    "Error extracting strings with floss: %s" % e
                )

            if len(decoded_strings) or len(stack_strings):
                # Create annotated scripts.
                if self.options.get("idapro_str_sct"):
                    idapro_sct_name = base_name + ".idb"
                    strings["idapro_sct_name"] = idapro_sct_name

                    main.create_ida_script(
                        self.file_path,
                        os.path.join(self.str_script_path, idapro_sct_name),
                        decoded_strings, stack_strings
                    )

                if self.options.get("radare_str_sct"):
                    radare_sct_name = base_name + ".r2"
                    strings["radare_sct_name"] = radare_sct_name

                    main.create_r2_script(
                        self.file_path,
                        os.path.join(self.str_script_path, radare_sct_name),
                        decoded_strings, stack_strings
                    )

                if self.options.get("x64dbg_str_sct"):
                    x64dbg_sct_name = base_name + ".json"
                    strings["x64dbg_sct_name"] = x64dbg_sct_name
                    imagebase = vw.filemeta.values()[0]['imagebase']

                    main.create_x64dbg_database(
                        self.file_path,
                        os.path.join(self.str_script_path, base_name + ".json"),
                        imagebase, decoded_strings
                    )

                # Convert Floss strings into regular, readable strings.
                for idx, s in enumerate(decoded_strings):
                    decoded_strings[idx] = main.sanitize_string_for_printing(s.s)

                for idx, s in enumerate(stack_strings):
                    stack_strings[idx] = s.s

                results = [decoded_strings, stack_strings, static_strings]

                for idx, str_type in enumerate(STRING_TYPES):
                    strings[str_type] = results[idx]
        else:
            strings["static"] = static_strings

    return strings