def peid(self): def get_signatures(): with file(os.path.join(CIRTKIT_ROOT, 'data/peid/UserDB.TXT'), 'rt') as f: sig_data = f.read() signatures = peutils.SignatureDatabase(data=sig_data) return signatures def get_matches(pe, signatures): matches = signatures.match_all(pe, ep_only=True) return matches if not self.__check_session(): return signatures = get_signatures() peid_matches = get_matches(self.pe, signatures) if peid_matches: self.log('info', "PEiD Signatures:") for sig in peid_matches: if type(sig) is list: self.log('item', sig[0]) else: self.log('item', sig) else: self.log('info', "No PEiD signatures matched.") if self.args.scan and peid_matches: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) cur_peid_matches = get_matches(cur_pe, signatures) except: continue if peid_matches == cur_peid_matches: matches.append([sample.name, sample.sha256]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
def pehash(self): if not HAVE_PEHASH: self.log('error', "PEhash is missing. Please copy PEhash to the modules directory of Viper") return current_pehash = None if __sessions__.is_set(): current_pehash = calculate_pehash(__sessions__.current.file.path) self.log('info', "PEhash: {0}".format(bold(current_pehash))) if self.args.all or self.args.cluster or self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) pe_hash = calculate_pehash(sample_path) if pe_hash: rows.append((sample.name, sample.md5, pe_hash)) if self.args.all: self.log('info', "PEhash for all files:") header = ['Name', 'MD5', 'PEhash'] self.log('table', dict(header=header, rows=rows)) elif self.args.cluster: self.log('info', "Clustering files by PEhash...") cluster = {} for sample_name, sample_md5, pe_hash in rows: cluster.setdefault(pe_hash, []).append([sample_name, sample_md5]) for item in cluster.items(): if len(item[1]) > 1: self.log('info', "PEhash cluster {0}:".format(bold(item[0]))) self.log('table', dict(header=['Name', 'MD5'], rows=item[1])) elif self.args.scan: if __sessions__.is_set() and current_pehash: self.log('info', "Finding matching samples...") matches = [] for row in rows: if row[1] == __sessions__.current.file.md5: continue if row[2] == current_pehash: matches.append([row[0], row[1]]) if matches: self.log('table', dict(header=['Name', 'MD5'], rows=matches)) else: self.log('info', "No matches found")
def compiletime(self): def get_compiletime(pe): return datetime.datetime.fromtimestamp( pe.FILE_HEADER.TimeDateStamp) if not self.__check_session(): return compile_time = get_compiletime(self.pe) self.log('info', "Compile Time: {0}".format(bold(compile_time))) if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) cur_compile_time = get_compiletime(cur_pe) except: continue if compile_time == cur_compile_time: matches.append([sample.name, sample.md5, cur_compile_time]) else: if self.args.window: if cur_compile_time > compile_time: delta = (cur_compile_time - compile_time) elif cur_compile_time < compile_time: delta = (compile_time - cur_compile_time) delta_minutes = int(delta.total_seconds()) / 60 if delta_minutes <= self.args.window: matches.append( [sample.name, sample.md5, cur_compile_time]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log( 'table', dict(header=['Name', 'MD5', 'Compile Time'], rows=matches))
def cmd_tags(self, *args): parser = argparse.ArgumentParser(prog='tags', description="Modify tags of the opened file") parser.add_argument('-a', '--add', metavar='TAG', help="Add tags to the opened file (comma separated)") parser.add_argument('-d', '--delete', metavar='TAG', help="Delete a tag from the opened file") try: args = parser.parse_args(args) except: return # This command requires a session to be opened. if not __sessions__.is_set(): self.log('error', "No session opened") parser.print_usage() return # If no arguments are specified, there's not much to do. # However, it could make sense to also retrieve a list of existing # tags from this command, and not just from the "find" command alone. if args.add is None and args.delete is None: parser.print_usage() return # TODO: handle situation where addition or deletion of a tag fail. db = Database() if not db.find(key='sha256', value=__sessions__.current.file.sha256): self.log('error', "The opened file is not stored in the database. " "If you want to add it use the `store` command.") return if args.add: # Add specified tags to the database's entry belonging to # the opened file. db.add_tags(__sessions__.current.file.sha256, args.add) self.log('info', "Tags added to the currently opened file") # We refresh the opened session to update the attributes. # Namely, the list of tags returned by the 'info' command # needs to be re-generated, or it wouldn't show the new tags # until the existing session is closed a new one is opened. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path) if args.delete: # Delete the tag from the database. db.delete_tag(args.delete, __sessions__.current.file.sha256) # Refresh the session so that the attributes of the file are # updated. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path)
def compiletime(self): def get_compiletime(pe): return datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp) if not self.__check_session(): return compile_time = get_compiletime(self.pe) self.log('info', "Compile Time: {0}".format(bold(compile_time))) if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) cur_compile_time = get_compiletime(cur_pe) except: continue if compile_time == cur_compile_time: matches.append([sample.name, sample.md5, cur_compile_time]) else: if self.args.window: if cur_compile_time > compile_time: delta = (cur_compile_time - compile_time) elif cur_compile_time < compile_time: delta = (compile_time - cur_compile_time) delta_minutes = int(delta.total_seconds()) / 60 if delta_minutes <= self.args.window: matches.append([sample.name, sample.md5, cur_compile_time]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Name', 'MD5', 'Compile Time'], rows=matches))
def edit(self): db = Database() samples = db.find(key='all') filenames = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue filenames.append(sample.name) # from http://hetland.org/coding/python/levenshtein.py def levenshtein(a, b): "Calculates the Levenshtein distance between a and b." n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space a, b = b, a n, m = m, n current = range(n + 1) for i in range(1, m + 1): previous, current = current, [i] + [0] * n for j in range(1, n + 1): add, delete = previous[j] + 1, current[j - 1] + 1 change = previous[j - 1] if a[j - 1] != b[i - 1]: change = change + 1 current[j] = min(add, delete, change) return current[n] distance = [] for i in itertools.combinations(filenames, 2): edit = levenshtein(i[0], i[1]) distance.append(edit) self.log( 'info', "Average Edit distance: {0}".format(sum(distance) / len(distance)))
def edit(self): db = Database() samples = db.find(key='all') filenames = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue filenames.append(sample.name) # from http://hetland.org/coding/python/levenshtein.py def levenshtein(a, b): "Calculates the Levenshtein distance between a and b." n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space a, b = b, a n, m = m, n current = range(n + 1) for i in range(1, m + 1): previous, current = current, [i] + [0] * n for j in range(1, n + 1): add, delete = previous[j] + 1, current[j - 1] + 1 change = previous[j - 1] if a[j - 1] != b[i - 1]: change = change + 1 current[j] = min(add, delete, change) return current[n] distance = [] for i in itertools.combinations(filenames, 2): edit = levenshtein(i[0], i[1]) distance.append(edit) self.log('info', "Average Edit distance: {0}".format(sum(distance) / len(distance)))
def get_signed_samples(current=None, cert_filter=None): db = Database() samples = db.find(key='all') results = [] for sample in samples: # Skip if it's the same file. if current: if sample.sha256 == current: continue # Obtain path to the binary. sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue # Open PE instance. try: cur_pe = pefile.PE(sample_path) except: continue cur_cert_data = get_certificate(cur_pe) if not cur_cert_data: continue cur_cert_md5 = get_md5(cur_cert_data) if cert_filter: if cur_cert_md5 == cert_filter: results.append([sample.name, sample.md5]) else: results.append([sample.name, sample.md5, cur_cert_md5]) return results
def run(self): super(Fuzzy, self).run() if not HAVE_PYDEEP: self.log('error', "Missing dependency, install pydeep (`pip install pydeep`)") return arg_verbose = False arg_cluster = False if self.args: if self.args.verbose: arg_verbose = self.args.verbose if self.args.cluster: arg_cluster = self.args.cluster db = Database() samples = db.find(key='all') # Check if we're operating in cluster mode, otherwise we run on the # currently opened file. if arg_cluster: self.log('info', "Generating clusters, this might take a while...") clusters = dict() for sample in samples: if not sample.ssdeep: continue if arg_verbose: self.log('info', "Testing file {0} with ssdeep {1}".format( sample.md5, sample.ssdeep)) clustered = False for cluster_name, cluster_members in clusters.items(): # Check if sample is already in the cluster. if sample.md5 in cluster_members: continue if arg_verbose: self.log('info', "Testing {0} in cluser {1}".format( sample.md5, cluster_name)) for member in cluster_members: if sample.md5 == member[0]: continue member_hash = member[0] member_name = member[1] member_ssdeep = db.find(key='md5', value=member_hash)[0].ssdeep if pydeep.compare(sample.ssdeep, member_ssdeep) > 40: if arg_verbose: self.log('info', "Found home for {0} in cluster {1}".format( sample.md5, cluster_name)) clusters[cluster_name].append([sample.md5, sample.name]) clustered = True break if not clustered: cluster_id = len(clusters) + 1 clusters[cluster_id] = [[sample.md5, sample.name],] ordered_clusters = collections.OrderedDict(sorted(clusters.items())) self.log('info', "Following are the identified clusters with more than one member") for cluster_name, cluster_members in ordered_clusters.items(): # We include in the results only clusters with more than just # one member. if len(cluster_members) <= 1: continue self.log('info', "Ssdeep cluster {0}".format(bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) # We're running against the already opened file. else: if not __sessions__.is_set(): self.log('error', "No session opened") return if not __sessions__.current.file.ssdeep: self.log('error', "No ssdeep hash available for opened file") return matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue if not sample.ssdeep: continue score = pydeep.compare(__sessions__.current.file.ssdeep, sample.ssdeep) if score > 40: matches.append(['{0}%'.format(score), sample.name, sample.sha256]) if arg_verbose: self.log('info', "Match {0}%: {2} [{1}]".format(score, sample.name, sample.sha256)) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Score', 'Name', 'SHA256'], rows=matches))
def language(self): def get_iat(pe): iat = [] if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'): for peimport in pe.DIRECTORY_ENTRY_IMPORT: iat.append(peimport.dll) return iat def check_module(iat, match): for imp in iat: if imp.find(match) != -1: return True return False def is_cpp(data, cpp_count): for line in data: if 'type_info' in line or 'RTTI' in line: cpp_count += 1 break if cpp_count == 2: return True return False def is_delphi(data): for line in data: if 'Borland' in line: path = line.split('\\') for p in path: if 'Delphi' in p: return True return False def is_vbdotnet(data): for line in data: if 'Compiler' in line: stuff = line.split('.') if 'VisualBasic' in stuff: return True return False def is_autoit(data): for line in data: if 'AU3!' in line: return True return False def is_packed(pe): for section in pe.sections: if section.get_entropy() > 7: return True return False def get_strings(content): regexp = '[\x30-\x39\x41-\x5f\x61-\x7a\-\.:]{4,}' return re.findall(regexp, content) def find_language(iat, sample, content): dotnet = False cpp_count = 0 found = None # VB check if check_module(iat, 'VB'): self.log( 'info', "{0} - Possible language: Visual Basic".format( sample.name)) return True # .NET check if check_module(iat, 'mscoree.dll') and not found: dotnet = True found = '.NET' # C DLL check if not found and (check_module(iat, 'msvcr') or check_module( iat, 'MSVCR') or check_module(iat, 'c++')): cpp_count += 1 if not found: data = get_strings(content) if is_cpp(data, cpp_count) and not found: found = 'CPP' if not found and cpp_count == 1: found = 'C' if not dotnet and is_delphi(data) and not found: found = 'Delphi' if dotnet and is_vbdotnet(data): found = 'Visual Basic .NET' if is_autoit(data) and not found: found = 'AutoIt' return found if not self.__check_session(): return if is_packed(self.pe): self.log( 'warning', "Probably packed, the language guess might be unreliable") language = find_language(get_iat(self.pe), __sessions__.current.file, __sessions__.current.file.data) if language: self.log('info', "Probable language: {0}".format(bold(language))) else: self.log('error', "Programming language not identified") return if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_pe = pefile.PE(sample_path) except pefile.PEFormatError as e: continue cur_packed = '' if is_packed(cur_pe): cur_packed = 'Yes' cur_language = find_language(get_iat(cur_pe), sample, open(sample_path, 'rb').read()) if not cur_language: continue if cur_language == language: matches.append([sample.name, sample.md5, cur_packed]) if matches: self.log( 'table', dict(header=['Name', 'MD5', 'Is Packed'], rows=matches)) else: self.log('info', "No matches found")
def imphash(self): if self.args.scan and self.args.cluster: self.log('error', "You selected two exclusive options, pick one") return if self.args.cluster: self.log('info', "Clustering all samples by imphash...") db = Database() samples = db.find(key='all') cluster = {} for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_imphash = pefile.PE(sample_path).get_imphash() except: continue if cur_imphash not in cluster: cluster[cur_imphash] = [] cluster[cur_imphash].append([sample.sha256, sample.name]) for cluster_name, cluster_members in cluster.items(): # Skipping clusters with only one entry. if len(cluster_members) == 1: continue self.log('info', "Imphash cluster {0}".format(bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) return if self.__check_session(): try: imphash = self.pe.get_imphash() except AttributeError: self.log( 'error', "No imphash support, upgrade pefile to a version >= 1.2.10-139 (`pip install --upgrade pefile`)" ) return self.log('info', "Imphash: {0}".format(bold(imphash))) if self.args.scan: self.log('info', "Scanning the repository for matching samples...") db = Database() samples = db.find(key='all') matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_imphash = pefile.PE(sample_path).get_imphash() except: continue if imphash == cur_imphash: matches.append([sample.name, sample.sha256]) self.log( 'info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
def resources(self): # Use this function to retrieve resources for the given PE instance. # Returns all the identified resources with indicators and attributes. def get_resources(pe): resources = [] if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'): count = 1 for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries: try: resource = {} if resource_type.name is not None: name = str(resource_type.name) else: name = str( pefile.RESOURCE_TYPE.get( resource_type.struct.Id)) if name is None: name = str(resource_type.struct.Id) if hasattr(resource_type, 'directory'): for resource_id in resource_type.directory.entries: if hasattr(resource_id, 'directory'): for resource_lang in resource_id.directory.entries: data = pe.get_data( resource_lang.data.struct. OffsetToData, resource_lang.data.struct.Size) filetype = get_type(data) md5 = get_md5(data) language = pefile.LANG.get( resource_lang.data.lang, None) sublanguage = pefile.get_sublang_name_for_lang( resource_lang.data.lang, resource_lang.data.sublang) offset = ('%-8s' % hex(resource_lang.data.struct .OffsetToData)).strip() size = ( '%-8s' % hex(resource_lang.data.struct.Size) ).strip() resource = [ count, name, offset, md5, size, filetype, language, sublanguage ] # Dump resources if requested to and if the file currently being # processed is the opened session file. # This is to avoid that during a --scan all the resources being # scanned are dumped as well. if (self.args.open or self.args.dump ) and pe == self.pe: if self.args.dump: folder = self.args.dump else: folder = tempfile.mkdtemp() resource_path = os.path.join( folder, '{0}_{1}_{2}'.format( __sessions__.current.file. md5, offset, name)) resource.append(resource_path) with open(resource_path, 'wb') as resource_handle: resource_handle.write(data) resources.append(resource) count += 1 except Exception as e: self.log('error', e) continue return resources if not self.__check_session(): return # Obtain resources for the currently opened file. resources = get_resources(self.pe) if not resources: self.log('warning', "No resources found") return headers = [ '#', 'Name', 'Offset', 'MD5', 'Size', 'File Type', 'Language', 'Sublanguage' ] if self.args.dump or self.args.open: headers.append('Dumped To') self.log('table', dict(header=headers, rows=resources)) # If instructed, open a session on the given resource. if self.args.open: for resource in resources: if resource[0] == self.args.open: __sessions__.new(resource[8]) return # If instructed to perform a scan across the repository, start looping # through all available files. elif self.args.scan: self.log('info', "Scanning the repository for matching samples...") # Retrieve list of samples stored locally and available in the # database. db = Database() samples = db.find(key='all') matches = [] for sample in samples: # Skip if it's the same file. if sample.sha256 == __sessions__.current.file.sha256: continue # Obtain path to the binary. sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue # Open PE instance. try: cur_pe = pefile.PE(sample_path) except: continue # Obtain the list of resources for the current iteration. cur_resources = get_resources(cur_pe) matched_resources = [] # Loop through entry's resources. for cur_resource in cur_resources: # Loop through opened file's resources. for resource in resources: # If there is a common resource, add it to the list. if cur_resource[3] == resource[3]: matched_resources.append(resource[3]) # If there are any common resources, add the entry to the list # of matched samples. 
if len(matched_resources) > 0: matches.append([ sample.name, sample.md5, '\n'.join(r for r in matched_resources) ]) self.log('info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log( 'table', dict(header=['Name', 'MD5', 'Resource MD5'], rows=matches))
class Commands(object): output = [] def __init__(self): # Open connection to the database. self.db = Database() # Map commands to their related functions. self.commands = dict( help=dict(obj=self.cmd_help, description="Show this help message"), open=dict(obj=self.cmd_open, description="Open a file"), new=dict(obj=self.cmd_new, description="Create new file"), close=dict(obj=self.cmd_close, description="Close the current session"), info=dict(obj=self.cmd_info, description="Show information on the opened file"), notes=dict(obj=self.cmd_notes, description="View, add and edit notes in the current investigation"), clear=dict(obj=self.cmd_clear, description="Clear the console"), store=dict(obj=self.cmd_store, description="Store the opened file to the local repository"), delete=dict(obj=self.cmd_delete, description="Delete the opened file"), find=dict(obj=self.cmd_find, description="Find a file"), tags=dict(obj=self.cmd_tags, description="Modify tags of the opened file"), sessions=dict(obj=self.cmd_sessions, description="List or switch sessions"), stats=dict(obj=self.cmd_stats, description="Collection Statistics"), investigations=dict(obj=self.cmd_investigations, description="List or switch current investigations"), export=dict(obj=self.cmd_export, description="Export the current session to file or zip"), modules=dict(obj=self.cmd_modules, description="List available modules"), integrate=dict(obj=self.cmd_integrate, description="Interact with available integrations"), tokens=dict(obj=self.cmd_tokens, description="Store and retrieve API tokens for integrations and modules"), ) # Output Logging def log(self, event_type, event_data): self.output.append(dict( type=event_type, data=event_data )) ## # CLEAR # # This command simply clears the shell. def cmd_clear(self, *args): os.system('clear') ## # HELP # # This command simply prints the help message. # It lists both embedded commands and loaded modules. 
def cmd_help(self, *args): self.log('info', "Commands") # Build table of commands from commands dict above rows = [] for command_name, command_item in self.commands.items(): rows.append([command_name, command_item['description']]) rows.append(["exit, quit", "Quit CIRTKit"]) rows = sorted(rows, key=lambda entry: entry[0]) self.log('table', dict(header=['Command', 'Description'], rows=rows)) ## # MODULES # # Lists all modules discovered in the modules directory # and lists them by package name, by enumerating # over the directory we can rerun dynamically def cmd_modules(self, *args): parser = argparse.ArgumentParser(prog='modules', description="Lists modules", epilog="You can also specify -r to dynamically pickup new modules") parser.add_argument('-r', '--reload', action='store_true', help="Reload modules") moduleDict = __modules__ try: args = parser.parse_args(args) except: return if args.reload: from lib.core.plugins import load_modules moduleDict = load_modules() rows = [] for module_name, module_item in moduleDict.items(): rows.append([module_name, module_item['description']]) rows = sorted(rows, key=lambda entry: entry[0]) # Build table of modules from the modules available self.log('info', "Modules") self.log('table', dict(header=['Command', 'Description'], rows=rows)) ## # INTEGRATIONS # # Lists all integrations available def cmd_integrate(self, *args): parser = argparse.ArgumentParser(prog='integrate', description="Load integrations") parser.add_argument('-n', '--name', type=str, nargs=1, help="Load integration by name") parser.add_argument('-a', '--all', action='store_true', help="List all available integrations") integrateDict = __integrations__ try: args = parser.parse_args(args) except: return rows = [] for name, desc in integrateDict.items(): rows.append([name, desc['description']]) if args.all: rows = sorted(rows, key=lambda entry: entry[0]) # Build table of integrations self.log('info', "Integrations") self.log('table', dict(header=['Name', 'Description'], rows=rows)) elif args.name: name = args.name[0] item = integrateDict[args.name[0]]['obj']() print_info("Loading {0}\n".format(name)) item.load() else: parser.print_help() ## # TOKENS # # Store and retrieve tokens for app integrations # and for other modules used in cirtkit def cmd_tokens(self, *args): parser = argparse.ArgumentParser(prog='tokens', description="Store and retrieve API tokens") parser.add_argument('-d', '--delete', type=int, metavar="Token_ID", nargs=1, help="Delete token by ID") parser.add_argument('-a', '--add', action='store_true', help="Add a new API token") parser.add_argument('-l', '--list', action='store_true', help="List all configured API tokens") try: args = parser.parse_args(args) except: return if args.delete: tokenid = args.delete self.db.delete_token(tokenid) print_success("Token {0} deleted successfully!".format(tokenid)) elif args.add: print_info("Application new token will be used for:") appname = input("> ") print_info("API Token:") apitoken = input("> ") print_info("Username (if applicable):") username = input("> ") print_info("FQDN of remote server (format: ex.server.com:8000):") hostname = input("> ") if len(username) == 0: username = "" self.db.add_token(apitoken, username, appname, hostname) print_success("Token for {0} added successfully!".format(appname)) elif args.list: # Populate the list of search results. 
items = self.db.get_token_list() rows = [] for item in items: row = [item.id, item.app, item.user, item.fqdn] rows.append(row) if len(rows) < 1: print_info("No token profiles configured") return # Generate a table with the results. header = ['#', 'App', 'User', 'FQDN'] self.log("table", dict(header=header, rows=rows)) else: parser.print_help() ## # NEW # # This command is used to create a new session on a new file, # useful for copy & paste of content like Email headers def cmd_new(self, *args): title = input("Enter a title for the new file: ") # Create a new temporary file. tmp = tempfile.NamedTemporaryFile(delete=False) # Open the temporary file with the default editor, or with nano. os.system('"${EDITOR:-nano}" ' + tmp.name) __sessions__.new(tmp.name) __sessions__.current.file.name = title print_info("New file with title \"{0}\" added to the current session".format(bold(title))) ## # OPEN # # This command is used to open a session on a given file. # It either can be an external file path, or a SHA256 hash of a file which # has been previously imported and stored. # While the session is active, every operation and module executed will be # run against the file specified. def cmd_open(self, *args): parser = argparse.ArgumentParser(prog='open', description="Open a file", epilog="You can also specify a MD5 or SHA256 hash to a previously stored file in order to open a session on it.") group = parser.add_mutually_exclusive_group() group.add_argument('-f', '--file', action='store_true', help="Target is a file") group.add_argument('-u', '--url', action='store_true', help="Target is a URL") group.add_argument('-l', '--last', action='store_true', help="Target is the entry number from the last find command's results") parser.add_argument('-t', '--tor', action='store_true', help="Download the file through Tor") parser.add_argument("value", metavar='PATH, URL, HASH or ID', nargs='*', help="Target to open. Hash can be md5 or sha256. ID has to be from the last search.") try: args = parser.parse_args(args) except: return target = " ".join(args.value) if not args.last and target is None: parser.print_usage() return # If it's a file path, open a session on it. if args.file: target = os.path.expanduser(target) if not os.path.exists(target) or not os.path.isfile(target): self.log('error', "File not found: {0}".format(target)) return __sessions__.new(target) # If it's a URL, download it and open a session on the temporary file. elif args.url: data = download(url=target, tor=args.tor) if data: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.write(data) tmp.close() __sessions__.new(tmp.name) # Try to open the specified file from the list of results from # the last find command. elif args.last: if __sessions__.find: count = 1 for item in __sessions__.find: if count == int(target): __sessions__.new(get_sample_path(item.sha256)) break count += 1 else: self.log('warning', "You haven't performed a find yet") # Otherwise we assume it's an hash of an previously stored sample. else: target = target.strip().lower() if len(target) == 32: key = 'md5' elif len(target) == 64: key = 'sha256' else: parser.print_usage() return rows = self.db.find(key=key, value=target) if not rows: self.log('warning', "No file found with the given hash {0}".format(target)) return path = get_sample_path(rows[0].sha256) if path: __sessions__.new(path) ## # CLOSE # # This command resets the open session. # After that, all handles to the opened file should be closed and the # shell should be restored to the default prompt. 
def cmd_close(self, *args): __sessions__.close() ## # INFO # # This command returns information on the open session. It returns details # on the file (e.g. hashes) and other information that might available from # the database. def cmd_info(self, *args): if __sessions__.is_set(): self.log('table', dict( header=['Key', 'Value'], rows=[ ['Name', __sessions__.current.file.name], ['Tags', __sessions__.current.file.tags], ['Path', __sessions__.current.file.path], ['Size', __sessions__.current.file.size], ['Type', __sessions__.current.file.type], ['Mime', __sessions__.current.file.mime], ['MD5', __sessions__.current.file.md5], ['SHA1', __sessions__.current.file.sha1], ['SHA256', __sessions__.current.file.sha256], ['SHA512', __sessions__.current.file.sha512], ['SSdeep', __sessions__.current.file.ssdeep], ['CRC32', __sessions__.current.file.crc32] ] )) ## # NOTES # # This command allows you to view, add, modify and delete notes associated # with the current investigation. def cmd_notes(self, *args): parser = argparse.ArgumentParser(prog="notes", description="Show information on the current investigation") group = parser.add_mutually_exclusive_group() group.add_argument('-l', '--list', action='store_true', help="List all notes available for the current investigation") group.add_argument('-a', '--add', action='store_true', help="Add a new note to the current investigation") group.add_argument('-v', '--view', metavar='NOTE', help="View the specified note") group.add_argument('-e', '--edit', metavar='NOTE', type=int, help="Edit an existing note") group.add_argument('-d', '--delete', metavar='NOTE', type=int, help="Delete an existing note") notepath = __project__.path + '/notes' notelist = os.listdir(__project__.path + '/notes') try: args = parser.parse_args(args) except: return if __project__.name is None: print_error('Cannot store notes in the default investigation. Please open a new case.') return if args.list: # Retrieve all notes for the currently opened investigation. pass if len(notelist) < 1: self.log('info', "No notes available for this investigation yet") return # Build table of existing case notes rows = [] notecount = 1 for note in notelist: rows.append([notecount, note]) notecount += 1 # Display list of existing notes. self.log('table', dict(header=['ID', 'Title'], rows=rows)) elif args.add: title = input("Enter a title for the new note: ") # Create a new temporary file. tmp = tempfile.NamedTemporaryFile(delete=False) # Open the temporary file with the default editor, or with nano. os.system('"${EDITOR:-nano}" ' + tmp.name) # Once the user is done editing, we need to read the content and # store it in the database. body = tmp.read() # store note in a file with open(notepath + '/' + title, 'w+') as note: note.write(body) # store note in the database # Finally, remove the temporary file. os.remove(tmp.name) self.log('info', "Note with title \"{0}\" added to the current investigation".format(bold(title))) elif args.view: # Retrieve note wth the specified ID and print it. title = args.view note = notepath + '/' + title if os.path.exists(note): self.log('info', bold('Title: ') + title) try: with open(note, 'r') as notehndle: self.log('info', bold('Body:') + '\n' + notehndle.read()) except IOError: print_error("Could not open note by title {0}".format(title)) else: self.log('info', "There is no note with title {0}".format(args.view)) elif args.edit: # Retrieve note with the specified ID. note = Database().get_note(args.edit) if note: # Create a new temporary file. 
tmp = tempfile.NamedTemporaryFile(delete=False) # Write the old body to the temporary file. tmp.write(note.body) tmp.close() # Open the old body with the text editor. os.system('"${EDITOR:-nano}" ' + tmp.name) # Read the new body from the temporary file. body = open(tmp.name, 'r').read() # Update the note entry with the new body. Database().edit_note(args.edit, body) # Remove the temporary file. os.remove(tmp.name) self.log('info', "Updated note with ID {0}".format(args.edit)) elif args.delete: # Delete the note with the specified ID. Database().delete_note(args.delete) else: parser.print_usage() ## # STORE # # This command stores the opened file in the local repository and tries # to store details in the database. def cmd_store(self, *args): parser = argparse.ArgumentParser(prog='store', description="Store the opened file in the current investigation") parser.add_argument('-d', '--delete', action='store_true', help="Delete the original file") parser.add_argument('-f', '--folder', type=str, nargs='+', help="Specify a folder to import") parser.add_argument('-s', '--file-size', type=int, help="Specify a maximum file size") parser.add_argument('-y', '--file-type', type=str, help="Specify a file type pattern") parser.add_argument('-n', '--file-name', type=str, help="Specify a file name pattern") parser.add_argument('-t', '--tags', type=str, nargs='+', help="Specify a list of comma-separated tags") try: args = parser.parse_args(args) except: return if args.folder is not None: # Allows to have spaces in the path. args.folder = " ".join(args.folder) if args.tags is not None: # Remove the spaces in the list of tags args.tags = "".join(args.tags) def add_file(obj, tags=None): if get_sample_path(obj.sha256): self.log('warning', "Skip, file \"{0}\" appears to be already stored".format(obj.name)) return False if __project__.name: pass else: print_error("Must open an investigation to store files") return False # Try to store file object into database. status = self.db.add(obj=obj, tags=tags) if status: # If succeeds, store also in the local repository. # If something fails in the database (for example unicode strings) # we don't want to have the binary lying in the repository with no # associated database record. new_path = store_sample(obj) self.log("success", "Stored file \"{0}\" to {1}".format(obj.name, new_path)) else: return False # Delete the file if requested to do so. if args.delete: try: os.unlink(obj.path) except Exception as e: self.log('warning', "Failed deleting file: {0}".format(e)) return True # If the user specified the --folder flag, we walk recursively and try # to add all contained files to the local repository. # This is not going to open a new session. # TODO: perhaps disable or make recursion optional? if args.folder is not None: # Check if the specified folder is valid. if os.path.isdir(args.folder): # Walk through the folder and subfolders. for dir_name, dir_names, file_names in walk(args.folder): # Add each collected file. for file_name in file_names: file_path = os.path.join(dir_name, file_name) if not os.path.exists(file_path): continue # Check if file is not zero. if not os.path.getsize(file_path) > 0: continue # Check if the file name matches the provided pattern. if args.file_name: if not fnmatch.fnmatch(file_name, args.file_name): # self.log('warning', "Skip, file \"{0}\" doesn't match the file name pattern".format(file_path)) continue # Check if the file type matches the provided pattern. 
if args.file_type: if args.file_type not in File(file_path).type: # self.log('warning', "Skip, file \"{0}\" doesn't match the file type".format(file_path)) continue # Check if file exceeds maximum size limit. if args.file_size: # Obtain file size. if os.path.getsize(file_path) > args.file_size: self.log('warning', "Skip, file \"{0}\" is too big".format(file_path)) continue file_obj = File(file_path) # Add file. add_file(file_obj, args.tags) else: self.log('error', "You specified an invalid folder: {0}".format(args.folder)) # Otherwise we try to store the currently opened file, if there is any. else: if __sessions__.is_set(): if __sessions__.current.file.size == 0: self.log('warning', "Skip, file \"{0}\" appears to be empty".format(__sessions__.current.file.name)) return False # Add file. if add_file(__sessions__.current.file, args.tags): # Open session to the new file. self.cmd_open(*[__sessions__.current.file.sha256]) else: self.log('error', "No session opened") ## # DELETE # # This commands deletes the currenlty opened file (only if it's stored in # the local repository) and removes the details from the database def cmd_delete(self, *args): if __sessions__.is_set(): while True: choice = input("Are you sure you want to delete this binary? Can't be reverted! [y/n] ") if choice == 'y': break elif choice == 'n': return rows = self.db.find('sha256', __sessions__.current.file.sha256) if rows: malware_id = rows[0].id if self.db.delete_file(malware_id): self.log("success", "File deleted") else: self.log('error', "Unable to delete file") os.remove(__sessions__.current.file.path) __sessions__.close() else: self.log('error', "No session opened") ## # FIND # # This command is used to search for files in the database. def cmd_find(self, *args): parser = argparse.ArgumentParser(prog='find', description="Find a file") group = parser.add_mutually_exclusive_group() group.add_argument('-t', '--tags', action='store_true', help="List available tags and quit") group.add_argument('type', nargs='?', choices=["all", "latest", "name", "type", "mime", "md5", "sha256", "tag", "note"], help="Where to search.") parser.add_argument("value", nargs='?', help="String to search.") try: args = parser.parse_args(args) except: return # One of the most useful search terms is by tag. With the --tags # argument we first retrieve a list of existing tags and the count # of files associated with each of them. if args.tags: # Retrieve list of tags. tags = self.db.list_tags() if tags: rows = [] # For each tag, retrieve the count of files associated with it. for tag in tags: count = len(self.db.find('tag', tag.tag)) rows.append([tag.tag, count]) # Generate the table with the results. header = ['Tag', '# Entries'] rows.sort(key=lambda x: x[1], reverse=True) self.log('table', dict(header=header, rows=rows)) else: self.log('warning', "No tags available") return # At this point, if there are no search terms specified, return. if args.type is None: parser.print_usage() return key = args.type if key != 'all' and key != 'latest': try: # The second argument is the search value. value = args.value except IndexError: self.log('error', "You need to include a search term.") return else: value = None # Search all the files matching the given parameters. items = self.db.find(key, value) if not items: return # Populate the list of search results. 
rows = [] count = 1 for item in items: tag = ', '.join([t.tag for t in item.tag if t.tag]) row = [count, item.name, item.mime, item.md5, tag] if key == 'latest': row.append(item.created_at) rows.append(row) count += 1 # Update find results in current session. __sessions__.find = items # Generate a table with the results. header = ['#', 'Name', 'Mime', 'MD5', 'Tags'] if key == 'latest': header.append('Created At') self.log("table", dict(header=header, rows=rows)) ## # TAGS # # This command is used to modify the tags of the opened file. def cmd_tags(self, *args): parser = argparse.ArgumentParser(prog='tags', description="Modify tags of the opened file") parser.add_argument('-a', '--add', metavar='TAG', help="Add tags to the opened file (comma separated)") parser.add_argument('-d', '--delete', metavar='TAG', help="Delete a tag from the opened file") try: args = parser.parse_args(args) except: return # This command requires a session to be opened. if not __sessions__.is_set(): self.log('error', "No session opened") parser.print_usage() return # If no arguments are specified, there's not much to do. # However, it could make sense to also retrieve a list of existing # tags from this command, and not just from the "find" command alone. if args.add is None and args.delete is None: parser.print_usage() return # TODO: handle situation where addition or deletion of a tag fail. db = Database() if not db.find(key='sha256', value=__sessions__.current.file.sha256): self.log('error', "The opened file is not stored in the database. " "If you want to add it use the `store` command.") return if args.add: # Add specified tags to the database's entry belonging to # the opened file. db.add_tags(__sessions__.current.file.sha256, args.add) self.log('info', "Tags added to the currently opened file") # We refresh the opened session to update the attributes. # Namely, the list of tags returned by the 'info' command # needs to be re-generated, or it wouldn't show the new tags # until the existing session is closed a new one is opened. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path) if args.delete: # Delete the tag from the database. db.delete_tag(args.delete, __sessions__.current.file.sha256) # Refresh the session so that the attributes of the file are # updated. self.log('info', "Refreshing session to update attributes...") __sessions__.new(__sessions__.current.file.path) ### # SESSION # # This command is used to list and switch across all the opened sessions. 
def cmd_sessions(self, *args): parser = argparse.ArgumentParser(prog='sessions', description="Open a file", epilog="List or switch sessions") group = parser.add_mutually_exclusive_group() group.add_argument('-l', '--list', action='store_true', help="List all existing sessions") group.add_argument('-s', '--switch', type=int, help="Switch to the specified session") try: args = parser.parse_args(args) except: return if args.list: if not __sessions__.sessions: self.log('info', "There are no opened sessions") return rows = [] for session in __sessions__.sessions: current = '' if session == __sessions__.current: current = 'Yes' rows.append([ session.id, session.file.name, session.file.md5, session.created_at, current ]) self.log('info', "Opened Sessions:") self.log("table", dict(header=['#', 'Name', 'MD5', 'Created At', 'Current'], rows=rows)) elif args.switch: for session in __sessions__.sessions: if args.switch == session.id: __sessions__.switch(session) return self.log('warning', "The specified session ID doesn't seem to exist") else: parser.print_usage() ## # INVESTIGATIONS # # This command retrieves a list of all projects. # You can also switch to a different project. def cmd_investigations(self, *args): parser = argparse.ArgumentParser(prog='investigations', description="Open a case", epilog="List or switch current investigations") group = parser.add_mutually_exclusive_group() group.add_argument('-l', '--list', action='store_true', help="List all existing investigations") group.add_argument('-s', '--switch', metavar='NAME', help="Switch to the specified investigation") group.add_argument('-d', '--delete', type=int, metavar='ID', help="delete investigation by id.") try: args = parser.parse_args(args) except: return projects_path = os.path.join(os.getcwd(), 'investigations') if not os.path.exists(projects_path): self.log('info', "The investigations directory does not exist yet") return if args.list: self.log('info', "Current Investigations:") rows = [] items = self.db.get_investigation_list() # Populate the list of search results. count = 1 for item in items: row = [item.id, item.name] rows.append(row) self.log('table', dict(header=['ID', 'Name'], rows=rows)) elif args.switch: if __sessions__.is_set(): __sessions__.close() self.log('info', "Closed opened session") __project__.open(args.switch, self.db) self.log('info', "Switched to investigation {0}".format(bold(args.switch))) # Need to re-initialize the Database to open the new SQLite file. self.db = Database() elif args.delete: if __sessions__.is_set(): __sessions__.close() self.log('info', "Closed opened session") __project__.delete(args.delete, self.db) self.log('info', "Deleted investigation {0}".format(bold(args.delete))) # Need to re-initialize the Database to open the new SQLite file. self.db = Database() else: self.log('info', parser.print_usage()) ## # EXPORT # # This command will export the current session to file or zip. def cmd_export(self, *args): parser = argparse.ArgumentParser(prog='export', description="Export the current session to file or zip") parser.add_argument('-z', '--zip', action='store_true', help="Export session in a zip archive") parser.add_argument('value', help="path or archive name") try: args = parser.parse_args(args) except: return # This command requires a session to be opened. if not __sessions__.is_set(): self.log('error', "No session opened") parser.print_usage() return # Check for valid export path. 
if args.value is None: parser.print_usage() return # TODO: having for one a folder and for the other a full # target path can be confusing. We should perhaps standardize this. # Abort if the specified path already exists. if os.path.isfile(args.value): self.log('error', "File at path \"{0}\" already exists, abort".format(args.value)) return # If the argument chosed so, archive the file when exporting it. # TODO: perhaps add an option to use a password for the archive # and default it to "infected". if args.zip: try: with ZipFile(args.value, 'w') as export_zip: export_zip.write(__sessions__.current.file.path, arcname=__sessions__.current.file.name) except IOError as e: self.log('error', "Unable to export file: {0}".format(e)) else: self.log('info', "File archived and exported to {0}".format(args.value)) # Otherwise just dump it to the given directory. else: # XXX: Export file with the original file name. store_path = os.path.join(args.value, __sessions__.current.file.name) try: shutil.copyfile(__sessions__.current.file.path, store_path) except IOError as e: self.log('error', "Unable to export file: {0}".format(e)) else: self.log('info', "File exported to {0}".format(store_path)) ## # Stats # # This command allows you to generate basic statistics for the stored files. def cmd_stats(self, *args): parser = argparse.ArgumentParser(prog='stats', description="Display Database File Statistics") parser.add_argument('-t', '--top', type=int, help='Top x Items') try: args = parser.parse_args(args) except: return arg_top = args.top db = Database() # Set all Counters Dict extension_dict = defaultdict(int) mime_dict = defaultdict(int) tags_dict = defaultdict(int) size_list = [] # Find all items = self.db.find('all') if len(items) < 1: self.log('info', "No items in database to generate stats") return # Sort in to stats for item in items: if '.' 
in item.name: ext = item.name.split('.') extension_dict[ext[-1]] += 1 mime_dict[item.mime] += 1 size_list.append(item.size) for t in item.tag: if t.tag: tags_dict[t.tag] += 1 avg_size = sum(size_list) / len(size_list) all_stats = {'Total':len(items), 'File Extension':extension_dict, 'Mime':mime_dict, 'Tags':tags_dict, 'Avg Size':avg_size, 'Largest':max(size_list), 'Smallest':min(size_list)} # Counter for top x if arg_top: counter = arg_top prefix = 'Top {0} '.format(counter) else: counter = len(items) prefix = '' # Project Stats Last as i have it iterate them all # Print all the results self.log('info', "Projects") self.log('table', dict(header=['Name', 'Count'], rows=[['Main', len(items)], ['Next', '10']])) # For Current Project self.log('info', "Current Project") # Extension self.log('info', "{0}Extensions".format(prefix)) header = ['Ext', 'Count'] rows = [] for k in sorted(extension_dict, key=extension_dict.get, reverse=True)[:counter]: rows.append([k, extension_dict[k]]) self.log('table', dict(header=header, rows=rows)) # Mimes self.log('info', "{0}Mime Types".format(prefix)) header = ['Mime', 'Count'] rows = [] for k in sorted(mime_dict, key=mime_dict.get, reverse=True)[:counter]: rows.append([k, mime_dict[k]]) self.log('table', dict(header=header, rows=rows)) # Tags self.log('info', "{0}Tags".format(prefix)) header = ['Tag', 'Count'] rows = [] for k in sorted(tags_dict, key=tags_dict.get, reverse=True)[:counter]: rows.append([k, tags_dict[k]]) self.log('table', dict(header=header, rows=rows)) # Size self.log('info', "Size Stats") self.log('item', "Largest {0}".format(convert_size(max(size_list)))) self.log('item', "Smallest {0}".format(convert_size(min(size_list)))) self.log('item', "Average {0}".format(convert_size(avg_size)))
def entrypoint(self): if self.args.scan and self.args.cluster: self.log('error', "You selected two exclusive options, pick one") return if self.args.all: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue rows.append([sample.md5, sample.name, cur_ep]) self.log('table', dict(header=['MD5', 'Name', 'AddressOfEntryPoint'], rows=rows)) return if self.args.cluster: db = Database() samples = db.find(key='all') cluster = {} for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if cur_ep not in cluster: cluster[cur_ep] = [] cluster[cur_ep].append([sample.md5, sample.name]) for cluster_name, cluster_members in cluster.items(): # Skipping clusters with only one entry. if len(cluster_members) == 1: continue self.log('info', "AddressOfEntryPoint cluster {0}".format(bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) return if not self.__check_session(): return ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint self.log('info', "AddressOfEntryPoint: {0}".format(ep)) if self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if ep == cur_ep: rows.append([sample.md5, sample.name]) self.log('info', "Following are samples with AddressOfEntryPoint {0}".format(bold(ep))) self.log('table', dict(header=['MD5', 'Name'], rows=rows))
def entrypoint(self): if self.args.scan and self.args.cluster: self.log('error', "You selected two exclusive options, pick one") return if self.args.all: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE( sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue rows.append([sample.md5, sample.name, cur_ep]) self.log( 'table', dict(header=['MD5', 'Name', 'AddressOfEntryPoint'], rows=rows)) return if self.args.cluster: db = Database() samples = db.find(key='all') cluster = {} for sample in samples: sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE( sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if cur_ep not in cluster: cluster[cur_ep] = [] cluster[cur_ep].append([sample.md5, sample.name]) for cluster_name, cluster_members in cluster.items(): # Skipping clusters with only one entry. if len(cluster_members) == 1: continue self.log( 'info', "AddressOfEntryPoint cluster {0}".format( bold(cluster_name))) self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members)) return if not self.__check_session(): return ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint self.log('info', "AddressOfEntryPoint: {0}".format(ep)) if self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue sample_path = get_sample_path(sample.sha256) if not os.path.exists(sample_path): continue try: cur_ep = pefile.PE( sample_path).OPTIONAL_HEADER.AddressOfEntryPoint except: continue if ep == cur_ep: rows.append([sample.md5, sample.name]) self.log( 'info', "Following are samples with AddressOfEntryPoint {0}".format( bold(ep))) self.log('table', dict(header=['MD5', 'Name'], rows=rows))
def pehash(self): if not HAVE_PEHASH: self.log( 'error', "PEhash is missing. Please copy PEhash to the modules directory of Viper" ) return current_pehash = None if __sessions__.is_set(): current_pehash = calculate_pehash(__sessions__.current.file.path) self.log('info', "PEhash: {0}".format(bold(current_pehash))) if self.args.all or self.args.cluster or self.args.scan: db = Database() samples = db.find(key='all') rows = [] for sample in samples: sample_path = get_sample_path(sample.sha256) pe_hash = calculate_pehash(sample_path) if pe_hash: rows.append((sample.name, sample.md5, pe_hash)) if self.args.all: self.log('info', "PEhash for all files:") header = ['Name', 'MD5', 'PEhash'] self.log('table', dict(header=header, rows=rows)) elif self.args.cluster: self.log('info', "Clustering files by PEhash...") cluster = {} for sample_name, sample_md5, pe_hash in rows: cluster.setdefault(pe_hash, []).append([sample_name, sample_md5]) for item in cluster.items(): if len(item[1]) > 1: self.log('info', "PEhash cluster {0}:".format(bold(item[0]))) self.log('table', dict(header=['Name', 'MD5'], rows=item[1])) elif self.args.scan: if __sessions__.is_set() and current_pehash: self.log('info', "Finding matching samples...") matches = [] for row in rows: if row[1] == __sessions__.current.file.md5: continue if row[2] == current_pehash: matches.append([row[0], row[1]]) if matches: self.log('table', dict(header=['Name', 'MD5'], rows=matches)) else: self.log('info', "No matches found")
def run(self): super(Fuzzy, self).run() if not HAVE_PYDEEP: self.log( 'error', "Missing dependency, install pydeep (`pip install pydeep`)") return arg_verbose = False arg_cluster = False if self.args: if self.args.verbose: arg_verbose = self.args.verbose if self.args.cluster: arg_cluster = self.args.cluster db = Database() samples = db.find(key='all') # Check if we're operating in cluster mode, otherwise we run on the # currently opened file. if arg_cluster: self.log('info', "Generating clusters, this might take a while...") clusters = dict() for sample in samples: if not sample.ssdeep: continue if arg_verbose: self.log( 'info', "Testing file {0} with ssdeep {1}".format( sample.md5, sample.ssdeep)) clustered = False for cluster_name, cluster_members in clusters.items(): # Check if sample is already in the cluster. if sample.md5 in cluster_members: continue if arg_verbose: self.log( 'info', "Testing {0} in cluser {1}".format( sample.md5, cluster_name)) for member in cluster_members: if sample.md5 == member[0]: continue member_hash = member[0] member_name = member[1] member_ssdeep = db.find( key='md5', value=member_hash)[0].ssdeep if pydeep.compare(sample.ssdeep, member_ssdeep) > 40: if arg_verbose: self.log( 'info', "Found home for {0} in cluster {1}". format(sample.md5, cluster_name)) clusters[cluster_name].append( [sample.md5, sample.name]) clustered = True break if not clustered: cluster_id = len(clusters) + 1 clusters[cluster_id] = [ [sample.md5, sample.name], ] ordered_clusters = collections.OrderedDict( sorted(clusters.items())) self.log( 'info', "Following are the identified clusters with more than one member" ) for cluster_name, cluster_members in ordered_clusters.items(): # We include in the results only clusters with more than just # one member. if len(cluster_members) <= 1: continue self.log('info', "Ssdeep cluster {0}".format(bold(cluster_name))) self.log( 'table', dict(header=['MD5', 'Name'], rows=cluster_members)) # We're running against the already opened file. else: if not __sessions__.is_set(): self.log('error', "No session opened") return if not __sessions__.current.file.ssdeep: self.log('error', "No ssdeep hash available for opened file") return matches = [] for sample in samples: if sample.sha256 == __sessions__.current.file.sha256: continue if not sample.ssdeep: continue score = pydeep.compare(__sessions__.current.file.ssdeep, sample.ssdeep) if score > 40: matches.append( ['{0}%'.format(score), sample.name, sample.sha256]) if arg_verbose: self.log( 'info', "Match {0}%: {2} [{1}]".format( score, sample.name, sample.sha256)) self.log( 'info', "{0} relevant matches found".format(bold(len(matches)))) if len(matches) > 0: self.log( 'table', dict(header=['Score', 'Name', 'SHA256'], rows=matches))
def language(self):

    def get_iat(pe):
        iat = []
        if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            for peimport in pe.DIRECTORY_ENTRY_IMPORT:
                iat.append(peimport.dll)

        return iat

    def check_module(iat, match):
        for imp in iat:
            if imp.find(match) != -1:
                return True

        return False

    def is_cpp(data, cpp_count):
        for line in data:
            if 'type_info' in line or 'RTTI' in line:
                cpp_count += 1
                break

        if cpp_count == 2:
            return True

        return False

    def is_delphi(data):
        for line in data:
            if 'Borland' in line:
                path = line.split('\\')
                for p in path:
                    if 'Delphi' in p:
                        return True

        return False

    def is_vbdotnet(data):
        for line in data:
            if 'Compiler' in line:
                stuff = line.split('.')
                if 'VisualBasic' in stuff:
                    return True

        return False

    def is_autoit(data):
        for line in data:
            if 'AU3!' in line:
                return True

        return False

    def is_packed(pe):
        for section in pe.sections:
            if section.get_entropy() > 7:
                return True

        return False

    def get_strings(content):
        regexp = '[\x30-\x39\x41-\x5f\x61-\x7a\-\.:]{4,}'
        return re.findall(regexp, content)

    def find_language(iat, sample, content):
        dotnet = False
        cpp_count = 0
        found = None

        # Visual Basic check.
        if check_module(iat, 'VB'):
            self.log('info', "{0} - Possible language: Visual Basic".format(sample.name))
            return 'Visual Basic'

        # .NET check.
        if check_module(iat, 'mscoree.dll') and not found:
            dotnet = True
            found = '.NET'

        # C DLL check.
        if not found and (check_module(iat, 'msvcr') or check_module(iat, 'MSVCR') or check_module(iat, 'c++')):
            cpp_count += 1

        if not found:
            data = get_strings(content)

            if is_cpp(data, cpp_count) and not found:
                found = 'CPP'
            if not found and cpp_count == 1:
                found = 'C'
            if not dotnet and is_delphi(data) and not found:
                found = 'Delphi'
            if dotnet and is_vbdotnet(data):
                found = 'Visual Basic .NET'
            if is_autoit(data) and not found:
                found = 'AutoIt'

        return found

    if not self.__check_session():
        return

    if is_packed(self.pe):
        self.log('warning', "Probably packed, the language guess might be unreliable")

    language = find_language(
        get_iat(self.pe),
        __sessions__.current.file,
        __sessions__.current.file.data
    )

    if language:
        self.log('info', "Probable language: {0}".format(bold(language)))
    else:
        self.log('error', "Programming language not identified")
        return

    if self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        db = Database()
        samples = db.find(key='all')

        matches = []
        for sample in samples:
            if sample.sha256 == __sessions__.current.file.sha256:
                continue

            sample_path = get_sample_path(sample.sha256)
            if not os.path.exists(sample_path):
                continue

            try:
                cur_pe = pefile.PE(sample_path)
            except pefile.PEFormatError:
                continue

            cur_packed = ''
            if is_packed(cur_pe):
                cur_packed = 'Yes'

            cur_language = find_language(
                get_iat(cur_pe),
                sample,
                open(sample_path, 'rb').read()
            )

            if not cur_language:
                continue

            if cur_language == language:
                matches.append([sample.name, sample.md5, cur_packed])

        if matches:
            self.log('table', dict(header=['Name', 'MD5', 'Is Packed'], rows=matches))
        else:
            self.log('info', "No matches found")
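# A standalone sketch (hypothetical helper, not part of the module) of the
# entropy heuristic behind is_packed() above: pefile computes Shannon entropy
# per section on a 0-8 scale, and values above 7 usually indicate compressed
# or encrypted data, as produced by packers.
def _example_section_entropy(path):
    import pefile

    pe = pefile.PE(path)
    report = []
    for section in pe.sections:
        # Section names are NUL-padded to 8 bytes in the PE header.
        report.append((section.Name, section.get_entropy()))
    return report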
def imphash(self):
    if self.args.scan and self.args.cluster:
        self.log('error', "You selected two exclusive options, pick one")
        return

    if self.args.cluster:
        self.log('info', "Clustering all samples by imphash...")

        db = Database()
        samples = db.find(key='all')

        cluster = {}
        for sample in samples:
            sample_path = get_sample_path(sample.sha256)
            if not os.path.exists(sample_path):
                continue

            try:
                cur_imphash = pefile.PE(sample_path).get_imphash()
            except:
                continue

            if cur_imphash not in cluster:
                cluster[cur_imphash] = []

            cluster[cur_imphash].append([sample.sha256, sample.name])

        for cluster_name, cluster_members in cluster.items():
            # Skipping clusters with only one entry.
            if len(cluster_members) == 1:
                continue

            self.log('info', "Imphash cluster {0}".format(bold(cluster_name)))
            self.log('table', dict(header=['SHA256', 'Name'], rows=cluster_members))

        return

    if self.__check_session():
        try:
            imphash = self.pe.get_imphash()
        except AttributeError:
            self.log('error', "No imphash support, upgrade pefile to a version >= 1.2.10-139 (`pip install --upgrade pefile`)")
            return

        self.log('info', "Imphash: {0}".format(bold(imphash)))

        if self.args.scan:
            self.log('info', "Scanning the repository for matching samples...")

            db = Database()
            samples = db.find(key='all')

            matches = []
            for sample in samples:
                if sample.sha256 == __sessions__.current.file.sha256:
                    continue

                sample_path = get_sample_path(sample.sha256)
                if not os.path.exists(sample_path):
                    continue

                try:
                    cur_imphash = pefile.PE(sample_path).get_imphash()
                except:
                    continue

                if imphash == cur_imphash:
                    matches.append([sample.name, sample.sha256])

            self.log('info', "{0} relevant matches found".format(bold(len(matches))))

            if len(matches) > 0:
                self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
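# A standalone sketch (illustrative, not part of the module): the imphash
# compared above is an MD5 over the normalized import table (lowercased
# dll/function pairs in import order), so two builds importing the same APIs
# in the same order share it even when the rest of the binary differs.
# pefile >= 1.2.10-139 exposes it directly as get_imphash().
def _example_imphash(path):
    import pefile

    pe = pefile.PE(path)
    return pe.get_imphash()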
def resources(self):

    # Use this function to retrieve resources for the given PE instance.
    # Returns all the identified resources with indicators and attributes.
    def get_resources(pe):
        resources = []
        if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            count = 1
            for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                try:
                    resource = []

                    if resource_type.name is not None:
                        name = str(resource_type.name)
                    else:
                        name = pefile.RESOURCE_TYPE.get(resource_type.struct.Id)

                    if name is None:
                        name = str(resource_type.struct.Id)

                    if hasattr(resource_type, 'directory'):
                        for resource_id in resource_type.directory.entries:
                            if hasattr(resource_id, 'directory'):
                                for resource_lang in resource_id.directory.entries:
                                    data = pe.get_data(resource_lang.data.struct.OffsetToData, resource_lang.data.struct.Size)
                                    filetype = get_type(data)
                                    md5 = get_md5(data)
                                    language = pefile.LANG.get(resource_lang.data.lang, None)
                                    sublanguage = pefile.get_sublang_name_for_lang(resource_lang.data.lang, resource_lang.data.sublang)
                                    offset = ('%-8s' % hex(resource_lang.data.struct.OffsetToData)).strip()
                                    size = ('%-8s' % hex(resource_lang.data.struct.Size)).strip()

                                    resource = [count, name, offset, md5, size, filetype, language, sublanguage]

                                    # Dump resources if requested to and if the file currently being
                                    # processed is the opened session file.
                                    # This is to avoid that during a --scan all the resources being
                                    # scanned are dumped as well.
                                    if (self.args.open or self.args.dump) and pe == self.pe:
                                        if self.args.dump:
                                            folder = self.args.dump
                                        else:
                                            folder = tempfile.mkdtemp()

                                        resource_path = os.path.join(folder, '{0}_{1}_{2}'.format(__sessions__.current.file.md5, offset, name))
                                        resource.append(resource_path)

                                        with open(resource_path, 'wb') as resource_handle:
                                            resource_handle.write(data)

                                    resources.append(resource)

                                    count += 1
                except Exception as e:
                    self.log('error', e)
                    continue

        return resources

    if not self.__check_session():
        return

    # Obtain resources for the currently opened file.
    resources = get_resources(self.pe)

    if not resources:
        self.log('warning', "No resources found")
        return

    headers = ['#', 'Name', 'Offset', 'MD5', 'Size', 'File Type', 'Language', 'Sublanguage']
    if self.args.dump or self.args.open:
        headers.append('Dumped To')

    self.log('table', dict(header=headers, rows=resources))

    # If instructed, open a session on the given resource.
    if self.args.open:
        for resource in resources:
            if resource[0] == self.args.open:
                __sessions__.new(resource[8])
                return
    # If instructed to perform a scan across the repository, start looping
    # through all available files.
    elif self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        # Retrieve list of samples stored locally and available in the
        # database.
        db = Database()
        samples = db.find(key='all')

        matches = []
        for sample in samples:
            # Skip if it's the same file.
            if sample.sha256 == __sessions__.current.file.sha256:
                continue

            # Obtain path to the binary.
            sample_path = get_sample_path(sample.sha256)
            if not os.path.exists(sample_path):
                continue

            # Open PE instance.
            try:
                cur_pe = pefile.PE(sample_path)
            except:
                continue

            # Obtain the list of resources for the current iteration.
            cur_resources = get_resources(cur_pe)
            matched_resources = []
            # Loop through entry's resources.
            for cur_resource in cur_resources:
                # Loop through opened file's resources.
                for resource in resources:
                    # If there is a common resource, add it to the list.
                    if cur_resource[3] == resource[3]:
                        matched_resources.append(resource[3])

            # If there are any common resources, add the entry to the list
            # of matched samples.
            if len(matched_resources) > 0:
                matches.append([sample.name, sample.md5, '\n'.join(matched_resources)])

        self.log('info', "{0} relevant matches found".format(bold(len(matches))))

        if len(matches) > 0:
            self.log('table', dict(header=['Name', 'MD5', 'Resource MD5'], rows=matches))
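# A standalone sketch (illustrative, not part of the module) of the top level
# of the resource walk performed by get_resources() above: each entry in
# DIRECTORY_ENTRY_RESOURCE carries either an explicit name or a numeric type
# id that pefile can map back to a symbolic name such as RT_ICON.
def _example_resource_types(path):
    import pefile

    pe = pefile.PE(path)
    names = []
    if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
        for entry in pe.DIRECTORY_ENTRY_RESOURCE.entries:
            if entry.name is not None:
                names.append(str(entry.name))
            else:
                # Fall back to the numeric id when no symbolic name is known.
                names.append(pefile.RESOURCE_TYPE.get(entry.struct.Id, str(entry.struct.Id)))
    return names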
def scan(self):

    def string_printable(line):
        line = str(line)
        new_line = ''
        for c in line:
            if c in printstring.printable:
                new_line += c
            else:
                new_line += '\\x' + c.encode('hex')
        return new_line

    # This means users can just drop or remove rule files without
    # having to worry about maintaining the index.
    # TODO: make paths absolute.
    # TODO: this regenerates the file at every run, perhaps we
    # could find a way to optimize this.
    def rule_index():
        tmp_path = os.path.join(tempfile.gettempdir(), 'index.yara')
        with open(tmp_path, 'w') as rules_index:
            for rule_file in os.listdir(self.rule_path):
                # Skip if the extension is not right, could cause problems.
                if not rule_file.endswith('.yar') and not rule_file.endswith('.yara'):
                    continue
                # Skip if it's the index itself.
                if rule_file == 'index.yara':
                    continue

                # Add the rule to the index.
                line = 'include "{0}"\n'.format(os.path.join(self.rule_path, rule_file))
                rules_index.write(line)

        return tmp_path

    arg_rule = self.args.rule
    arg_scan_all = self.args.all
    arg_tag = self.args.tag

    # If no custom ruleset is specified, we use the default one.
    if not arg_rule:
        arg_rule = rule_index()

    # Check if the selected ruleset actually exists.
    if not os.path.exists(arg_rule):
        self.log('error', "No valid Yara ruleset at {0}".format(arg_rule))
        return

    # Compile all rules from given ruleset.
    rules = yara.compile(arg_rule)
    files = []

    # If there is a session open and the user didn't specifically
    # request to scan the full repository, we just add the currently
    # opened file's path.
    if __sessions__.is_set() and not arg_scan_all:
        files.append(__sessions__.current.file)
    # Otherwise we loop through all files in the repository and queue
    # them up for scan.
    else:
        self.log('info', "Scanning all stored files...")

        db = Database()
        samples = db.find(key='all')

        for sample in samples:
            files.append(sample)

    for entry in files:
        if entry.size == 0:
            continue

        self.log('info', "Scanning {0} ({1})".format(entry.name, entry.sha256))

        # Check if the entry has a path attribute. This happens when
        # there is a session open. We need to distinguish this just for
        # the cases where we're scanning an opened file which has not been
        # stored yet.
        if hasattr(entry, 'path'):
            entry_path = entry.path
        # This should be triggered only when scanning the full repository.
        else:
            entry_path = get_sample_path(entry.sha256)

        # Check if the file exists before running the yara scan.
        if not os.path.exists(entry_path):
            self.log('error', "The file does not exist at path {0}".format(entry_path))
            return

        rows = []
        tag_list = []
        for match in rules.match(entry_path):
            # Add a row for each string matched by the rule.
            for string in match.strings:
                rows.append([match.rule, string_printable(string[1]), string_printable(string[0]), string_printable(string[2])])

            # Add matching rules to our list of tags.
            # First it checks if there are tags specified in the metadata
            # of the Yara rule.
            match_tags = match.meta.get('tags')
            # If not, use the rule name.
            # TODO: as we add more and more yara rules, we might remove
            # this option and only tag the file with rules that had
            # tags specified in them.
            if not match_tags:
                match_tags = match.rule

            # Add the tags to the list.
            tag_list.append([entry.sha256, match_tags])

        if rows:
            header = ['Rule', 'String', 'Offset', 'Content']
            self.log('table', dict(header=header, rows=rows))

        # If we selected to add tags do that now.
        if rows and arg_tag:
            db = Database()
            for tag in tag_list:
                db.add_tags(tag[0], tag[1])

            # If in a session reset the session to see tags.
            if __sessions__.is_set() and not arg_scan_all:
                self.log('info', "Refreshing session to update attributes...")
                __sessions__.new(__sessions__.current.file.path)
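# A standalone sketch (illustrative, not part of the module) of the
# yara-python calls scan() is built on: compile a ruleset from a file path,
# match it against a target, and read each match's rule name, metadata and
# matched strings. The (offset, identifier, data) tuple layout assumed here
# is the classic yara-python API that the tuple indexing above targets.
def _example_yara_scan(ruleset_path, target_path):
    import yara

    rules = yara.compile(ruleset_path)
    results = []
    for match in rules.match(target_path):
        for offset, identifier, data in match.strings:
            results.append((match.rule, match.meta.get('tags'), identifier, offset, data))
    return results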