def peid(self):
    """Report PEiD signature matches for the open file; optionally scan the repository.

    Signatures are read from ``data/peid/UserDB.TXT`` below ``CIRTKIT_ROOT``
    and matched at the entry point only (``ep_only=True``). When
    ``self.args.scan`` is set and the open file matched something, every
    other stored sample whose match list equals the open file's is listed.

    Returns None; all results are emitted through ``self.log``.
    """

    def get_signatures():
        # open() replaces the Python 2-only file() builtin.
        sig_path = os.path.join(CIRTKIT_ROOT, 'data/peid/UserDB.TXT')
        with open(sig_path, 'rt') as f:
            sig_data = f.read()
        return peutils.SignatureDatabase(data=sig_data)

    def get_matches(pe, signatures):
        return signatures.match_all(pe, ep_only=True)

    if not self.__check_session():
        return

    signatures = get_signatures()
    peid_matches = get_matches(self.pe, signatures)

    if peid_matches:
        self.log('info', "PEiD Signatures:")
        for sig in peid_matches:
            # A match may be a list; only its first element is shown.
            if isinstance(sig, list):
                self.log('item', sig[0])
            else:
                self.log('item', sig)
    else:
        self.log('info', "No PEiD signatures matched.")

    if not (self.args.scan and peid_matches):
        return

    self.log('info', "Scanning the repository for matching samples...")

    current_sha256 = __sessions__.current.file.sha256
    matches = []
    for sample in Database().find(key='all'):
        # Skip the file the session is opened on.
        if sample.sha256 == current_sha256:
            continue

        # get_sample_path() may return a falsy value for a sample that is
        # not on disk (TODO confirm); os.path.exists(None) would raise.
        sample_path = get_sample_path(sample.sha256)
        if not sample_path or not os.path.exists(sample_path):
            continue

        # Anything that is not a parsable PE is skipped. Exception (not a
        # bare except) so Ctrl-C still interrupts a long scan.
        try:
            cur_pe = pefile.PE(sample_path)
            cur_peid_matches = get_matches(cur_pe, signatures)
        except Exception:
            continue

        if peid_matches == cur_peid_matches:
            matches.append([sample.name, sample.sha256])

    self.log('info', "{0} relevant matches found".format(bold(len(matches))))

    if matches:
        self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
def add_file(obj, tags=None):
    """Store *obj* in the database and the local repository.

    ``self`` and ``args`` are not parameters; they are read from the
    enclosing scope (presumably the method this helper is nested in --
    confirm against the caller).

    Returns True when the file was stored, False when it was skipped
    (already stored, no investigation open, or the database add failed).
    """
    already_stored = get_sample_path(obj.sha256)
    if already_stored:
        self.log('warning', "Skip, file \"{0}\" appears to be already stored".format(obj.name))
        return False

    if not __project__.name:
        print_error("Must open an investigation to store files")
        return False

    # The database record is written first: if that fails (for example on
    # unicode strings) the binary must not be left in the repository
    # without an associated record.
    stored_in_db = self.db.add(obj=obj, tags=tags)
    if not stored_in_db:
        return False

    new_path = store_sample(obj)
    self.log("success", "Stored file \"{0}\" to {1}".format(obj.name, new_path))

    # Remove the original file when requested; a failure here is only a
    # warning because the sample itself is already stored.
    if args.delete:
        try:
            os.unlink(obj.path)
        except Exception as e:
            self.log('warning', "Failed deleting file: {0}".format(e))

    return True
def pehash(self):
    """Compute the PEhash of the open file and list, cluster, or match stored samples.

    ``self.args.all`` prints the PEhash of every stored sample,
    ``self.args.cluster`` groups samples sharing a PEhash (groups of one
    are omitted) and ``self.args.scan`` lists samples whose PEhash equals
    the open file's. The three modes are checked in that order and only
    the first one set is executed.

    Returns None; all results are emitted through ``self.log``.
    """
    if not HAVE_PEHASH:
        self.log('error', "PEhash is missing. Please copy PEhash to the modules directory of Viper")
        return

    current_pehash = None
    if __sessions__.is_set():
        current_pehash = calculate_pehash(__sessions__.current.file.path)
        self.log('info', "PEhash: {0}".format(bold(current_pehash)))

    if self.args.all or self.args.cluster or self.args.scan:
        rows = []
        for sample in Database().find(key='all'):
            # get_sample_path() may return a falsy value for a sample that
            # is not on disk (TODO confirm); do not hand that to the hasher.
            sample_path = get_sample_path(sample.sha256)
            if not sample_path:
                continue

            pe_hash = calculate_pehash(sample_path)
            if pe_hash:
                rows.append((sample.name, sample.md5, pe_hash))

    if self.args.all:
        self.log('info', "PEhash for all files:")
        self.log('table', dict(header=['Name', 'MD5', 'PEhash'], rows=rows))

    elif self.args.cluster:
        self.log('info', "Clustering files by PEhash...")

        cluster = {}
        for sample_name, sample_md5, pe_hash in rows:
            cluster.setdefault(pe_hash, []).append([sample_name, sample_md5])

        for pe_hash, members in cluster.items():
            # A cluster of one is not a cluster.
            if len(members) > 1:
                self.log('info', "PEhash cluster {0}:".format(bold(pe_hash)))
                self.log('table', dict(header=['Name', 'MD5'], rows=members))

    elif self.args.scan:
        # Matching needs an open session with a computable PEhash.
        if __sessions__.is_set() and current_pehash:
            self.log('info', "Finding matching samples...")

            current_md5 = __sessions__.current.file.md5
            matches = [
                [name, md5]
                for name, md5, pe_hash in rows
                if md5 != current_md5 and pe_hash == current_pehash
            ]

            if matches:
                self.log('table', dict(header=['Name', 'MD5'], rows=matches))
            else:
                self.log('info', "No matches found")
def compiletime(self):
    """Show the compile timestamp of the open PE; optionally find samples built at or near it.

    With ``self.args.scan`` set, stored samples with an identical
    timestamp are listed. If ``self.args.window`` is also set, samples
    whose timestamp differs by at most that many whole minutes are
    listed too.

    Returns None; all results are emitted through ``self.log``.
    """

    def get_compiletime(pe):
        # fromtimestamp() renders the header value in the local timezone.
        return datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp)

    if not self.__check_session():
        return

    compile_time = get_compiletime(self.pe)
    self.log('info', "Compile Time: {0}".format(bold(compile_time)))

    if not self.args.scan:
        return

    self.log('info', "Scanning the repository for matching samples...")

    current_sha256 = __sessions__.current.file.sha256
    matches = []
    for sample in Database().find(key='all'):
        # Skip the file the session is opened on.
        if sample.sha256 == current_sha256:
            continue

        # get_sample_path() may return a falsy value for a sample that is
        # not on disk (TODO confirm); os.path.exists(None) would raise.
        sample_path = get_sample_path(sample.sha256)
        if not sample_path or not os.path.exists(sample_path):
            continue

        # Non-PE files and unusable timestamps are skipped. Exception (not
        # a bare except) so Ctrl-C still interrupts a long scan.
        try:
            cur_pe = pefile.PE(sample_path)
            cur_compile_time = get_compiletime(cur_pe)
        except Exception:
            continue

        if cur_compile_time == compile_time:
            is_match = True
        elif self.args.window:
            delta = abs(cur_compile_time - compile_time)
            # Floor division keeps whole-minute semantics on Python 2 and 3.
            delta_minutes = int(delta.total_seconds()) // 60
            is_match = delta_minutes <= self.args.window
        else:
            is_match = False

        if is_match:
            matches.append([sample.name, sample.md5, cur_compile_time])

    self.log('info', "{0} relevant matches found".format(bold(len(matches))))

    if matches:
        self.log('table', dict(header=['Name', 'MD5', 'Compile Time'], rows=matches))
def compiletime(self):
    """Show the compile timestamp of the open PE; optionally find samples built at or near it.

    With ``self.args.scan`` set, stored samples with an identical
    timestamp are listed. If ``self.args.window`` is also set, samples
    whose timestamp differs by at most that many whole minutes are
    listed too.

    Returns None; all results are emitted through ``self.log``.
    """

    def get_compiletime(pe):
        # fromtimestamp() renders the header value in the local timezone.
        return datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp)

    if not self.__check_session():
        return

    compile_time = get_compiletime(self.pe)
    self.log('info', "Compile Time: {0}".format(bold(compile_time)))

    if not self.args.scan:
        return

    self.log('info', "Scanning the repository for matching samples...")

    current_sha256 = __sessions__.current.file.sha256
    matches = []
    for sample in Database().find(key='all'):
        # Skip the file the session is opened on.
        if sample.sha256 == current_sha256:
            continue

        # get_sample_path() may return a falsy value for a sample that is
        # not on disk (TODO confirm); os.path.exists(None) would raise.
        sample_path = get_sample_path(sample.sha256)
        if not sample_path or not os.path.exists(sample_path):
            continue

        # Non-PE files and unusable timestamps are skipped. Exception (not
        # a bare except) so Ctrl-C still interrupts a long scan.
        try:
            cur_pe = pefile.PE(sample_path)
            cur_compile_time = get_compiletime(cur_pe)
        except Exception:
            continue

        if cur_compile_time == compile_time:
            is_match = True
        elif self.args.window:
            delta = abs(cur_compile_time - compile_time)
            # Floor division keeps whole-minute semantics on Python 2 and 3.
            delta_minutes = int(delta.total_seconds()) // 60
            is_match = delta_minutes <= self.args.window
        else:
            is_match = False

        if is_match:
            matches.append([sample.name, sample.md5, cur_compile_time])

    self.log('info', "{0} relevant matches found".format(bold(len(matches))))

    if matches:
        self.log('table', dict(header=['Name', 'MD5', 'Compile Time'], rows=matches))
def search_local_hashes(self, event):
    """Look up the hashes of a MISP event locally and open a session for it.

    Every attribute of ``event['Event']['Attribute']`` whose type carries
    an MD5, SHA1 or SHA256 is searched in the database. If exactly one
    distinct local sample is found, the session is opened on that sample
    together with the event; otherwise the session carries the event only
    and the known SHA256 values (if any) are listed.

    Returns None; all results are emitted through ``self.log``.
    """
    # Attribute type -> database key the hash is searched under.
    hash_keys = {
        'malware-sample': 'md5',
        'filename|md5': 'md5',
        'md5': 'md5',
        'sha1': 'sha1',
        'filename|sha1': 'sha1',
        'sha256': 'sha256',
        'filename|sha256': 'sha256',
    }

    db = Database()
    local = []
    samples_count = 0
    for a in event['Event']['Attribute']:
        if a['type'] == 'malware-sample':
            samples_count += 1

        key = hash_keys.get(a['type'])
        if key is None:
            continue

        # Composite values are "filename|hash". The hash is taken from the
        # value itself because 'malware-sample' has no '|' in its type
        # name yet carries a "filename|md5" value; a plain hash has no '|'
        # and is returned unchanged.
        h = a['value'].rsplit('|', 1)[-1]
        row = db.find(key=key, value=h)
        if row:
            local.append(row[0])

    self.log('info', 'This event contains {} samples.'.format(samples_count))

    shas = set(sample.sha256 for sample in local)
    if len(shas) == 1:
        __sessions__.new(get_sample_path(shas.pop()), MispEvent(event))
    elif len(shas) > 1:
        self.log('success', 'The following samples are in this cirtkit instance:')
        __sessions__.new(misp_event=MispEvent(event))
        for s in shas:
            self.log('item', s)
    else:
        __sessions__.new(misp_event=MispEvent(event))
        self.log('info', 'No known (in Viper) samples in that event.')
def get_signed_samples(current=None, cert_filter=None):
    """Return the stored PE samples that carry certificate data.

    current     -- SHA256 of a sample to leave out (typically the open file).
    cert_filter -- when given, only samples whose certificate MD5 equals
                   it are returned.

    Rows are ``[name, md5]`` when ``cert_filter`` is given and
    ``[name, md5, certificate md5]`` otherwise.
    """
    results = []
    for sample in Database().find(key='all'):
        # Skip if it's the same file.
        if current and sample.sha256 == current:
            continue

        # Obtain path to the binary. get_sample_path() may return a falsy
        # value for a sample that is not on disk (TODO confirm);
        # os.path.exists(None) would raise.
        sample_path = get_sample_path(sample.sha256)
        if not sample_path or not os.path.exists(sample_path):
            continue

        # Open PE instance; anything unparsable is skipped. Exception (not
        # a bare except) so Ctrl-C still interrupts a long scan.
        try:
            cur_pe = pefile.PE(sample_path)
        except Exception:
            continue

        cur_cert_data = get_certificate(cur_pe)
        if not cur_cert_data:
            continue

        cur_cert_md5 = get_md5(cur_cert_data)

        if cert_filter:
            if cur_cert_md5 == cert_filter:
                results.append([sample.name, sample.md5])
        else:
            results.append([sample.name, sample.md5, cur_cert_md5])

    return results
def scan(self):
    """Run Yara rules against the open file or against every stored sample.

    ``self.args.rule`` selects a ruleset file; without it an index that
    includes every ``.yar``/``.yara`` file of ``self.rule_path`` is
    generated. The open session file is scanned unless ``self.args.all``
    is set or no session is open, in which case all stored samples are
    scanned. With ``self.args.tag`` set, files with matched strings are
    tagged with the rule's ``tags`` metadata, or the rule name when the
    rule has none.

    Returns None; all results are emitted through ``self.log``.
    """

    def string_printable(line):
        # Escape every non-printable character as \xNN. ord()/format is
        # used instead of the Python 2-only 'hex' codec and produces the
        # same two lowercase hex digits for a byte.
        line = str(line)
        return ''.join(
            c if c in printstring.printable else '\\x{0:02x}'.format(ord(c))
            for c in line
        )

    # This means users can just drop or remove rule files without
    # having to worry about maintaining the index.
    # TODO: make paths absolute.
    # TODO: this regenerates the file at every run, perhaps we
    # could find a way to optimize this.
    def rule_index():
        tmp_path = os.path.join(tempfile.gettempdir(), 'index.yara')
        with open(tmp_path, 'w') as rules_index:
            for rule_file in os.listdir(self.rule_path):
                # Skip if the extension is not right, could cause problems.
                if not rule_file.endswith(('.yar', '.yara')):
                    continue

                # Skip if it's the index itself.
                if rule_file == 'index.yara':
                    continue

                # Add the rule to the index.
                rules_index.write('include "{0}"\n'.format(os.path.join(self.rule_path, rule_file)))

        return tmp_path

    arg_rule = self.args.rule
    arg_scan_all = self.args.all
    arg_tag = self.args.tag

    # If no custom ruleset is specified, we use the default one.
    if not arg_rule:
        arg_rule = rule_index()

    # Check if the selected ruleset actually exists.
    if not os.path.exists(arg_rule):
        self.log('error', "No valid Yara ruleset at {0}".format(arg_rule))
        return

    # Compile all rules from given ruleset.
    rules = yara.compile(arg_rule)

    # If there is a session open and the user didn't specifically
    # request to scan the full repository, we just scan the currently
    # opened file. Otherwise every stored file is queued up.
    if __sessions__.is_set() and not arg_scan_all:
        files = [__sessions__.current.file]
    else:
        self.log('info', "Scanning all stored files...")
        files = list(Database().find(key='all'))

    for entry in files:
        if entry.size == 0:
            continue

        self.log('info', "Scanning {0} ({1})".format(entry.name, entry.sha256))

        # An entry with a path attribute is the opened session file, which
        # may not have been stored yet. Entries without one come from the
        # repository and are resolved through their hash.
        if hasattr(entry, 'path'):
            entry_path = entry.path
        else:
            entry_path = get_sample_path(entry.sha256)

        # Check if the file exists before running the yara scan. A missing
        # file is reported and skipped so that a single missing sample
        # does not abort a full repository scan.
        if not entry_path or not os.path.exists(entry_path):
            self.log('error', "The file does not exist at path {0}".format(entry_path))
            continue

        rows = []
        tag_list = []
        for match in rules.match(entry_path):
            # Add a row for each string matched by the rule.
            for string in match.strings:
                rows.append([
                    match.rule,
                    string_printable(string[1]),
                    string_printable(string[0]),
                    string_printable(string[2]),
                ])

            # Tags come from the rule's metadata when present, otherwise
            # the rule name is used.
            # TODO: as we add more and more yara rules, we might remove
            # this option and only tag the file with rules that had
            # tags specified in them.
            match_tags = match.meta.get('tags')
            if not match_tags:
                match_tags = match.rule

            tag_list.append([entry.sha256, match_tags])

        if rows:
            header = ['Rule', 'String', 'Offset', 'Content']
            self.log('table', dict(header=header, rows=rows))

        # If we selected to add tags do that now.
        if rows and arg_tag:
            db = Database()
            for sha256, tags in tag_list:
                db.add_tags(sha256, tags)

            # If in a session reset the session to see tags.
            if __sessions__.is_set() and not arg_scan_all:
                self.log('info', "Refreshing session to update attributes...")
                __sessions__.new(__sessions__.current.file.path)
def language(self):
    """Guess the programming language of the open PE; optionally find samples in the same language.

    The guess is based on imported DLL names and on strings extracted
    from the file content. With ``self.args.scan`` set, stored samples
    for which the same language is guessed are listed together with a
    packed indicator.

    Returns None; all results are emitted through ``self.log``.
    """

    def get_iat(pe):
        # Names of the DLLs in the import directory, if there is one.
        if not hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            return []
        return [peimport.dll for peimport in pe.DIRECTORY_ENTRY_IMPORT]

    def check_module(iat, match):
        return any(imp.find(match) != -1 for imp in iat)

    def is_cpp(data, cpp_count):
        # True only when the caller already counted a C runtime import
        # (cpp_count == 1) and the strings mention type_info or RTTI.
        if any('type_info' in line or 'RTTI' in line for line in data):
            cpp_count += 1
        return cpp_count == 2

    def is_delphi(data):
        for line in data:
            if 'Borland' in line and any('Delphi' in p for p in line.split('\\')):
                return True
        return False

    def is_vbdotnet(data):
        for line in data:
            if 'Compiler' in line and 'VisualBasic' in line.split('.'):
                return True
        return False

    def is_autoit(data):
        return any('AU3!' in line for line in data)

    def is_packed(pe):
        # A section entropy above 7 is taken as the sign of packing.
        return any(section.get_entropy() > 7 for section in pe.sections)

    def get_strings(content):
        # Raw string: the re module resolves the \xNN escapes itself, so
        # the character class is the same as the non-raw original while
        # avoiding invalid string escapes (\- and \.).
        regexp = r'[\x30-\x39\x41-\x5f\x61-\x7a\-\.:]{4,}'
        return re.findall(regexp, content)

    def find_language(iat, sample, content):
        # VB check. Returns the language name (not True) so that the
        # caller logs and compares a real name.
        if check_module(iat, 'VB'):
            self.log('info', "{0} - Possible language: Visual Basic".format(sample.name))
            return 'Visual Basic'

        data = get_strings(content)

        # .NET check. The VB.NET refinement has to run here: it only
        # applies to .NET binaries and was unreachable behind a
        # "nothing found yet" guard.
        if check_module(iat, 'mscoree.dll'):
            if is_vbdotnet(data):
                return 'Visual Basic .NET'
            return '.NET'

        # C DLL check
        cpp_count = 0
        if check_module(iat, 'msvcr') or check_module(iat, 'MSVCR') or check_module(iat, 'c++'):
            cpp_count += 1

        if is_cpp(data, cpp_count):
            return 'CPP'
        if cpp_count == 1:
            return 'C'
        if is_delphi(data):
            return 'Delphi'
        if is_autoit(data):
            return 'AutoIt'
        return None

    if not self.__check_session():
        return

    if is_packed(self.pe):
        self.log('warning', "Probably packed, the language guess might be unreliable")

    language = find_language(
        get_iat(self.pe),
        __sessions__.current.file,
        __sessions__.current.file.data
    )

    if language:
        self.log('info', "Probable language: {0}".format(bold(language)))
    else:
        self.log('error', "Programming language not identified")
        return

    if not self.args.scan:
        return

    self.log('info', "Scanning the repository for matching samples...")

    current_sha256 = __sessions__.current.file.sha256
    matches = []
    for sample in Database().find(key='all'):
        # Skip the file the session is opened on.
        if sample.sha256 == current_sha256:
            continue

        # get_sample_path() may return a falsy value for a sample that is
        # not on disk (TODO confirm); os.path.exists(None) would raise.
        sample_path = get_sample_path(sample.sha256)
        if not sample_path or not os.path.exists(sample_path):
            continue

        try:
            cur_pe = pefile.PE(sample_path)
        except pefile.PEFormatError:
            continue

        cur_packed = 'Yes' if is_packed(cur_pe) else ''

        # Read through a context manager so the handle is closed promptly.
        with open(sample_path, 'rb') as handle:
            content = handle.read()

        cur_language = find_language(get_iat(cur_pe), sample, content)
        if cur_language and cur_language == language:
            matches.append([sample.name, sample.md5, cur_packed])

    if matches:
        self.log('table', dict(header=['Name', 'MD5', 'Is Packed'], rows=matches))
    else:
        self.log('info', "No matches found")
def imphash(self):
    """Show the import hash of the open PE, or cluster/scan the repository by import hash.

    ``self.args.cluster`` groups all stored samples by imphash (groups of
    one are omitted) and needs no open session. Otherwise the imphash of
    the open file is shown and, with ``self.args.scan`` set, the stored
    samples sharing it are listed. ``scan`` and ``cluster`` are mutually
    exclusive.

    Returns None; all results are emitted through ``self.log``.
    """
    if self.args.scan and self.args.cluster:
        self.log('error', "You selected two exclusive options, pick one")
        return

    def stored_imphashes():
        # Yield (sample, imphash) for every stored sample that parses as PE.
        for sample in Database().find(key='all'):
            # get_sample_path() may return a falsy value for a sample that
            # is not on disk (TODO confirm); os.path.exists(None) would raise.
            sample_path = get_sample_path(sample.sha256)
            if not sample_path or not os.path.exists(sample_path):
                continue

            # Exception (not a bare except) so Ctrl-C still interrupts.
            try:
                cur_imphash = pefile.PE(sample_path).get_imphash()
            except Exception:
                continue

            yield sample, cur_imphash

    if self.args.cluster:
        self.log('info', "Clustering all samples by imphash...")

        cluster = {}
        for sample, cur_imphash in stored_imphashes():
            cluster.setdefault(cur_imphash, []).append([sample.sha256, sample.name])

        for cluster_name, cluster_members in cluster.items():
            # Skipping clusters with only one entry.
            if len(cluster_members) == 1:
                continue

            self.log('info', "Imphash cluster {0}".format(bold(cluster_name)))
            # The rows carry SHA256 values, so the column is labelled
            # accordingly (it used to say 'MD5').
            self.log('table', dict(header=['SHA256', 'Name'], rows=cluster_members))

        return

    if not self.__check_session():
        return

    try:
        imphash = self.pe.get_imphash()
    except AttributeError:
        self.log('error', "No imphash support, upgrade pefile to a version >= 1.2.10-139 (`pip install --upgrade pefile`)")
        return

    self.log('info', "Imphash: {0}".format(bold(imphash)))

    if not self.args.scan:
        return

    self.log('info', "Scanning the repository for matching samples...")

    current_sha256 = __sessions__.current.file.sha256
    matches = [
        [sample.name, sample.sha256]
        for sample, cur_imphash in stored_imphashes()
        if sample.sha256 != current_sha256 and cur_imphash == imphash
    ]

    self.log('info', "{0} relevant matches found".format(bold(len(matches))))

    if matches:
        self.log('table', dict(header=['Name', 'SHA256'], rows=matches))
def resources(self):
    """List the resources of the open PE; optionally dump them, open one, or scan for them.

    Each table row is ``[#, name, offset, md5, size, file type, language,
    sublanguage]`` plus the dump path when ``self.args.dump`` or
    ``self.args.open`` is set. ``self.args.open`` opens a new session on
    the resource with that number; otherwise ``self.args.scan`` lists the
    stored samples sharing at least one resource MD5 with the open file.

    Returns None; all results are emitted through ``self.log``.
    """

    # Use this function to retrieve resources for the given PE instance.
    # Returns all the identified resources with indicators and attributes.
    def get_resources(pe):
        resources = []
        if not hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            return resources

        # Resources are dumped only when requested and only for the opened
        # session file, so that a --scan does not dump the resources of
        # every scanned sample as well.
        dumping = (self.args.open or self.args.dump) and pe == self.pe
        # Created lazily and shared by all resources of this call instead
        # of creating one temporary directory per resource.
        temp_folder = None

        count = 1
        for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
            try:
                if resource_type.name is not None:
                    name = str(resource_type.name)
                else:
                    # Fall back to the numeric id before converting to
                    # str; str(None) would hide the missing name.
                    name = pefile.RESOURCE_TYPE.get(resource_type.struct.Id)
                    if name is None:
                        name = resource_type.struct.Id
                    name = str(name)

                if not hasattr(resource_type, 'directory'):
                    continue

                for resource_id in resource_type.directory.entries:
                    if not hasattr(resource_id, 'directory'):
                        continue

                    for resource_lang in resource_id.directory.entries:
                        entry = resource_lang.data
                        data = pe.get_data(entry.struct.OffsetToData, entry.struct.Size)
                        filetype = get_type(data)
                        md5 = get_md5(data)
                        language = pefile.LANG.get(entry.lang, None)
                        sublanguage = pefile.get_sublang_name_for_lang(entry.lang, entry.sublang)
                        offset = ('%-8s' % hex(entry.struct.OffsetToData)).strip()
                        size = ('%-8s' % hex(entry.struct.Size)).strip()

                        resource = [count, name, offset, md5, size, filetype, language, sublanguage]

                        if dumping:
                            if self.args.dump:
                                folder = self.args.dump
                            else:
                                if temp_folder is None:
                                    temp_folder = tempfile.mkdtemp()
                                folder = temp_folder

                            resource_path = os.path.join(
                                folder,
                                '{0}_{1}_{2}'.format(__sessions__.current.file.md5, offset, name)
                            )
                            resource.append(resource_path)

                            with open(resource_path, 'wb') as resource_handle:
                                resource_handle.write(data)

                        resources.append(resource)
                        count += 1
            except Exception as e:
                # One broken resource type must not hide the others.
                self.log('error', e)
                continue

        return resources

    if not self.__check_session():
        return

    # Obtain resources for the currently opened file.
    resources = get_resources(self.pe)

    if not resources:
        self.log('warning', "No resources found")
        return

    headers = ['#', 'Name', 'Offset', 'MD5', 'Size', 'File Type', 'Language', 'Sublanguage']
    if self.args.dump or self.args.open:
        headers.append('Dumped To')

    self.log('table', dict(header=headers, rows=resources))

    # If instructed, open a session on the given resource.
    if self.args.open:
        for resource in resources:
            if resource[0] == self.args.open:
                # Index 8 is the dump path appended by get_resources().
                __sessions__.new(resource[8])
                return
    # If instructed to perform a scan across the repository, start looping
    # through all available files.
    elif self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        current_sha256 = __sessions__.current.file.sha256
        # MD5s of the opened file's resources, for O(1) membership tests.
        known_md5s = set(resource[3] for resource in resources)

        matches = []
        for sample in Database().find(key='all'):
            # Skip if it's the same file.
            if sample.sha256 == current_sha256:
                continue

            # Obtain path to the binary. get_sample_path() may return a
            # falsy value for a sample that is not on disk (TODO confirm);
            # os.path.exists(None) would raise.
            sample_path = get_sample_path(sample.sha256)
            if not sample_path or not os.path.exists(sample_path):
                continue

            # Open PE instance. Exception (not a bare except) so Ctrl-C
            # still interrupts a long scan.
            try:
                cur_pe = pefile.PE(sample_path)
            except Exception:
                continue

            # Collect each shared resource MD5 once, in the order found.
            matched_resources = []
            for cur_resource in get_resources(cur_pe):
                cur_md5 = cur_resource[3]
                if cur_md5 in known_md5s and cur_md5 not in matched_resources:
                    matched_resources.append(cur_md5)

            # If there are any common resources, add the entry to the list
            # of matched samples.
            if matched_resources:
                matches.append([sample.name, sample.md5, '\n'.join(matched_resources)])

        self.log('info', "{0} relevant matches found".format(bold(len(matches))))

        if matches:
            self.log('table', dict(header=['Name', 'MD5', 'Resource MD5'], rows=matches))
def entrypoint(self):
    """Show, list, cluster, or match the AddressOfEntryPoint of PE samples.

    ``self.args.all`` lists the entry point of every stored sample and
    ``self.args.cluster`` groups stored samples by entry point (groups of
    one are omitted); neither needs an open session. Otherwise the entry
    point of the open file is shown and, with ``self.args.scan`` set, the
    stored samples sharing it are listed. ``scan`` and ``cluster`` are
    mutually exclusive.

    Returns None; all results are emitted through ``self.log``.
    """
    if self.args.scan and self.args.cluster:
        self.log('error', "You selected two exclusive options, pick one")
        return

    def stored_entrypoints():
        # Yield (sample, AddressOfEntryPoint) for every stored sample that
        # parses as a PE file.
        for sample in Database().find(key='all'):
            # get_sample_path() may return a falsy value for a sample that
            # is not on disk (TODO confirm); os.path.exists(None) would raise.
            sample_path = get_sample_path(sample.sha256)
            if not sample_path or not os.path.exists(sample_path):
                continue

            # Exception (not a bare except) so Ctrl-C still interrupts.
            try:
                cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint
            except Exception:
                continue

            yield sample, cur_ep

    if self.args.all:
        rows = [[sample.md5, sample.name, cur_ep] for sample, cur_ep in stored_entrypoints()]
        self.log('table', dict(header=['MD5', 'Name', 'AddressOfEntryPoint'], rows=rows))
        return

    if self.args.cluster:
        cluster = {}
        for sample, cur_ep in stored_entrypoints():
            cluster.setdefault(cur_ep, []).append([sample.md5, sample.name])

        for cluster_name, cluster_members in cluster.items():
            # Skipping clusters with only one entry.
            if len(cluster_members) == 1:
                continue

            self.log('info', "AddressOfEntryPoint cluster {0}".format(bold(cluster_name)))
            self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members))

        return

    if not self.__check_session():
        return

    ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
    self.log('info', "AddressOfEntryPoint: {0}".format(ep))

    if self.args.scan:
        current_sha256 = __sessions__.current.file.sha256
        rows = [
            [sample.md5, sample.name]
            for sample, cur_ep in stored_entrypoints()
            if sample.sha256 != current_sha256 and cur_ep == ep
        ]

        self.log('info', "Following are samples with AddressOfEntryPoint {0}".format(bold(ep)))
        self.log('table', dict(header=['MD5', 'Name'], rows=rows))
def resources(self):
    """List the resources of the open PE; optionally dump them, open one, or scan for them.

    Each table row is ``[#, name, offset, md5, size, file type, language,
    sublanguage]`` plus the dump path when ``self.args.dump`` or
    ``self.args.open`` is set. ``self.args.open`` opens a new session on
    the resource with that number; otherwise ``self.args.scan`` lists the
    stored samples sharing at least one resource MD5 with the open file.

    Returns None; all results are emitted through ``self.log``.
    """

    # Use this function to retrieve resources for the given PE instance.
    # Returns all the identified resources with indicators and attributes.
    def get_resources(pe):
        resources = []
        if not hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            return resources

        # Resources are dumped only when requested and only for the opened
        # session file, so that a --scan does not dump the resources of
        # every scanned sample as well.
        dumping = (self.args.open or self.args.dump) and pe == self.pe
        # Created lazily and shared by all resources of this call instead
        # of creating one temporary directory per resource.
        temp_folder = None

        count = 1
        for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
            try:
                if resource_type.name is not None:
                    name = str(resource_type.name)
                else:
                    # Fall back to the numeric id before converting to
                    # str; str(None) would hide the missing name.
                    name = pefile.RESOURCE_TYPE.get(resource_type.struct.Id)
                    if name is None:
                        name = resource_type.struct.Id
                    name = str(name)

                if not hasattr(resource_type, 'directory'):
                    continue

                for resource_id in resource_type.directory.entries:
                    if not hasattr(resource_id, 'directory'):
                        continue

                    for resource_lang in resource_id.directory.entries:
                        entry = resource_lang.data
                        data = pe.get_data(entry.struct.OffsetToData, entry.struct.Size)
                        filetype = get_type(data)
                        md5 = get_md5(data)
                        language = pefile.LANG.get(entry.lang, None)
                        sublanguage = pefile.get_sublang_name_for_lang(entry.lang, entry.sublang)
                        offset = ('%-8s' % hex(entry.struct.OffsetToData)).strip()
                        size = ('%-8s' % hex(entry.struct.Size)).strip()

                        resource = [count, name, offset, md5, size, filetype, language, sublanguage]

                        if dumping:
                            if self.args.dump:
                                folder = self.args.dump
                            else:
                                if temp_folder is None:
                                    temp_folder = tempfile.mkdtemp()
                                folder = temp_folder

                            resource_path = os.path.join(
                                folder,
                                '{0}_{1}_{2}'.format(__sessions__.current.file.md5, offset, name)
                            )
                            resource.append(resource_path)

                            with open(resource_path, 'wb') as resource_handle:
                                resource_handle.write(data)

                        resources.append(resource)
                        count += 1
            except Exception as e:
                # One broken resource type must not hide the others.
                self.log('error', e)
                continue

        return resources

    if not self.__check_session():
        return

    # Obtain resources for the currently opened file.
    resources = get_resources(self.pe)

    if not resources:
        self.log('warning', "No resources found")
        return

    headers = ['#', 'Name', 'Offset', 'MD5', 'Size', 'File Type', 'Language', 'Sublanguage']
    if self.args.dump or self.args.open:
        headers.append('Dumped To')

    self.log('table', dict(header=headers, rows=resources))

    # If instructed, open a session on the given resource.
    if self.args.open:
        for resource in resources:
            if resource[0] == self.args.open:
                # Index 8 is the dump path appended by get_resources().
                __sessions__.new(resource[8])
                return
    # If instructed to perform a scan across the repository, start looping
    # through all available files.
    elif self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        current_sha256 = __sessions__.current.file.sha256
        # MD5s of the opened file's resources, for O(1) membership tests.
        known_md5s = set(resource[3] for resource in resources)

        matches = []
        for sample in Database().find(key='all'):
            # Skip if it's the same file.
            if sample.sha256 == current_sha256:
                continue

            # Obtain path to the binary. get_sample_path() may return a
            # falsy value for a sample that is not on disk (TODO confirm);
            # os.path.exists(None) would raise.
            sample_path = get_sample_path(sample.sha256)
            if not sample_path or not os.path.exists(sample_path):
                continue

            # Open PE instance. Exception (not a bare except) so Ctrl-C
            # still interrupts a long scan.
            try:
                cur_pe = pefile.PE(sample_path)
            except Exception:
                continue

            # Collect each shared resource MD5 once, in the order found.
            matched_resources = []
            for cur_resource in get_resources(cur_pe):
                cur_md5 = cur_resource[3]
                if cur_md5 in known_md5s and cur_md5 not in matched_resources:
                    matched_resources.append(cur_md5)

            # If there are any common resources, add the entry to the list
            # of matched samples.
            if matched_resources:
                matches.append([sample.name, sample.md5, '\n'.join(matched_resources)])

        self.log('info', "{0} relevant matches found".format(bold(len(matches))))

        if matches:
            self.log('table', dict(header=['Name', 'MD5', 'Resource MD5'], rows=matches))
def entrypoint(self):
    """Show, list, cluster, or match the AddressOfEntryPoint of PE samples.

    ``self.args.all`` lists the entry point of every stored sample and
    ``self.args.cluster`` groups stored samples by entry point (groups of
    one are omitted); neither needs an open session. Otherwise the entry
    point of the open file is shown and, with ``self.args.scan`` set, the
    stored samples sharing it are listed. ``scan`` and ``cluster`` are
    mutually exclusive.

    Returns None; all results are emitted through ``self.log``.
    """
    if self.args.scan and self.args.cluster:
        self.log('error', "You selected two exclusive options, pick one")
        return

    def stored_entrypoints():
        # Yield (sample, AddressOfEntryPoint) for every stored sample that
        # parses as a PE file.
        for sample in Database().find(key='all'):
            # get_sample_path() may return a falsy value for a sample that
            # is not on disk (TODO confirm); os.path.exists(None) would raise.
            sample_path = get_sample_path(sample.sha256)
            if not sample_path or not os.path.exists(sample_path):
                continue

            # Exception (not a bare except) so Ctrl-C still interrupts.
            try:
                cur_ep = pefile.PE(sample_path).OPTIONAL_HEADER.AddressOfEntryPoint
            except Exception:
                continue

            yield sample, cur_ep

    if self.args.all:
        rows = [[sample.md5, sample.name, cur_ep] for sample, cur_ep in stored_entrypoints()]
        self.log('table', dict(header=['MD5', 'Name', 'AddressOfEntryPoint'], rows=rows))
        return

    if self.args.cluster:
        cluster = {}
        for sample, cur_ep in stored_entrypoints():
            cluster.setdefault(cur_ep, []).append([sample.md5, sample.name])

        for cluster_name, cluster_members in cluster.items():
            # Skipping clusters with only one entry.
            if len(cluster_members) == 1:
                continue

            self.log('info', "AddressOfEntryPoint cluster {0}".format(bold(cluster_name)))
            self.log('table', dict(header=['MD5', 'Name'], rows=cluster_members))

        return

    if not self.__check_session():
        return

    ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
    self.log('info', "AddressOfEntryPoint: {0}".format(ep))

    if self.args.scan:
        current_sha256 = __sessions__.current.file.sha256
        rows = [
            [sample.md5, sample.name]
            for sample, cur_ep in stored_entrypoints()
            if sample.sha256 != current_sha256 and cur_ep == ep
        ]

        self.log('info', "Following are samples with AddressOfEntryPoint {0}".format(bold(ep)))
        self.log('table', dict(header=['MD5', 'Name'], rows=rows))
def pehash(self):
    """Compute the PEhash of the open file and list, cluster, or match stored samples.

    ``self.args.all`` prints the PEhash of every stored sample,
    ``self.args.cluster`` groups samples sharing a PEhash (groups of one
    are omitted) and ``self.args.scan`` lists samples whose PEhash equals
    the open file's. The three modes are checked in that order and only
    the first one set is executed.

    Returns None; all results are emitted through ``self.log``.
    """
    if not HAVE_PEHASH:
        self.log('error', "PEhash is missing. Please copy PEhash to the modules directory of Viper")
        return

    current_pehash = None
    if __sessions__.is_set():
        current_pehash = calculate_pehash(__sessions__.current.file.path)
        self.log('info', "PEhash: {0}".format(bold(current_pehash)))

    if self.args.all or self.args.cluster or self.args.scan:
        rows = []
        for sample in Database().find(key='all'):
            # get_sample_path() may return a falsy value for a sample that
            # is not on disk (TODO confirm); do not hand that to the hasher.
            sample_path = get_sample_path(sample.sha256)
            if not sample_path:
                continue

            pe_hash = calculate_pehash(sample_path)
            if pe_hash:
                rows.append((sample.name, sample.md5, pe_hash))

    if self.args.all:
        self.log('info', "PEhash for all files:")
        self.log('table', dict(header=['Name', 'MD5', 'PEhash'], rows=rows))

    elif self.args.cluster:
        self.log('info', "Clustering files by PEhash...")

        cluster = {}
        for sample_name, sample_md5, pe_hash in rows:
            cluster.setdefault(pe_hash, []).append([sample_name, sample_md5])

        for pe_hash, members in cluster.items():
            # A cluster of one is not a cluster.
            if len(members) > 1:
                self.log('info', "PEhash cluster {0}:".format(bold(pe_hash)))
                self.log('table', dict(header=['Name', 'MD5'], rows=members))

    elif self.args.scan:
        # Matching needs an open session with a computable PEhash.
        if __sessions__.is_set() and current_pehash:
            self.log('info', "Finding matching samples...")

            current_md5 = __sessions__.current.file.md5
            matches = [
                [name, md5]
                for name, md5, pe_hash in rows
                if md5 != current_md5 and pe_hash == current_pehash
            ]

            if matches:
                self.log('table', dict(header=['Name', 'MD5'], rows=matches))
            else:
                self.log('info', "No matches found")
def do_get(self, line):
    '''
    Command: get

    Description:
    Get (copy) a file, or parts of file, from the sensor.

    Args:
    get [OPTIONS] <RemotePath> <LocalPath>

    where OPTIONS are:
    -o, --offset : The offset to start getting the file at
    -b, --bytes : How many bytes of the file to get. The default is all bytes.
    '''
    # The retrieved content is written to a temporary file, opened as a
    # session, stored in the repository and recorded in the database; the
    # temporary file is always removed afterwards.
    # NOTE(review): the second positional argument is required but not
    # used by the code below.
    self._needs_attached()

    import tempfile

    if not __project__.name:
        print_error("Must open an investigation to retrieve files")
        return

    # close session of current file if opened
    if __sessions__:
        __sessions__.close()

    # establish connection to db
    db = Database()

    p = CliArgs(usage='get [OPTIONS] <RemoteFile> <LocalName>')
    p.add_option('-o', '--offset', default="0", help='Offset of the file to start grabbing')
    p.add_option('-b', '--bytes', default=None, help='How many bytes to grab')
    (opts, args) = p.parse_line(line)

    if len(args) != 2:
        raise CliArgsException("Wrong number of args to get command")

    # Resolve everything that can fail on bad input BEFORE the temporary
    # file exists, so a non-numeric option cannot leave it behind.
    # Fix file path
    gfile = self._file_path_fixup(args[0])
    # The offset is always sent: the option default is the string "0", so
    # the former `opts.offset != 0` test was true for every input
    # (presumably optparse-style string values -- confirm against CliArgs).
    hargs = {'offset': int(opts.offset)}
    if opts.bytes:
        hargs['get_count'] = int(opts.bytes)

    # Create a new temporary file.
    fout = tempfile.NamedTemporaryFile(delete=False)
    try:
        ret = self._postCommandAndWait("get file", gfile, args=hargs)
        fid = ret["file_id"]
        url = '%s/api/v1/cblr/session/%d/file/%d/content' % (self.url, self.session, fid)
        fdata = self._doGet(url, retJSON=False)

        fout.write(fdata)
        # Close (and thereby flush) before the file is read back by path.
        fout.close()

        __sessions__.new(fout.name)
        store_sample(__sessions__.current.file)
        # Point the session at the stored copy rather than the temp file.
        __sessions__.current.file.path = get_sample_path(__sessions__.current.file.sha256)
        db.add(obj=__sessions__.current.file)
    finally:
        # Runs on success and on error (the exception still propagates):
        # the temporary file is never left behind.
        fout.close()
        if os.path.exists(fout.name):
            os.remove(fout.name)
def scan(self):
    """Scan the open file, or every stored file, with a Yara ruleset.

    Options read from ``self.args``:

    * ``rule`` -- path of the ruleset to compile; when empty, an index
      including every ``.yar``/``.yara`` file in ``self.rule_path`` is
      generated and used instead;
    * ``all``  -- scan every stored sample even if a session is open;
    * ``tag``  -- tag each matching file with the matched rules' ``tags``
      metadata, or with the rule name when a rule has none.

    For every file with matches a table of matched strings is logged.
    Nothing is returned.
    """

    def string_printable(line):
        """Return ``str(line)`` with non-printable characters as ``\\xNN``."""
        # ord()-based formatting yields the same two hex digits as the
        # former Python 2-only ``c.encode('hex')``, and join() avoids
        # rebuilding the string for every character.
        return ''.join(
            c if c in printstring.printable else '\\x{0:02x}'.format(ord(c))
            for c in str(line)
        )

    # This means users can just drop or remove rule files without
    # having to worry about maintaining the index.
    # TODO: make paths absolute.
    # TODO: this regenerates the file at every run, perhaps we
    # could find a way to optimize this.
    def rule_index():
        """Write an index including all rule files and return its path."""
        tmp_path = os.path.join(tempfile.gettempdir(), 'index.yara')
        with open(tmp_path, 'w') as rules_index:
            for rule_file in os.listdir(self.rule_path):
                # Skip if the extension is not right, could cause problems.
                if not rule_file.endswith(('.yar', '.yara')):
                    continue
                # Skip if it's the index itself.
                if rule_file == 'index.yara':
                    continue

                # Add the rule to the index.
                line = 'include "{0}"\n'.format(os.path.join(self.rule_path, rule_file))
                rules_index.write(line)

        return tmp_path

    arg_rule = self.args.rule
    arg_scan_all = self.args.all
    arg_tag = self.args.tag

    # If no custom ruleset is specified, we use the default one.
    if not arg_rule:
        arg_rule = rule_index()

    # Check if the selected ruleset actually exists.
    if not os.path.exists(arg_rule):
        self.log('error', "No valid Yara ruleset at {0}".format(arg_rule))
        return

    # Compile all rules from given ruleset.
    rules = yara.compile(arg_rule)
    files = []

    # If there is a session open and the user didn't specifically
    # request to scan the full repository, we just add the currently
    # opened file's path.
    if __sessions__.is_set() and not arg_scan_all:
        files.append(__sessions__.current.file)
    # Otherwise we loop through all files in the repository and queue
    # them up for scan.
    else:
        self.log('info', "Scanning all stored files...")

        db = Database()
        files.extend(db.find(key='all'))

    for entry in files:
        if entry.size == 0:
            continue

        self.log('info', "Scanning {0} ({1})".format(entry.name, entry.sha256))

        # Check if the entry has a path attribute. This happens when
        # there is a session open. We need to distinguish this just for
        # the cases where we're scanning an opened file which has not been
        # stored yet.
        if hasattr(entry, 'path'):
            entry_path = entry.path
        # This should be triggered only when scanning the full repository.
        else:
            entry_path = get_sample_path(entry.sha256)

        # Check if the file exists before running the yara scan. The path
        # may also be empty when the binary is missing from the repository.
        # Skip just this entry: one missing file must not abort the scan of
        # everything queued after it.
        if not entry_path or not os.path.exists(entry_path):
            self.log('error', "The file does not exist at path {0}".format(entry_path))
            continue

        rows = []
        tag_list = []
        for match in rules.match(entry_path):
            # Add a row for each string matched by the rule. The tuple
            # fields are reordered to follow the table header below.
            for string in match.strings:
                rows.append([
                    match.rule,
                    string_printable(string[1]),
                    string_printable(string[0]),
                    string_printable(string[2]),
                ])

            # Add matching rules to our list of tags.
            # First it checks if there are tags specified in the metadata
            # of the Yara rule.
            match_tags = match.meta.get('tags')
            # If not, use the rule name.
            # TODO: as we add more and more yara rules, we might remove
            # this option and only tag the file with rules that had
            # tags specified in them.
            if not match_tags:
                match_tags = match.rule

            # Add the tags to the list.
            tag_list.append([entry.sha256, match_tags])

        if rows:
            header = ['Rule', 'String', 'Offset', 'Content']
            self.log('table', dict(header=header, rows=rows))

        # If we selected to add tags do that now.
        if rows and arg_tag:
            db = Database()
            for sha256, tags in tag_list:
                db.add_tags(sha256, tags)

            # If in a session reset the session to see tags.
            if __sessions__.is_set() and not arg_scan_all:
                self.log('info', "Refreshing session to update attributes...")
                __sessions__.new(__sessions__.current.file.path)
def cmd_open(self, *args):
    """Open a session on a file, a URL, a previous find result or a hash.

    ``args`` are the command-line tokens of the ``open`` command:

    * ``-f/--file`` -- the target is a local path;
    * ``-u/--url``  -- the target is downloaded (``-t/--tor`` is passed on
      to ``download``) and opened from a temporary file;
    * ``-l/--last`` -- the target is the 1-based entry number in the
      results of the last find command;
    * no flag       -- the target is the MD5 or SHA256 of a stored sample.

    Usage or an error is reported and nothing is opened when the arguments
    are invalid. Returns ``None`` in every case.
    """
    parser = argparse.ArgumentParser(prog='open', description="Open a file", epilog="You can also specify a MD5 or SHA256 hash to a previously stored file in order to open a session on it.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-f', '--file', action='store_true', help="Target is a file")
    group.add_argument('-u', '--url', action='store_true', help="Target is a URL")
    group.add_argument('-l', '--last', action='store_true', help="Target is the entry number from the last find command's results")
    parser.add_argument('-t', '--tor', action='store_true', help="Download the file through Tor")
    parser.add_argument("value", metavar='PATH, URL, HASH or ID', nargs='*', help="Target to open. Hash can be md5 or sha256. ID has to be from the last search.")

    try:
        args = parser.parse_args(args)
    except SystemExit:
        # argparse prints its own message and exits on bad input or -h;
        # inside the shell that must not terminate the process.
        return

    target = " ".join(args.value)

    # " ".join() always returns a string, so test for emptiness (the former
    # `target is None` check could never fire). Every mode needs a target.
    if not target.strip():
        parser.print_usage()
        return

    # If it's a file path, open a session on it.
    if args.file:
        target = os.path.expanduser(target)

        # isfile() is False for paths that do not exist as well.
        if not os.path.isfile(target):
            self.log('error', "File not found: {0}".format(target))
            return

        __sessions__.new(target)
    # If it's a URL, download it and open a session on the temporary file.
    elif args.url:
        data = download(url=target, tor=args.tor)

        if data:
            # delete=False: the session keeps using the file after close.
            with tempfile.NamedTemporaryFile(delete=False) as tmp:
                tmp.write(data)

            __sessions__.new(tmp.name)
    # Try to open the specified file from the list of results from
    # the last find command.
    elif args.last:
        if not __sessions__.find:
            self.log('warning', "You haven't performed a find yet")
            return

        try:
            wanted = int(target)
        except ValueError:
            self.log('error', "The entry ID must be a number: {0}".format(target))
            return

        # Entries are numbered starting from 1.
        for count, item in enumerate(__sessions__.find, 1):
            if count != wanted:
                continue

            path = get_sample_path(item.sha256)
            if path:
                __sessions__.new(path)
            else:
                self.log('warning', "The file for entry {0} is not available in the repository".format(wanted))
            break
        else:
            self.log('warning', "No entry {0} in the results of the last find".format(wanted))
    # Otherwise we assume it's an hash of an previously stored sample.
    else:
        target = target.strip().lower()

        # Tell the hash type apart by its hex length.
        if len(target) == 32:
            key = 'md5'
        elif len(target) == 64:
            key = 'sha256'
        else:
            parser.print_usage()
            return

        rows = self.db.find(key=key, value=target)

        if not rows:
            self.log('warning', "No file found with the given hash {0}".format(target))
            return

        path = get_sample_path(rows[0].sha256)
        if path:
            __sessions__.new(path)
def language(self):
    """Guess the programming language of the open PE file.

    The guess is based on imported DLL names and on strings found in the
    file content. A warning is logged when a section's entropy suggests
    the file is packed. With ``self.args.scan`` set, every other stored
    sample is analysed the same way and those with the same guessed
    language are logged in a table. Nothing is returned.
    """

    def get_iat(pe):
        """Return the DLL names listed in the import directory, if any."""
        if not hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
            return []
        return [peimport.dll for peimport in pe.DIRECTORY_ENTRY_IMPORT]

    def check_module(iat, match):
        """Tell whether any imported DLL name contains ``match``."""
        return any(imp.find(match) != -1 for imp in iat)

    def has_cpp_marker(data):
        """Tell whether any string mentions 'type_info' or 'RTTI'."""
        return any('type_info' in line or 'RTTI' in line for line in data)

    def is_delphi(data):
        """Tell whether a 'Borland' string has a path part with 'Delphi'."""
        for line in data:
            if 'Borland' in line and any('Delphi' in p for p in line.split('\\')):
                return True
        return False

    def is_vbdotnet(data):
        """Tell whether a 'Compiler' string has a 'VisualBasic' dotted part."""
        for line in data:
            if 'Compiler' in line and 'VisualBasic' in line.split('.'):
                return True
        return False

    def is_autoit(data):
        """Tell whether any string contains 'AU3!'."""
        return any('AU3!' in line for line in data)

    def is_packed(pe):
        """Tell whether any section has an entropy above 7."""
        return any(section.get_entropy() > 7 for section in pe.sections)

    def get_strings(content):
        """Return all runs of 4+ characters from the allowed set."""
        # Raw string: same character class as before, without relying on
        # invalid escape sequences ("\-", "\.") in a normal literal.
        regexp = r'[\x30-\x39\x41-\x5f\x61-\x7a\-\.:]{4,}'
        return re.findall(regexp, content)

    def find_language(iat, content):
        """Return the guessed language name, or None when undetermined."""
        # VB check. Return the name itself (the former `True` was logged as
        # "Probable language: True").
        if check_module(iat, 'VB'):
            return 'Visual Basic'

        data = get_strings(content)

        # .NET check. The VB.NET test must happen here: it could never run
        # while it was nested under the "nothing found yet" branch.
        if check_module(iat, 'mscoree.dll'):
            if is_vbdotnet(data):
                return 'Visual Basic .NET'
            return '.NET'

        # C DLL check
        uses_c_runtime = (
            check_module(iat, 'msvcr')
            or check_module(iat, 'MSVCR')
            or check_module(iat, 'c++')
        )

        # C runtime plus a C++ marker string means C++; the runtime alone C.
        if uses_c_runtime and has_cpp_marker(data):
            return 'CPP'
        if uses_c_runtime:
            return 'C'
        if is_delphi(data):
            return 'Delphi'
        if is_autoit(data):
            return 'AutoIt'

        return None

    if not self.__check_session():
        return

    if is_packed(self.pe):
        self.log('warning', "Probably packed, the language guess might be unreliable")

    language = find_language(
        get_iat(self.pe),
        __sessions__.current.file.data
    )

    if language:
        self.log('info', "Probable language: {0}".format(bold(language)))
    else:
        self.log('error', "Programming language not identified")
        return

    if self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        db = Database()
        samples = db.find(key='all')

        matches = []
        for sample in samples:
            # Leave the open file itself out of the results.
            if sample.sha256 == __sessions__.current.file.sha256:
                continue

            sample_path = get_sample_path(sample.sha256)
            # The path may be empty when the binary is not in the repository.
            if not sample_path or not os.path.exists(sample_path):
                continue

            try:
                cur_pe = pefile.PE(sample_path)
            except pefile.PEFormatError:
                # Not a PE file; it cannot match.
                continue

            # Close the file promptly instead of leaking one handle per sample.
            with open(sample_path, 'rb') as handle:
                content = handle.read()

            # `language` is known to be set here, so equality also rules out
            # samples whose language could not be guessed.
            if find_language(get_iat(cur_pe), content) != language:
                continue

            cur_packed = 'Yes' if is_packed(cur_pe) else ''
            matches.append([sample.name, sample.md5, cur_packed])

        if matches:
            self.log('table', dict(header=['Name', 'MD5', 'Is Packed'], rows=matches))
        else:
            self.log('info', "No matches found")
def imphash(self):
    """Show the imphash of the open PE file, or cluster/scan the repository.

    Options read from ``self.args`` (mutually exclusive):

    * ``cluster`` -- group all stored samples by imphash and log every
      group with more than one member; no open session is needed;
    * ``scan``    -- after logging the imphash of the open file, log the
      other stored samples that share it.

    Nothing is returned; all output goes through ``self.log``.
    """

    def sample_imphash(sample):
        """Return the imphash of a stored sample, or None if unavailable."""
        sample_path = get_sample_path(sample.sha256)
        # The path may be empty when the binary is not in the repository.
        if not sample_path or not os.path.exists(sample_path):
            return None

        try:
            return pefile.PE(sample_path).get_imphash()
        except Exception:
            # Best effort, as before: files that cannot be parsed (or a
            # pefile without imphash support) are simply left out.
            return None

    if self.args.scan and self.args.cluster:
        self.log('error', "You selected two exclusive options, pick one")
        return

    if self.args.cluster:
        self.log('info', "Clustering all samples by imphash...")

        db = Database()
        samples = db.find(key='all')

        cluster = {}
        for sample in samples:
            cur_imphash = sample_imphash(sample)
            if cur_imphash is None:
                continue

            cluster.setdefault(cur_imphash, []).append([sample.sha256, sample.name])

        for cluster_name, cluster_members in cluster.items():
            # Skipping clusters with only one entry.
            if len(cluster_members) == 1:
                continue

            self.log('info', "Imphash cluster {0}".format(bold(cluster_name)))

            # The first column holds SHA256 digests; the header used to
            # mislabel it as MD5.
            self.log('table', dict(header=['SHA256', 'Name'], rows=cluster_members))

        return

    if self.__check_session():
        try:
            current_imphash = self.pe.get_imphash()
        except AttributeError:
            self.log('error', "No imphash support, upgrade pefile to a version >= 1.2.10-139 (`pip install --upgrade pefile`)")
            return

        self.log('info', "Imphash: {0}".format(bold(current_imphash)))

        if self.args.scan:
            self.log('info', "Scanning the repository for matching samples...")

            db = Database()
            samples = db.find(key='all')

            matches = []
            for sample in samples:
                # Leave the open file itself out of the results.
                if sample.sha256 == __sessions__.current.file.sha256:
                    continue

                cur_imphash = sample_imphash(sample)
                if cur_imphash is None:
                    continue

                if current_imphash == cur_imphash:
                    matches.append([sample.name, sample.sha256])

            self.log('info', "{0} relevant matches found".format(bold(len(matches))))

            if matches:
                self.log('table', dict(header=['Name', 'SHA256'], rows=matches))