def cmd_new(self, *args):
    title = input("Enter a title for the new file: ")

    # Create a new temporary file.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    # Open the temporary file with the default editor, or with nano.
    os.system('"${EDITOR:-nano}" ' + tmp.name)

    __sessions__.new(tmp.name)
    __sessions__.current.file.name = title

    print_info("New file with title \"{0}\" added to the current session".format(bold(title)))
def _check_add(self, new_event):
    if not new_event.get('Event'):
        self.log('error', new_event)
        return
    old_related = self._find_related_id(__sessions__.current.misp_event.event.get('Event'))
    new_related = self._find_related_id(new_event.get('Event'))
    for related in new_related:
        if related not in old_related:
            self.log('success', 'New related event: {}/{}'.format(self.url.rstrip('/'), related))
        else:
            self.log('info', 'Related event: {}/{}'.format(self.url.rstrip('/'), related))
    __sessions__.new(misp_event=MispEvent(new_event))
def cmd_tags(self, *args):
    parser = argparse.ArgumentParser(prog='tags', description="Modify tags of the opened file")
    parser.add_argument('-a', '--add', metavar='TAG', help="Add tags to the opened file (comma separated)")
    parser.add_argument('-d', '--delete', metavar='TAG', help="Delete a tag from the opened file")

    try:
        args = parser.parse_args(args)
    except:
        return

    # This command requires a session to be opened.
    if not __sessions__.is_set():
        self.log('error', "No session opened")
        parser.print_usage()
        return

    # If no arguments are specified, there's not much to do.
    # However, it could make sense to also retrieve a list of existing
    # tags from this command, and not just from the "find" command alone.
    if args.add is None and args.delete is None:
        parser.print_usage()
        return

    # TODO: handle the situation where the addition or deletion of a tag fails.

    db = Database()
    if not db.find(key='sha256', value=__sessions__.current.file.sha256):
        self.log('error', "The opened file is not stored in the database. "
                          "If you want to add it use the `store` command.")
        return

    if args.add:
        # Add the specified tags to the database entry belonging to
        # the opened file.
        db.add_tags(__sessions__.current.file.sha256, args.add)
        self.log('info', "Tags added to the currently opened file")

        # We refresh the opened session to update the attributes.
        # Namely, the list of tags returned by the 'info' command
        # needs to be re-generated, or it wouldn't show the new tags
        # until the existing session is closed and a new one is opened.
        self.log('info', "Refreshing session to update attributes...")
        __sessions__.new(__sessions__.current.file.path)

    if args.delete:
        # Delete the tag from the database.
        db.delete_tag(args.delete, __sessions__.current.file.sha256)

        # Refresh the session so that the attributes of the file are
        # updated.
        self.log('info', "Refreshing session to update attributes...")
        __sessions__.new(__sessions__.current.file.path)
def upload(self):
    if not __sessions__.is_set():
        self.log('error', "No session opened")
        return False

    categ = self.categories.get(self.args.categ)
    if self.args.info is not None:
        info = ' '.join(self.args.info)
    else:
        info = None

    if __sessions__.current.misp_event and self.args.event is None:
        event = __sessions__.current.misp_event.event_id
    else:
        event = None

    try:
        out = self.misp.upload_sample(__sessions__.current.file.name, __sessions__.current.file.path,
                                      event, self.args.distrib, self.args.ids, categ, info,
                                      self.args.analysis, self.args.threat)
    except Exception as e:
        self.log('error', e)
        return

    result = out.json()
    if out.status_code == 200:
        if result.get('errors') is not None:
            self.log('error', result.get('errors')[0]['error']['value'][0])
        else:
            if event is not None:
                full_event = self.misp.get_event(event)
                return __sessions__.new(misp_event=MispEvent(full_event.json()))
            # TODO: also open a session when upload_sample created a new event
            # (the response doesn't contain the event ID)
            # __sessions__.new(misp_event=MispEvent(result))
            self.log('success', "File uploaded successfully")
    else:
        self.log('error', result.get('message'))
def download(self):
    ok = False
    data = None
    if self.args.event:
        ok, data = self.misp.download_samples(event_id=self.args.event)
    elif self.args.hash:
        ok, data = self.misp.download_samples(sample_hash=self.args.hash)
    else:
        # Download from the current MISP event if possible.
        if not __sessions__.is_set():
            self.log('error', "No session opened")
            return False
        if not __sessions__.current.misp_event:
            self.log('error', "Not connected to a MISP event.")
            return False
        ok, data = self.misp.download_samples(event_id=__sessions__.current.misp_event.event_id)

    if not ok:
        self.log('error', data)
        return

    to_print = []
    for d in data:
        eid, filename, payload = d
        path = os.path.join(tempfile.gettempdir(), filename)
        with open(path, 'w') as f:
            f.write(payload.getvalue())
        to_print.append((eid, path))

    if len(to_print) == 1:
        self.log('success', 'The sample has been downloaded from Event {}'.format(to_print[0][0]))
        event = self.misp.get_event(to_print[0][0])
        return __sessions__.new(to_print[0][1], MispEvent(event.json()))
    else:
        self.log('success', 'The following files have been downloaded:')
        for p in to_print:
            self.log('success', '\tEventID: {} - {}'.format(*p))
def search_local_hashes(self, event):
    local = []
    samples_count = 0
    for a in event['Event']['Attribute']:
        row = None
        if a['type'] == 'malware-sample':
            samples_count += 1
        if a['type'] in ('malware-sample', 'filename|md5', 'md5'):
            h = a['value']
            if '|' in a['type']:
                h = a['value'].split('|')[1]
            row = Database().find(key='md5', value=h)
        elif a['type'] in ('sha1', 'filename|sha1'):
            h = a['value']
            if '|' in a['type']:
                h = a['value'].split('|')[1]
            row = Database().find(key='sha1', value=h)
        elif a['type'] in ('sha256', 'filename|sha256'):
            h = a['value']
            if '|' in a['type']:
                h = a['value'].split('|')[1]
            row = Database().find(key='sha256', value=h)
        if row:
            local.append(row[0])

    self.log('info', 'This event contains {} samples.'.format(samples_count))

    shas = set([l.sha256 for l in local])
    if len(shas) == 1:
        __sessions__.new(get_sample_path(shas.pop()), MispEvent(event))
    elif len(shas) > 1:
        self.log('success', 'The following samples are in this cirtkit instance:')
        __sessions__.new(misp_event=MispEvent(event))
        for s in shas:
            self.log('item', s)
    else:
        __sessions__.new(misp_event=MispEvent(event))
        self.log('info', 'No known (in Viper) samples in that event.')
def resources(self):
    # Use this function to retrieve resources for the given PE instance.
    # Returns all the identified resources with indicators and attributes.
    def get_resources(pe):
        resources = []
        if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            count = 1
            for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                try:
                    resource = {}

                    if resource_type.name is not None:
                        name = str(resource_type.name)
                    else:
                        name = str(pefile.RESOURCE_TYPE.get(resource_type.struct.Id))

                    if name is None:
                        name = str(resource_type.struct.Id)

                    if hasattr(resource_type, 'directory'):
                        for resource_id in resource_type.directory.entries:
                            if hasattr(resource_id, 'directory'):
                                for resource_lang in resource_id.directory.entries:
                                    data = pe.get_data(resource_lang.data.struct.OffsetToData, resource_lang.data.struct.Size)
                                    filetype = get_type(data)
                                    md5 = get_md5(data)
                                    language = pefile.LANG.get(resource_lang.data.lang, None)
                                    sublanguage = pefile.get_sublang_name_for_lang(resource_lang.data.lang, resource_lang.data.sublang)
                                    offset = ('%-8s' % hex(resource_lang.data.struct.OffsetToData)).strip()
                                    size = ('%-8s' % hex(resource_lang.data.struct.Size)).strip()

                                    resource = [count, name, offset, md5, size, filetype, language, sublanguage]

                                    # Dump resources if requested to and if the file currently being
                                    # processed is the opened session file.
                                    # This avoids dumping the resources of every file
                                    # scanned during a --scan.
                                    if (self.args.open or self.args.dump) and pe == self.pe:
                                        if self.args.dump:
                                            folder = self.args.dump
                                        else:
                                            folder = tempfile.mkdtemp()

                                        resource_path = os.path.join(folder, '{0}_{1}_{2}'.format(__sessions__.current.file.md5, offset, name))
                                        resource.append(resource_path)

                                        with open(resource_path, 'wb') as resource_handle:
                                            resource_handle.write(data)

                                    resources.append(resource)

                                    count += 1
                except Exception as e:
                    self.log('error', e)
                    continue

        return resources

    if not self.__check_session():
        return

    # Obtain resources for the currently opened file.
    resources = get_resources(self.pe)

    if not resources:
        self.log('warning', "No resources found")
        return

    headers = ['#', 'Name', 'Offset', 'MD5', 'Size', 'File Type', 'Language', 'Sublanguage']
    if self.args.dump or self.args.open:
        headers.append('Dumped To')

    self.log('table', dict(header=headers, rows=resources))

    # If instructed, open a session on the given resource.
    if self.args.open:
        for resource in resources:
            if resource[0] == self.args.open:
                __sessions__.new(resource[8])
                return
    # If instructed to perform a scan across the repository, start looping
    # through all available files.
    elif self.args.scan:
        self.log('info', "Scanning the repository for matching samples...")

        # Retrieve the list of samples stored locally and available in the
        # database.
        db = Database()
        samples = db.find(key='all')

        matches = []
        for sample in samples:
            # Skip if it's the same file.
            if sample.sha256 == __sessions__.current.file.sha256:
                continue

            # Obtain the path to the binary.
            sample_path = get_sample_path(sample.sha256)
            if not os.path.exists(sample_path):
                continue

            # Open a PE instance.
            try:
                cur_pe = pefile.PE(sample_path)
            except:
                continue

            # Obtain the list of resources for the current iteration.
            cur_resources = get_resources(cur_pe)
            matched_resources = []
            # Loop through this entry's resources.
            for cur_resource in cur_resources:
                # Loop through the opened file's resources.
                for resource in resources:
                    # If there is a common resource, add it to the list.
                    if cur_resource[3] == resource[3]:
                        matched_resources.append(resource[3])

            # If there are any common resources, add the entry to the list
            # of matched samples.
            if len(matched_resources) > 0:
                matches.append([sample.name, sample.md5, '\n'.join(r for r in matched_resources)])

        self.log('info', "{0} relevant matches found".format(bold(len(matches))))

        if len(matches) > 0:
            self.log('table', dict(header=['Name', 'MD5', 'Resource MD5'], rows=matches))
def run(self):
    super(Debup, self).run()
    if self.args is None:
        return

    if not __sessions__.is_set():
        self.log('error', "No session opened")
        return

    if not HAVE_OLE:
        self.log('error', "Missing dependency, install olefile (`pip install olefile`)")
        return

    # Check for a valid OLE file.
    if not olefile.isOleFile(__sessions__.current.file.path):
        self.log('error', "Not a valid BUP File")
        return

    # Extract all the contents from the bup file.
    ole = olefile.OleFileIO(__sessions__.current.file.path)

    # We know that BUPs are xor'd with 6A, which is 106 decimal, for the decoder.
    # This is the stored file.
    data = self.xordata(ole.openstream('File_0').read(), 106)

    # Get the details page.
    data2 = self.xordata(ole.openstream('Details').read(), 106)

    # Close the OLE file.
    ole.close()

    # Process the details file.
    rows = []
    lines = data2.split('\n')
    for line in lines:
        if line.startswith('OriginalName'):
            fullpath = line.split('=')[1]
            pathsplit = fullpath.split('\\')
            filename = str(pathsplit[-1][:-1])
        try:
            k, v = line.split('=')
            rows.append([k, v[:-1]])  # Strip the \r from v
        except:
            pass

    # If we opted to switch session, then do that.
    if data and self.args.session:
        try:
            tempName = os.path.join('/tmp', filename)
            with open(tempName, 'w') as temp:
                temp.write(data)
            self.log('info', "Switching Session to Embedded File")
            __sessions__.new(tempName)
            return
        except:
            self.log('error', "Unable to Switch Session")
    # Else just print the data.
    else:
        self.log('info', "BUP Details:")
        self.log('table', dict(header=['Description', 'Value'], rows=rows))
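# NOTE: the Debup.run() method above relies on a self.xordata() helper that is
# not included in this listing. Going by the comment ("BUPs are xor'd with 6A,
# which is 106 decimal"), it is presumably a single-byte XOR decoder. A minimal
# sketch of such a helper follows; the name and signature are inferred from the
# call sites above, and the byte-string handling assumes the Python 2 style
# used elsewhere in this module.
def xordata(self, data, key):
    # Apply a single-byte XOR over the whole stream (key 0x6A / 106 for BUP files).
    return ''.join(chr(ord(c) ^ key) for c in data)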
def scan(self):

    def string_printable(line):
        line = str(line)
        new_line = ''
        for c in line:
            if c in printstring.printable:
                new_line += c
            else:
                new_line += '\\x' + c.encode('hex')
        return new_line

    # This means users can just drop or remove rule files without
    # having to worry about maintaining the index.
    # TODO: make paths absolute.
    # TODO: this regenerates the file at every run, perhaps we
    # could find a way to optimize this.
    def rule_index():
        tmp_path = os.path.join(tempfile.gettempdir(), 'index.yara')
        with open(tmp_path, 'w') as rules_index:
            for rule_file in os.listdir(self.rule_path):
                # Skip if the extension is not right, could cause problems.
                if not rule_file.endswith('.yar') and not rule_file.endswith('.yara'):
                    continue
                # Skip if it's the index itself.
                if rule_file == 'index.yara':
                    continue

                # Add the rule to the index.
                line = 'include "{0}"\n'.format(os.path.join(self.rule_path, rule_file))
                rules_index.write(line)

        return tmp_path

    arg_rule = self.args.rule
    arg_scan_all = self.args.all
    arg_tag = self.args.tag

    # If no custom ruleset is specified, we use the default one.
    if not arg_rule:
        arg_rule = rule_index()

    # Check if the selected ruleset actually exists.
    if not os.path.exists(arg_rule):
        self.log('error', "No valid Yara ruleset at {0}".format(arg_rule))
        return

    # Compile all rules from the given ruleset.
    rules = yara.compile(arg_rule)
    files = []

    # If there is a session open and the user didn't specifically
    # request to scan the full repository, we just add the currently
    # opened file's path.
    if __sessions__.is_set() and not arg_scan_all:
        files.append(__sessions__.current.file)
    # Otherwise we loop through all files in the repository and queue
    # them up for scan.
    else:
        self.log('info', "Scanning all stored files...")

        db = Database()
        samples = db.find(key='all')

        for sample in samples:
            files.append(sample)

    for entry in files:
        if entry.size == 0:
            continue

        self.log('info', "Scanning {0} ({1})".format(entry.name, entry.sha256))

        # Check if the entry has a path attribute. This happens when
        # there is a session open. We need to distinguish this just for
        # the cases where we're scanning an opened file which has not been
        # stored yet.
        if hasattr(entry, 'path'):
            entry_path = entry.path
        # This should be triggered only when scanning the full repository.
        else:
            entry_path = get_sample_path(entry.sha256)

        # Check if the file exists before running the yara scan.
        if not os.path.exists(entry_path):
            self.log('error', "The file does not exist at path {0}".format(entry_path))
            return

        rows = []
        tag_list = []
        for match in rules.match(entry_path):
            # Add a row for each string matched by the rule.
            for string in match.strings:
                rows.append([match.rule, string_printable(string[1]), string_printable(string[0]), string_printable(string[2])])

            # Add matching rules to our list of tags.
            # First it checks if there are tags specified in the metadata
            # of the Yara rule.
            match_tags = match.meta.get('tags')
            # If not, use the rule name.
            # TODO: as we add more and more yara rules, we might remove
            # this option and only tag the file with rules that had
            # tags specified in them.
            if not match_tags:
                match_tags = match.rule

            # Add the tags to the list.
            tag_list.append([entry.sha256, match_tags])

        if rows:
            header = ['Rule', 'String', 'Offset', 'Content']
            self.log('table', dict(header=header, rows=rows))

        # If we selected to add tags do that now.
        if rows and arg_tag:
            db = Database()
            for tag in tag_list:
                db.add_tags(tag[0], tag[1])

            # If in a session reset the session to see tags.
            if __sessions__.is_set() and not arg_scan_all:
                self.log('info', "Refreshing session to update attributes...")
                __sessions__.new(__sessions__.current.file.path)
def decompress(self, dump_dir):
    # Check if the file type is right.
    # TODO: this might be a bit hacky, need to verify whether a malformed
    # Flash exploit would get a different file type.
    if 'Flash' not in __sessions__.current.file.type:
        self.log('error', "The opened file doesn't appear to be a valid SWF object")
        return

    # Retrieve key information from the opened SWF file.
    header, version, size, data = self.parse_swf()
    # Decompressed data.
    decompressed = None

    # Check if the file is already a decompressed Flash object.
    if header == 'FWS':
        self.log('info', "The opened file doesn't appear to be compressed")
        return
    # Check if the file is compressed with zlib.
    elif header == 'CWS':
        self.log('info', "The opened file appears to be compressed with Zlib")

        # Open a handle on the compressed data.
        compressed = StringIO(data)
        # Skip the header.
        compressed.read(3)
        # Decompress and reconstruct the Flash object.
        decompressed = 'FWS' + compressed.read(5) + zlib.decompress(compressed.read())
    # Check if the file is compressed with lzma.
    elif header == 'ZWS':
        self.log('info', "The opened file appears to be compressed with Lzma")

        # We need a third-party library to decompress this.
        if not HAVE_PYLZMA:
            self.log('error', "Missing dependency, please install pylzma (`pip install pylzma`)")
            return

        # Open a handle on the compressed data.
        compressed = StringIO(data)
        # Skip the header.
        compressed.read(3)
        # Decompress with pylzma and reconstruct the Flash object.
        ## ZWS(LZMA)
        ## | 4 bytes       | 4 bytes   | 4 bytes       | 5 bytes    | n bytes   | 6 bytes         |
        ## | 'ZWS'+version | scriptLen | compressedLen | LZMA props | LZMA data | LZMA end marker |
        decompressed = 'FWS' + compressed.read(5)
        compressed.read(4)  # Skip compressedLen.
        decompressed += pylzma.decompress(compressed.read())

    # If we obtained some decompressed data, we print it and eventually
    # dump it to file.
    if decompressed:
        # Print the decompressed data.
        # TODO: this prints too much, need to find a better way to display
        # this. Paginate?
        self.log('', cyan(hexdump(decompressed)))

        if dump_dir:
            # Dump the decompressed SWF file to the specified directory
            # or to the default temporary one.
            dump_path = os.path.join(dump_dir, '{0}.swf'.format(get_md5(decompressed)))
            with open(dump_path, 'wb') as handle:
                handle.write(decompressed)

            self.log('info', "Flash object dumped at {0}".format(dump_path))

            # Directly open a session on the dumped Flash object.
            __sessions__.new(dump_path)
def att_session(att_id, msg, ole_flag):
    att_count = 0
    if ole_flag:
        ole = msg
        # Hard part now: each part of the attachment is in a separate stream,
        # so we need to get a unique stream id for each attachment.
        # It's in the stream name as an 8 digit number.
        for i in range(20):  # Arbitrary count of emails, we don't expect this many.
            stream_number = str(i).zfill(8)
            stream_name = "__attach_version1.0_#" + stream_number
            # Unicode
            try:
                att_filename = ole.openstream(stream_name + "/__substg1.0_3704001F").read()
                att_filename = att_filename.replace("\x00", "")
                att_data = ole.openstream(stream_name + "/__substg1.0_37010102").read()
            except:
                pass
            # ASCII
            try:
                att_filename = ole.openstream(stream_name + "/__substg1.0_3704001E").read()
                att_data = ole.openstream(stream_name + "/__substg1.0_37010102").read()
            except:
                pass
            if i == att_id:
                self.log("info", "Switching session to {0}".format(att_filename))
                tmp_path = os.path.join(tempfile.gettempdir(), att_filename)
                with open(tmp_path, "w") as tmp:
                    tmp.write(att_data)
                __sessions__.new(tmp_path)
                return
    else:
        for part in msg.walk():
            if part.get_content_type() == "message/rfc822":
                rfc822 = True
            else:
                rfc822 = False

            if part.get_content_maintype() == "multipart" or not part.get("Content-Disposition") and not rfc822:
                continue

            att_count += 1
            if att_count == att_id:
                if rfc822:
                    data = part.as_string()
                    m = re.match("Content-Type: message/rfc822\r?\n\r?\n(.*)", data, flags=re.S)
                    if not m:
                        self.log("error", "Could not extract RFC822 formatted message")
                        return

                    data = m.group(1)
                    att_size = len(data)
                    filename = "rfc822msg_{0}.eml".format(att_size)
                else:
                    data = part.get_payload(decode=True)
                    filename = part.get_filename()

                self.log("info", "Switching session to {0}".format(filename))

                if data:
                    tmp_path = os.path.join(tempfile.gettempdir(), filename)
                    with open(tmp_path, "w") as tmp:
                        tmp.write(data)
                    __sessions__.new(tmp_path)
                    return
def cmd_open(self, *args):
    parser = argparse.ArgumentParser(prog='open', description="Open a file",
                                     epilog="You can also specify a MD5 or SHA256 hash to a previously stored file in order to open a session on it.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-f', '--file', action='store_true', help="Target is a file")
    group.add_argument('-u', '--url', action='store_true', help="Target is a URL")
    group.add_argument('-l', '--last', action='store_true', help="Target is the entry number from the last find command's results")
    parser.add_argument('-t', '--tor', action='store_true', help="Download the file through Tor")
    parser.add_argument("value", metavar='PATH, URL, HASH or ID', nargs='*', help="Target to open. Hash can be md5 or sha256. ID has to be from the last search.")

    try:
        args = parser.parse_args(args)
    except:
        return

    target = " ".join(args.value)

    if not args.last and target is None:
        parser.print_usage()
        return

    # If it's a file path, open a session on it.
    if args.file:
        target = os.path.expanduser(target)

        if not os.path.exists(target) or not os.path.isfile(target):
            self.log('error', "File not found: {0}".format(target))
            return

        __sessions__.new(target)
    # If it's a URL, download it and open a session on the temporary file.
    elif args.url:
        data = download(url=target, tor=args.tor)

        if data:
            tmp = tempfile.NamedTemporaryFile(delete=False)
            tmp.write(data)
            tmp.close()

            __sessions__.new(tmp.name)
    # Try to open the specified file from the list of results from
    # the last find command.
    elif args.last:
        if __sessions__.find:
            count = 1
            for item in __sessions__.find:
                if count == int(target):
                    __sessions__.new(get_sample_path(item.sha256))
                    break

                count += 1
        else:
            self.log('warning', "You haven't performed a find yet")
    # Otherwise we assume it's a hash of a previously stored sample.
    else:
        target = target.strip().lower()

        if len(target) == 32:
            key = 'md5'
        elif len(target) == 64:
            key = 'sha256'
        else:
            parser.print_usage()
            return

        rows = self.db.find(key=key, value=target)

        if not rows:
            self.log('warning', "No file found with the given hash {0}".format(target))
            return

        path = get_sample_path(rows[0].sha256)
        if path:
            __sessions__.new(path)
def streams(self):

    def get_streams():
        # This function is brutally ripped from Brandon Dixon's swf_mastah.py.

        # Initialize the peepdf parser.
        parser = PDFParser()
        # Parse the currently opened PDF document.
        ret, pdf = parser.parse(__sessions__.current.file.path, True, False)
        # Generate statistics.

        results = []
        objects = []
        count = 0
        object_counter = 1

        for i in range(len(pdf.body)):
            body = pdf.body[count]
            objects = body.objects

            for index in objects:
                oid = objects[index].id
                offset = objects[index].offset
                size = objects[index].size
                details = objects[index].object

                if details.type == 'stream':
                    decoded_stream = details.decodedStream

                    result = [
                        object_counter,
                        oid,
                        offset,
                        size,
                        get_type(decoded_stream)[:100]
                    ]

                    # If the stream needs to be dumped or opened, we do it
                    # and expand the results with the path to the stream dump.
                    if arg_open or arg_dump:
                        # If we were instructed to dump, we already have a base folder.
                        if arg_dump:
                            folder = arg_dump
                        # Otherwise we just generate a temporary one.
                        else:
                            folder = tempfile.gettempdir()

                        # Confirm the dump path.
                        if not os.path.exists(folder):
                            try:
                                os.makedirs(folder)
                            except Exception as e:
                                self.log('error', "Unable to create directory at {0}: {1}".format(folder, e))
                                return results
                        else:
                            if not os.path.isdir(folder):
                                self.log('error', "You need to specify a folder, not a file")
                                return results

                        # Dump the stream to this path.
                        # TODO: sometimes there appear to be multiple streams
                        # with the same object ID. Is that even possible?
                        # It will cause conflicts.
                        dump_path = '{0}/{1}_{2}_pdf_stream.bin'.format(folder, __sessions__.current.file.md5, object_counter)

                        with open(dump_path, 'wb') as handle:
                            handle.write(decoded_stream.strip())

                        # Add the dump path to the stream attributes.
                        result.append(dump_path)

                    # Update the list of streams.
                    results.append(result)

                    object_counter += 1

            count += 1

        return results

    arg_open = self.args.open
    arg_dump = self.args.dump

    # Retrieve the list of streams.
    streams = get_streams()

    # Show the list of streams.
    header = ['#', 'ID', 'Offset', 'Size', 'Type']
    if arg_dump or arg_open:
        header.append('Dumped To')

    self.log('table', dict(header=header, rows=streams))

    # If the user requested to open a specific stream, we open a new
    # session on it.
    if arg_open:
        for stream in streams:
            if int(arg_open) == int(stream[0]):
                __sessions__.new(stream[5])
                return
def do_get(self, line):
    '''
    Command: get

    Description:
    Get (copy) a file, or parts of a file, from the sensor.

    Args:
    get [OPTIONS] <RemotePath> <LocalPath>

    where OPTIONS are:
    -o, --offset : The offset to start getting the file at
    -b, --bytes  : How many bytes of the file to get. The default is all bytes.
    '''
    self._needs_attached()

    import tempfile

    if not __project__.name:
        print_error("Must open an investigation to retrieve files")
        return

    # Close the session of the current file, if one is opened.
    if __sessions__:
        __sessions__.close()

    # Establish a connection to the database.
    db = Database()

    p = CliArgs(usage='get [OPTIONS] <RemoteFile> <LocalName>')
    p.add_option('-o', '--offset', default="0", help='Offset of the file to start grabbing')
    p.add_option('-b', '--bytes', default=None, help='How many bytes to grab')
    (opts, args) = p.parse_line(line)

    if len(args) != 2:
        raise CliArgsException("Wrong number of args to get command")

    # Create a new temporary file.
    fout = tempfile.NamedTemporaryFile(delete=False)

    # Fix the file path.
    gfile = self._file_path_fixup(args[0])

    hargs = {}
    offset = 0
    if opts.offset != 0:
        hargs['offset'] = int(opts.offset)

    if opts.bytes:
        hargs['get_count'] = int(opts.bytes)

    try:
        ret = self._postCommandAndWait("get file", gfile, args=hargs)
        fid = ret["file_id"]
        url = '%s/api/v1/cblr/session/%d/file/%d/content' % (self.url, self.session, fid)
        fdata = self._doGet(url, retJSON=False)

        fout.write(fdata)
        fout.close()

        __sessions__.new(fout.name)
        store_sample(__sessions__.current.file)
        __sessions__.current.file.path = get_sample_path(__sessions__.current.file.sha256)
        db.add(obj=__sessions__.current.file)
        os.remove(fout.name)
    except:
        # Delete the output file on error.
        fout.close()
        os.remove(fout.name)
        raise
def run(self):
    super(VirusTotal, self).run()
    if self.args is None:
        return

    if self.args.hash:
        try:
            params = {'apikey': KEY, 'hash': self.args.hash}
            response = requests.get(VIRUSTOTAL_URL_DOWNLOAD, params=params)

            if response.status_code == 403:
                self.log('error', 'This command requires a VirusTotal private API key')
                self.log('error', 'Please check that your key has the right permissions')
                return
            if response.status_code == 200:
                response = response.content
                tmp = tempfile.NamedTemporaryFile(delete=False)
                tmp.write(response)
                tmp.close()
                return __sessions__.new(tmp.name)
        except Exception as e:
            self.log('error', "Failed to download file: {0}".format(e))

    if not HAVE_REQUESTS:
        self.log('error', "Missing dependency, install requests (`pip install requests`)")
        return

    if not __sessions__.is_set():
        self.log('error', "No session opened")
        return

    data = {'resource': __sessions__.current.file.md5, 'apikey': KEY}

    try:
        response = requests.post(VIRUSTOTAL_URL, data=data)
    except Exception as e:
        self.log('error', "Failed performing request: {0}".format(e))
        return

    try:
        virustotal = response.json()
        # Since Python 2.7 the line above can raise "'dict' object is not callable".
    except Exception as e:
        # Workaround in case of Python 2.7.
        if str(e) == "'dict' object is not callable":
            try:
                virustotal = response.json
            except Exception as e:
                self.log('error', "Failed parsing the response: {0}".format(e))
                self.log('error', "Data:\n{}".format(response.content))
                return
        else:
            self.log('error', "Failed parsing the response: {0}".format(e))
            self.log('error', "Data:\n{}".format(response.content))
            return

    rows = []
    if 'scans' in virustotal:
        for engine, signature in virustotal['scans'].items():
            if signature['detected']:
                signature = signature['result']
            else:
                signature = ''
            rows.append([engine, signature])

    rows.sort()
    if rows:
        self.log('info', "VirusTotal Report:")
        self.log('table', dict(header=['Antivirus', 'Signature'], rows=rows))

        if self.args.submit:
            self.log('', "")
            self.log('info', "The file is already available on VirusTotal, no need to submit")
    else:
        self.log('info', "The file does not appear to be on VirusTotal yet")

        if self.args.submit:
            try:
                data = {'apikey': KEY}
                files = {'file': open(__sessions__.current.file.path, 'rb').read()}
                response = requests.post(VIRUSTOTAL_URL_SUBMIT, data=data, files=files)
            except Exception as e:
                self.log('error', "Failed Submit: {0}".format(e))
                return

            try:
                virustotal = response.json()
                # Since Python 2.7 the line above can raise "'dict' object is not callable".
            except Exception as e:
                # Workaround in case of Python 2.7.
                if str(e) == "'dict' object is not callable":
                    try:
                        virustotal = response.json
                    except Exception as e:
                        self.log('error', "Failed parsing the response: {0}".format(e))
                        self.log('error', "Data:\n{}".format(response.content))
                        return
                else:
                    self.log('error', "Failed parsing the response: {0}".format(e))
                    self.log('error', "Data:\n{}".format(response.content))
                    return

            if 'verbose_msg' in virustotal:
                self.log('info', "{}: {}".format(bold("VirusTotal message"), virustotal['verbose_msg']))

    if self.args.comment:
        try:
            data = {'apikey': KEY, 'resource': __sessions__.current.file.md5, 'comment': ' '.join(self.args.comment)}
            response = requests.post(VIRUSTOTAL_URL_COMMENT, data=data)
        except Exception as e:
            self.log('error', "Failed Submit Comment: {0}".format(e))
            return

        try:
            virustotal = response.json()
            # Since Python 2.7 the line above can raise "'dict' object is not callable".
        except Exception as e:
            # Workaround in case of Python 2.7.
            if str(e) == "'dict' object is not callable":
                try:
                    virustotal = response.json
                except Exception as e:
                    self.log('error', "Failed parsing the response: {0}".format(e))
                    self.log('error', "Data:\n{}".format(response.content))
                    return
            else:
                self.log('error', "Failed parsing the response: {0}".format(e))
                self.log('error', "Data:\n{}".format(response.content))
                return

        if 'verbose_msg' in virustotal:
            self.log('info', "{}: {}".format(bold("VirusTotal message"), virustotal['verbose_msg']))

    return
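# NOTE: every snippet in this listing ultimately funnels a file path (or a MISP
# event) into __sessions__.new(). A minimal sketch of that shared dump-then-open
# pattern follows. The import path matches upstream Viper and the cirtkit forks
# may expose the same global from a different module; the helper name and
# default filename are purely illustrative.
import os
import tempfile

# Import path as used by upstream Viper (assumption for forks).
from viper.core.session import __sessions__


def open_session_on_bytes(data, filename='dumped.bin'):
    # Write the payload to a temporary file and open a session on it,
    # mirroring the pattern used by the modules above.
    tmp_path = os.path.join(tempfile.gettempdir(), filename)
    with open(tmp_path, 'wb') as handle:
        handle.write(data)
    __sessions__.new(tmp_path)
    return tmp_path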