def parse_SysCache_hive(self): outfolder = self.myconfig('voutdir') if self.vss else self.myconfig( 'outdir') # self.tl_file = os.path.join(self.myconfig('timelinesdir'), "%s_BODY.csv" % self.myconfig('source')) check_directory(outfolder, create=True) SYSC = self.search.search(r"/System Volume Information/SysCache.hve$") ripcmd = self.config.get('plugins.common', 'rip', '/opt/regripper/rip.pl') for f in SYSC: p = f.split('/')[2] output_text = run_command([ ripcmd, "-r", os.path.join(self.myconfig('casedir'), f), "-p", "syscache_csv" ], logger=self.logger()) output_file = os.path.join(outfolder, "syscache_%s.csv" % p) self.path_from_inode = FileSystem( config=self.config).load_path_from_inode(self.myconfig, p, vss=self.vss) save_csv(self.parse_syscache_csv(p, output_text), outfile=output_file, file_exists='OVERWRITE') self.logger().info("Finished extraction from SysCache")
def run(self, path=""):
    """ Parse UsnJrnl files of a disk.

    Iterates every mountable partition (or every VSS snapshot when the
    'vss' flag is set) and delegates the actual parsing to _parse_usnjrnl.
    Returns an empty list (module convention).
    """
    self.vss = self.myflag('vss')
    disk = getSourceImage(self.myconfig)
    # Output goes to the VSS directory when parsing shadow copies
    self.usn_path = self.myconfig('voutdir') if self.vss else self.myconfig('outdir')
    check_folder(self.usn_path)
    # Temporary file where the journal is dumped before parsing
    self.usn_jrnl_file = os.path.join(self.usn_path, "UsnJrnl")
    self.filesystem = FileSystem(self.config, disk=disk)
    for p in disk.partitions:
        if not p.isMountable:
            continue
        if not self.vss:
            pname = ''.join(['p', p.partition])
            self._parse_usnjrnl(pname)
        else:
            # One pass per shadow copy device; empty device string means no snapshot
            for v, dev in p.vss.items():
                if dev == "":
                    continue
                self._parse_usnjrnl(v)
    # Delete the temporal UsnJrnl dumped file
    if os.path.exists(self.usn_jrnl_file):
        os.remove(self.usn_jrnl_file)
    return []
def run(self, path=""):
    """ Parses lnk files, jumlists and customdestinations.

    For every user profile found under mountdir, searches the three artifact
    types (lnk, automatic and custom jumplists) and writes one CSV per
    user/artifact combination. Returns an empty list (module convention).
    """
    self.logger().info("Extraction of lnk files")
    self.Files = GetFiles(self.config, vss=self.myflag("vss"))
    self.filesystem = FileSystem(self.config)
    self.mountdir = self.myconfig('mountdir')
    # NOTE(review): self.vss is read here but not set in this method -- presumably set
    # elsewhere in the class; confirm before relying on it
    lnk_path = self.myconfig('{}outdir'.format('v' if self.vss else ''))
    check_folder(lnk_path)
    users = get_user_list(self.mountdir, self.vss)
    # Each artifact: output filename template, search regex template, parser function
    artifacts = {
        'lnk': {
            'filename': "{}_lnk.csv",
            'regex': r"{}/.*\.lnk$",
            'function': self.lnk_parser
        },
        'autodest': {
            'filename': "{}_jl.csv",
            'regex': r"{}/.*\.automaticDestinations-ms$",
            'function': self.automaticDest_parser
        },
        'customdest': {
            'filename': "{}_jlcustom.csv",
            'regex': r"{}/.*\.customDestinations-ms$",
            'function': self.customDest_parser
        }
    }
    for user in users:
        # NOTE(review): "******" has no format placeholders, so usr is always the literal
        # "******" -- this looks like a redacted template (probably "{}_{}"); verify
        usr = "******".format(user.split("/")[0], user.split("/")[2])
        for a_name, artifact in artifacts.items():
            out_file = os.path.join(lnk_path, artifact['filename'].format(usr))
            files_list = list(self.Files.search(artifact['regex'].format(user)))
            self.logger().info("Founded {} {} files for user {} at {}".format(
                len(files_list), a_name, user.split("/")[-1], user.split("/")[0]))
            if len(files_list) > 0:
                save_csv(artifact['function'](files_list),
                         config=self.config,
                         outfile=out_file,
                         quoting=0,
                         file_exists='OVERWRITE')
                self.logger().info("{} extraction done for user {} at {}".format(
                    a_name, user.split("/")[-1], user.split("/")[0]))
    self.logger().info("RecentFiles extraction done")
    return []
def run(self, path=""):
    """ Characterizes a disk image.

    Loads the source image, builds the FileSystem helper for it and runs the
    Linux characterization report. Returns an empty list (module convention).
    """
    disk = getSourceImage(self.myconfig)
    self.disk = disk
    self.filesystem = FileSystem(self.config, disk=disk)
    self.characterize_Linux()
    return []
def run(self, path=""):
    """ Main function to extract $Recycle.bin files.

    Requires the timeline BODY file to exist; otherwise returns early.
    Parses the Recycle Bin of every partition (or VSS snapshot) and saves
    one CSV per parsed unit. Returns an empty list (module convention).
    """
    if self.vss:
        output_path = self.myconfig('voutdir')
    else:
        output_path = self.myconfig('outdir')
    try:
        check_file(self.timeline_file, error_missing=True)
    except base.job.RVTError:
        # Without a timeline there is nothing to cross-reference: bail out quietly
        return []
    check_directory(output_path, create=True)
    self.filesystem = FileSystem(self.config)
    # Get the users associated with each SID for every partition
    self.sid_user = {}
    if self.vss:
        for p in self.vss_partitions:
            self.sid_user[p] = self.generate_SID_user(p)
    else:
        for p in self.partitions:
            self.sid_user[p] = self.generate_SID_user(p)
    self.logger().info('Starting to parse RecycleBin')
    # RB_codes relates a six digit recyclebin code with a path for a file. Are updated for each partition or vss?
    self.RB_codes = {}
    if self.vss:
        for partition in self.vss_partitions:
            self.logger().info('Processing Recycle Bin in partition {}'.format(partition))
            try:
                self.parse_RecycleBin(partition)
            except Exception as exc:
                # Best-effort per snapshot unless stop_on_error is set
                if self.myflag('stop_on_error'):
                    raise exc
                continue
            output_file = os.path.join(output_path, "{}_recycle_bin.csv".format(partition))
            self.save_recycle_files(output_file, partition, sorting=True)
    else:
        try:
            self.parse_RecycleBin()
        except Exception as exc:
            if self.myflag('stop_on_error'):
                raise exc
            return []
        output_file = os.path.join(output_path, "recycle_bin.csv")
        self.save_recycle_files(output_file, sorting=True)
    self.logger().info("Done parsing Recycle Bin!")
    return []
def run(self, path=""):
    """ Search every keyword of the keyword file in the string files of the image.

    Parameters:
        path (str): filename with keywords to seek; falls back to the
            'keyfile' configuration option when empty.
    """
    self.disk = getSourceImage(self.myconfig)
    keyfile = path
    self.logger().debug('Testing existance of {}'.format(keyfile))
    if not keyfile:
        keyfile = self.myconfig('keyfile')
    check_file(keyfile, error_missing=True)
    # Get string files or generate them if not found
    self.string_path = self.myconfig('strings_dir')
    if not (check_directory(self.string_path) and os.listdir(self.string_path)):
        self.logger().debug("No string files found. Generating them")
        StringGenerate(config=self.config, disk=self.disk).generate_strings()
    self.search_path = self.myconfig('outdir')
    check_directory(self.search_path, create=True)
    self.keywords = getSearchItems(keyfile)  # Get kw:regex dictionary reading keyfile
    # Store set of blocks for kw and partition. Ex: {'my_kw': {'p02': set(1234, 1235, ...)}}
    self.blocks = {}
    # Store status for blocks with search hits in a partition.
    # Ex: {'03': {4547: 'Allocated', 1354536: 'Not Allocated'}}
    self.block_status = defaultdict(dict)
    self.fs_object = FileSystem(self.config, disk=self.disk)
    # Generate or load 'hits_' and 'blocks_' files
    for kname in tqdm(self.keywords, total=len(self.keywords), desc='Searching keywords in strings'):
        kw = kname.strip()
        self.get_blocks(kw, self.keywords[kname])
    # Generate 'all_' files
    self.get_cluster()
    self.logger().info("StringSearch done")
    return []
class SysCache(base.job.BaseModule):
    """ Parse SysCache.hve hives with regripper's 'syscache_csv' plugin and save results as CSV.

    Output CSV fields: Date (ISO-8601), Name, FileID, Sha1.
    """

    def run(self, path=""):
        """ Locate every SysCache hive in the source and parse it. Returns an empty list. """
        self.search = GetFiles(self.config, vss=self.myflag("vss"))
        self.vss = self.myflag('vss')
        self.logger().info("Parsing Syscache from registry")
        self.parse_SysCache_hive()
        return []

    def parse_SysCache_hive(self):
        """ Run regripper on every 'SysCache.hve' found and save one CSV per partition. """
        outfolder = self.myconfig('voutdir') if self.vss else self.myconfig('outdir')
        # self.tl_file = os.path.join(self.myconfig('timelinesdir'), "%s_BODY.csv" % self.myconfig('source'))
        check_directory(outfolder, create=True)
        SYSC = self.search.search(r"/System Volume Information/SysCache.hve$")
        ripcmd = self.config.get('plugins.common', 'rip', '/opt/regripper/rip.pl')
        for f in SYSC:
            # Partition name; assumes hits are shaped like 'source/mnt/pXX/...' -- TODO confirm
            p = f.split('/')[2]
            output_text = run_command([
                ripcmd, "-r",
                os.path.join(self.myconfig('casedir'), f), "-p", "syscache_csv"
            ], logger=self.logger())
            output_file = os.path.join(outfolder, "syscache_%s.csv" % p)
            # inode -> paths map used below to resolve FileIDs into file names
            self.path_from_inode = FileSystem(
                config=self.config).load_path_from_inode(self.myconfig, p, vss=self.vss)
            save_csv(self.parse_syscache_csv(p, output_text),
                     outfile=output_file,
                     file_exists='OVERWRITE')
        self.logger().info("Finished extraction from SysCache")

    def parse_syscache_csv(self, partition, text):
        """ Yield an OrderedDict per line of regripper 'syscache_csv' output.

        Parameters:
            partition (str): partition name (kept for interface compatibility; unused)
            text (str): raw regripper output; lines are 'date,fileID[,sha1]'

        Yields:
            OrderedDict with Date, Name, FileID and Sha1 fields.
        """
        for line in text.split('\n')[:-1]:
            fields = line.split(",")
            fileID = fields[1]
            # FileID looks like '<inode>/<sequence>': keep only the inode for the lookup
            inode = fields[1].split('/')[0]
            name = self.path_from_inode.get(inode, [''])[0]
            # Some lines come without a sha1 column; default to an empty string
            # (the original code duplicated the whole yield in a broad try/except for this)
            sha1 = fields[2] if len(fields) > 2 else ""
            # Bug fix: the original format string was "%Y-%m-%dT%H:%M%SZ", missing the
            # ':' between minutes and seconds, which produced malformed timestamps
            date = dateutil.parser.parse(fields[0]).strftime("%Y-%m-%dT%H:%M:%SZ")
            yield OrderedDict([("Date", date),
                               ("Name", name),
                               ("FileID", fileID),
                               ("Sha1", sha1)])
class StringSearch(base.job.BaseModule):
    """ Find strings that matches regular expression.

    There are three different output files types:

    - *hits_somekeyword*: For every hit in the search of 'somekeyword' in strings, show: Partition;Offset;Block;Status;String
    - *blocks_somekeyword*: All blocks (clusters) associated with a hit for a partition. It is an intermediate file, only for perfoming purposes
    - *all_somekeyword*: Displays every block where somekeyword has been found, along with the next information: Partition;Block;Inode;InodeStatus;PossibleFilename

    Parameter:
        path (str): filename with keywords to seek (same as keyfile in configuration)

    Configuration:
        - **keyfile**: default filename with keywords in case path is not specified
        - **outdir**: path to directory where generated match files will be stored
        - **strings_dir**: path to directory where string files are generated.

    Warning: if a keyword is found between two consecutive blocks, result won't be shown.
    """

    def run(self, path=""):
        """ Search every keyword in the string files, then dump block reports. """
        self.disk = getSourceImage(self.myconfig)
        keyfile = path
        self.logger().debug('Testing existance of {}'.format(keyfile))
        if not keyfile:
            keyfile = self.myconfig('keyfile')
        check_file(keyfile, error_missing=True)
        # Get string files or generate them if not found
        self.string_path = self.myconfig('strings_dir')
        if not (check_directory(self.string_path) and os.listdir(self.string_path)):
            self.logger().debug("No string files found. Generating them")
            StringGenerate(config=self.config, disk=self.disk).generate_strings()
        self.search_path = self.myconfig('outdir')
        check_directory(self.search_path, create=True)
        self.keywords = getSearchItems(keyfile)  # Get kw:regex dictionary reading keyfile
        # Store set of blocks for kw and partition. Ex: {'my_kw': {'p02': set(1234, 1235, ...)}}
        self.blocks = {}
        # Store status for blocks with search hits in a partition.
        # Ex: {'03': {4547: 'Allocated', 1354536: 'Not Allocated'}}
        self.block_status = defaultdict(dict)
        self.fs_object = FileSystem(self.config, disk=self.disk)
        # Generate or load 'hits_' and 'blocks_' files
        for kname in tqdm(self.keywords, total=len(self.keywords), desc='Searching keywords in strings'):
            kw = kname.strip()
            self.get_blocks(kw, self.keywords[kname])
        # Generate 'all_' files
        self.get_cluster()
        self.logger().info("StringSearch done")
        return []

    def get_blocks(self, kw, regex):
        """ Updates variable self.blocks, that stores set of blocks for kw and partition, creating new 'block' and 'hits' files """
        self.blocks_file_path = os.path.join(self.search_path, "blocks_{}".format(kw))
        hits_file = os.path.join(self.search_path, "hits_%s" % kw)
        # Create hits file if not found
        if not check_file(hits_file) or os.path.getsize(hits_file) == 0:
            self.logger().debug('Creating {} file'.format("hits_%s" % kw))
            extra_args = {'write_header': True, 'file_exists': 'OVERWRITE'}
            save_csv(self.search_strings(kw, regex), config=self.config, outfile=hits_file, **extra_args)
        # Create or load blocks file if not found
        if not check_file(self.blocks_file_path) or os.path.getsize(self.blocks_file_path) == 0:
            self.blocks[kw] = defaultdict(list)
            # Take Partition (field 1) and Block (field 3) of every hit, skipping the header
            cmd = "sed -n '1!p' {} | cut -d ';' -f1,3 | sort | uniq".format(hits_file)
            for line in yield_command(cmd, logger=self.logger()):
                part, blk = line.split(';')
                part = part.strip('"')
                self.blocks[kw][part].append(int(blk.strip('"').rstrip('\n')))
            self.save_blocks_file(self.blocks[kw], kw)
        else:
            self.logger().info('Loading {} file'.format("blocks_%s" % kw))
            try:
                with open(self.blocks_file_path, "r") as block_file:
                    self.blocks[kw] = json.load(block_file)
            except Exception as exc:
                self.logger().error('Cannot load {}'.format(self.blocks_file_path))
                raise exc

    def search_strings(self, kw, regex):
        """ Generates a string search and yields hits. Also stores blocks where there's a match for the keyword 'kw'.

        Parameters:
            kw (str): keyword name
            regex (str): regular expression associated to keyword

        Yields:
            Dictionaries containing partition, block, offset and string match
        """
        self.logger().info('Searching keyword {} with regex {}'.format(kw, regex))
        # partition number -> [loop device (may be ''), cluster size]
        partitions = {
            p.partition: [p.loop if p.loop != "" else "", p.clustersize]
            for p in self.disk.partitions
        }
        blocks = {}
        for p in self.disk.partitions:
            blocks.update({''.join(['p', p.partition]): set()})
        # In string files to search, all characters are lowercase, so the '-i' option is no needed
        grep = self.myconfig('grep', '/bin/grep')
        # Plain keyword searches can use fixed grep; real regexes need -P (PCRE)
        args = "-H" if kw == regex else "-HP"
        regex_search = [regex] if regex else [kw]
        search_command = '{} {} '.format(grep, args) + '"{regex}" "{path}"'
        module = base.job.load_module(self.config, 'base.commands.RegexFilter',
                                      extra_config=dict(cmd=search_command,
                                                        keyword_list=regex_search,
                                                        from_dir=self.string_path))
        # String file lines look like 'pXX_strings...:<offset> <string>'
        srch = re.compile(r"(p\d{1,2})_strings_?[\w.]+:\s*(\d+)\s+(.*)")
        for f in os.listdir(self.string_path):
            for match in module.run(os.path.join(self.string_path, f)):
                line = match['match']
                aux = srch.match(line)
                if not aux:
                    continue
                pname, offset, string = aux.group(1), aux.group(2), aux.group(3)
                pt = pname[1:]
                bsize = int(partitions[pt][1])
                try:
                    # Translate byte offset into a cluster number and cache its allocation status
                    blk = int(offset) // bsize
                    if blk not in self.block_status[pt]:
                        self.block_status[pt][blk] = self.fs_object.cluster_allocation_status(pname, str(blk))
                    status = self.block_status[pt].get(blk)
                except Exception as exc:
                    self.logger().error('Error searching {} in line {}'.format(srch, line))
                    raise exc
                if blk not in blocks[pname]:  # new block
                    blocks[pname].add(blk)
                yield OrderedDict([('Partition', pname), ('Offset', int(offset)),
                                   ('Block', blk), ('Status', status), ('String', string)])
        # Save blocks where a kw has been found
        if not check_file(self.blocks_file_path):
            self.save_blocks_file(blocks, kw)

    def save_blocks_file(self, blocks, kw):
        """ Serialize the partition -> blocks mapping of keyword 'kw' as a JSON file. """
        self.logger().info('Creating {} file'.format("blocks_%s" % kw))
        blocks = {p: list(b) for p, b in blocks.items()}  # json does not accept set structure
        outfile = os.path.join(self.search_path, "blocks_%s" % kw)
        # save_json expects a generator; wrap the single dict in a one-shot generator
        save_json((lambda: (yield blocks))(), config=self.config, outfile=outfile, file_exists='OVERWRITE')

    def get_cluster(self):
        """ Generates report files containing information about the block where a hit is found, along with the contents of the block itself. """
        self.inode_from_block = {}
        self.inode_status = {}
        self.path_from_inode = {}
        self.path_from_inode_del = {}
        # Creating relation between every inode and its blocks takes a long time.
        # Searching only the required blocks, although slower one by one, could be faster if the list is short
        blocks_thereshold = 20000  # it takes about an hour
        sum_blocks = 0
        for kw, parts in self.blocks.items():
            for p in parts:
                sum_blocks += len(parts[p])
        if sum_blocks > blocks_thereshold:
            # Worth precomputing the full block -> inode relation for each partition
            for p in self.disk.partitions:
                if not p.isMountable or p.filesystem == "NoName":
                    continue
                self.inode_from_block['p{}'.format(p.partition)] = self.fs_object.load_inode_from_block(
                    partition='p{}'.format(p.partition))
        # Get the necessary files relating inodes with paths and status
        for p in self.disk.partitions:
            if not p.isMountable or p.filesystem == "NoName":
                continue
            part_name = 'p{}'.format(p.partition)
            self.inode_status[part_name] = self.fs_object.load_inode_status(partition=part_name)
            self.path_from_inode[part_name] = self.fs_object.load_path_from_inode(partition=part_name)
            self.path_from_inode_del[part_name] = self.fs_object.load_path_from_inode(
                partition=part_name, deleted=True)
        self.used_blocks = defaultdict(set)
        self.block_inodes = defaultdict(dict)
        for kw in self.blocks:
            all_file = os.path.join(self.search_path, "all_{}".format(kw))
            # Skip keywords whose report is already generated
            if check_file(all_file) and os.path.getsize(all_file) != 0:
                self.logger().info('File {} already generated'.format(all_file))
                continue
            with open(all_file, "wb") as all_stream:
                for entry in self.all_info(self.blocks[kw], kw):
                    all_stream.write(entry)

    def all_info(self, kw_blocks, kw=''):
        """ Yields partition, block, inode, status, file and block content for each block where there is a match for 'kw'

        Parameters:
            kw_blocks (dict): mapping between partition and blocks with a hit for a keyword
            kw (str): keyword name

        Yields:
            bytes: report lines followed by the raw cluster contents and a separator
        """
        for p_name, blks in kw_blocks.items():
            # p_name = ''.join(['p', pt])
            for blk in tqdm(blks, total=len(blks),
                            desc='Dumping searches for {} in partition {}'.format(kw, p_name)):
                self.used_blocks[p_name].add(blk)
                # Cache block -> inodes lookups since several keywords may share blocks
                if blk not in self.block_inodes[p_name]:
                    inodes = self.fs_object.inode_from_cluster(
                        p_name, blk, self.inode_from_block.get(p_name, None))
                    self.block_inodes[p_name][blk] = inodes
                else:
                    inodes = self.block_inodes[p_name][blk]
                if not inodes:
                    yield "Pt: {}; Blk: {}; Inode: {} {}; File: {}\n".format(
                        p_name, blk, '', 'Not Allocated', '').encode()
                for inode in inodes:
                    status = self.inode_status[p_name].get(inode, "f")
                    try:
                        paths = self.path_from_inode[p_name][inode]
                    except KeyError:
                        # Not among allocated paths: try the deleted-files relation
                        paths = self.path_from_inode_del[p_name].get(inode, [""])
                    for name in paths:
                        alloc = 'Allocated' if status == 'a' else 'Not Allocated'
                        yield "Pt: {}; Blk: {}; Inode: {} {}; File: {}\n".format(
                            p_name, blk, inode, alloc, name).encode()
                yield b"\n"
                # Dump the raw contents of the cluster after its metadata
                yield self.fs_object.cluster_extract(p_name, str(blk))
                yield '\n\n{}\n'.format('-' * 42).encode()
def run(self, path=""):
    """ Generator of INDX entries as dictionaries. Also writes to csv files.

    For each mountable partition, parses NTFS INDX records and appends (or
    overwrites) a '<part>_INDX_timeline.csv' file, optionally resuming from
    the last parsed block stored in the local store.
    """
    self.disk = getSourceImage(self.myconfig)
    self.sector_size = self.disk.sectorsize
    self.parseINDX_ROOTFiles = self.myflag('root', False)  # Parse also INDX_ROOT records if set
    self.skip_short_filenames = self.myflag('skip_short', False)
    self.only_slack = self.myflag('only_slack', False)
    outdir = self.myconfig('outdir')
    check_directory(outdir, create=True)
    for p in self.disk.partitions:
        if not p.isMountable:
            continue
        # Get a dictionary {inode: list of names} from 'fls' to later relate inodes to a path. 'inode' keys are strings, not int.
        part_name = ''.join(['p', p.partition])
        try:
            self.inode_fls = FileSystem(self.config).load_path_from_inode(partition=part_name)
            self.logger().debug('Correctly loaded inode-name relation file for partiton {}'.format(part_name))
        except Exception as e:
            self.logger().error(e)
            continue
        # Start the carving at next to last execution block parsed
        outfile = os.path.join(outdir, '{}{}_INDX_timeline.csv'.format(
            part_name, '_slack' if self.only_slack else ''))
        self.lastParsedBlk = 0
        if self.myflag('use_localstore'):
            # Resume support: remember how far the previous run got
            self.lastParsedBlk = int(self.config.store_get('last_{}_block_parsed'.format(part_name), 0))
        self.logger().debug('lastParsedBlk: {}'.format(self.lastParsedBlk))
        csv_args = {'file_exists': 'APPEND', 'write_header': True}
        if self.lastParsedBlk:
            if not os.path.exists(outfile):
                self.logger().warning(
                    'Starting new file {0} at an advanced offset. Set "last_{0}_block_parsed" at 0 in "store.ini" if a fresh start is desired'
                    .format(outfile))
            else:
                # Appending to an existing file: do not repeat the header
                csv_args['write_header'] = False
        else:
            if os.path.exists(outfile):
                self.logger().warning('Overwriting file {}'.format(outfile))
                csv_args['file_exists'] = 'OVERWRITE'
        # Write the parsed entries to a csv file for each partition.
        save_csv(self.parse_INDX(p), config=self.config, outfile=outfile, quoting=0, **csv_args)
    return []
class Characterize(base.job.BaseModule):
    """ Characterize a disk image: distribution, kernel, hostname, install date and logins (Linux). """

    def run(self, path=""):
        """ Characterizes a disk image """
        self.disk = getSourceImage(self.myconfig)
        self.filesystem = FileSystem(self.config, disk=self.disk)
        self.characterize_Linux()
        return []

    # disk_info = self.get_image_information(self.disk)
    # os_info = self.characterize_Windows(self.disk)
    # env = Environment(loader=FileSystemLoader(os.path.abspath(os.path.dirname(__file__))))
    # template = env.get_template("templates/characterize.md")
    # analysisdir = self.myconfig('analysisdir')
    # with open(os.path.join(analysisdir, "characterize.md"), "w") as f:
    #     output_text = template.render(disk_info=disk_info, os_info=os_info, source=self.myconfig('source'))
    #     f.write(output_text)

    # def get_image_information(self, disk):
    #     disk_info = {}
    #     disk_info["Size"] = sizeof_fmt(os.stat(disk.imagefile).st_size)
    #     disk_info["npart"] = disk.getPartitionNumber()
    #     logfile = "{}.LOG".format(disk.imagefile[:-2])
    #     if not os.path.isfile(logfile):
    #         logfile = "{}.LOG".format(disk.imagefile[:6])
    #     if os.path.isfile(logfile):
    #         with open(logfile, "r") as f1:
    #             for linea in f1:
    #                 aux = re.search("\*\s*(Model\s*:\s*[^\|]*)\|\s*Model\s*:", linea)
    #                 if aux:
    #                     disk_info["model"] = aux.group(1)
    #                 aux = re.search("\*\s*(Serial\s*:\s*[^\|]*)\|\s*Serial\s*:", linea)
    #                 if aux:
    #                     disk_info["serial_number"] = aux.group(1)
    #     disk_info["partition"] = []
    #     for p in disk.partitions:
    #         if p.filesystem != "Unallocated" and not p.filesystem.startswith("Primary Table"):
    #             disk_info["partition"].append({"pnumber": p.partition, "size": sizeof_fmt(p.size), "type": p.filesystem})
    #     return disk_info

    def characterize_Linux(self):
        """ Write a report with distribution, kernel, hostname, install date and logins for each Linux partition. """
        self.outfile = self.myconfig('outfile')
        check_directory(os.path.dirname(self.outfile), create=True)
        for p in self.disk.partitions:
            part_path = os.path.join(self.myconfig('mountdir'), "p%s" % p.partition)
            # Only partitions with an /etc directory are treated as Linux roots
            if not os.path.isdir(os.path.join(part_path, "etc")):
                continue
            releas_f = self._find_release_file(part_path)
            # NOTE(review): the report is opened in 'w' mode inside the partition loop, so a
            # later Linux partition overwrites an earlier one's report -- confirm intended
            with open(self.outfile, 'w') as out_f:
                if releas_f != "":
                    out_f.write("Information of partition {}\n\n".format(p.partition))
                    # lsb-release style file: one KEY=value per line, in this fixed order
                    with open(releas_f, "r") as f_rel:
                        dist_id = f_rel.readline().split("=")[-1].rstrip()
                        dist_rel = f_rel.readline().split("=")[-1].rstrip()
                        dist_coden = f_rel.readline().split("=")[-1].rstrip()
                        dist_desc = f_rel.readline().split("=")[-1].rstrip()
                    kernel_v = ""
                    with open(os.path.join(part_path, "etc/hostname"), "r") as f_hostname:
                        hostname = f_hostname.read().rstrip()
                    if os.path.isfile(os.path.join(part_path, "var/log/dmesg")):
                        with open(os.path.join(part_path, "var/log/dmesg"), "r") as f_dmesg:
                            for linea in f_dmesg:
                                aux = re.search(r"(Linux version [^\s]*)", linea)
                                if aux:
                                    kernel_v = aux.group(1)
                                    break
                    out_f.write(
                        "Distribution ID:\t\t{}\nDistribution Release:\t\t{}\nDistribution codename:\t\t{}\nDistribution description:\t{}\nKernel version:\t{}\nHostname:\t{}\n"
                        .format(dist_id, dist_rel, dist_coden, dist_desc, kernel_v, hostname))
                install_date = self._get_install_date(p.partition)
                if install_date != "":
                    out_f.write("Install date:\t{}\n\n".format(install_date))
            # users
            self.get_linux_lastlog(p.partition)
            temp = self.get_linux_wtmp(os.path.join(part_path, "var/log"))
            # temp = subprocess.check_output('last -f {} --time-format iso'.format(os.path.join(part_path, "var/log/wtmp")), shell=True).decode("utf-8")
            with open(self.outfile, 'a') as out_f:
                out_f.write("\nLogins:\n\n{}".format(temp))

    def _find_release_file(self, part_path):
        """ Return the path of a distribution release file inside 'part_path', or '' if none found. """
        lsb = os.path.join(part_path, "etc/lsb-release")
        if os.path.isfile(lsb) or os.path.islink(lsb):
            releas_f = lsb
            if os.path.islink(releas_f):
                # Re-anchor the resolved target inside the mounted image, not the host root
                releas_f = os.path.join(part_path, os.path.realpath(releas_f)[1:])
            return releas_f
        releas_f = ""
        for f in os.listdir(os.path.join(part_path, "etc")):
            if f.endswith("-release"):
                releas_f = os.path.join(part_path, "etc", f)
        return releas_f

    def _get_install_date(self, partition):
        """ Estimate the OS install date from the birth time of /root or of known installer logs. """
        install_date = ""
        mountdir = self.myconfig('mountdir')
        if os.path.isdir(os.path.join(mountdir, "p%s" % partition, "root")):
            item = os.path.join(self.myconfig('source'), 'mnt', "p%s" % partition, "root")
            install_date = self.filesystem.get_macb([item])[item][3]
        for f in ["root/install.log", "var/log/installer/syslog", "root/anaconda-ks.cfg"]:
            if os.path.isfile(os.path.join(mountdir, "p%s" % partition, f)):
                item = os.path.join(self.myconfig('source'), 'mnt', "p%s" % partition, f)
                install_date = self.filesystem.get_macb([item])[item][3]
                break
        return install_date

    # Auxiliary functions

    def getrecord(self, file, uid, preserve=False):
        """ Returns [int(unix_time), device (bytes), host (bytes)] from the lastlog formated
        file object, or False when the file has no record for 'uid'.

        Set preserve=True to preserve your position within the file.
        """
        position = file.tell()
        recordsize = struct.calcsize('=L32s256s')
        file.seek(recordsize * uid)
        data = file.read(recordsize)
        if preserve:
            file.seek(position)
        # Bug fix: a uid past the end of lastlog used to raise struct.error; the dead
        # 'else: return False' in the original shows False was the intended result
        if len(data) < recordsize:
            return False
        try:
            returnlist = list(struct.unpack('=L32s256s', data))
        except struct.error:
            # Fallback with native alignment, kept from the original code
            returnlist = list(struct.unpack('L32s256s', data))
        # Trim the fixed-size C strings at their first NUL byte
        returnlist[1] = returnlist[1][:returnlist[1].decode().index('\x00')]
        returnlist[2] = returnlist[2][:returnlist[2].decode().index('\x00')]
        return returnlist

    def get_linux_wtmp(self, log_path):
        """ Extract login information from wtmp files (including rotated .gz copies) using 'last'. """
        output = ""
        for fichero in os.listdir(log_path):
            if fichero == "wtmp":
                temp = subprocess.check_output([
                    'last', '-f', os.path.join(log_path, fichero), '--time-format', 'iso'
                ])
                output += temp.decode()
            elif re.search(r"wtmp.*\.gz", fichero):
                # 'last' cannot read gzip directly: decompress to a temp file first
                with gzip.open(os.path.join(log_path, fichero), 'rb') as f, \
                        open("/tmp/wtmp.temp", "wb") as temp_f:
                    temp_f.write(f.read())
                temp = subprocess.check_output(
                    ['last', '-f', '/tmp/wtmp.temp', '--time-format', 'iso'])
                output += temp.decode()
        return output

    def get_linux_lastlog(self, partition):
        """ Append a last-logins table for 'partition' to the report file.

        Combines home-directory MACB times from the timeline with records
        parsed from var/log/lastlog.

        Raises:
            Exception: re-raised if var/log/lastlog cannot be opened.
        """
        # TO DO extract UUID of loopdevices with blkid and compare with UUID of /home from /etc/fstab
        lastlog_path = os.path.join(self.myconfig('mountdir'), "p%s" % partition, "var/log/lastlog")
        try:
            llfile = open(lastlog_path, 'rb')
        except Exception as exc:
            self.logger().error("Unable to open %s" % lastlog_path)
            raise exc
        # Users with a password hash in /etc/shadow are considered real login users
        users = dict()
        with open(os.path.join(self.myconfig('mountdir'), "p%s" % partition, "etc/shadow"), "r") as f_shadow:
            for linea in f_shadow:
                linea = linea.split(":")
                if len(linea[1]) > 1:  # user with password
                    users[linea[0]] = []
        # Resolve each user's uid from /etc/passwd
        with open(os.path.join(self.myconfig('mountdir'), "p%s" % partition, "etc/passwd"), "r") as f_passwd:
            for linea in f_passwd:
                linea = linea.split(":")
                if linea[0] in users:
                    users[linea[0]].append(linea[2])
        # MACB times of every user's home directory, taken from the timeline
        lista = [
            os.path.join(self.myconfig('source'), 'mnt', "p%s" % partition, "home", k)
            for k in users
        ]
        user2 = self.filesystem.get_macb(lista)
        with open(self.outfile, 'a') as out_f:
            out_f.write('From timeline:\n')
            # Bug fix: the original header had "\'" instead of "\t" between a_time and c_time
            out_f.write('User\tm_time\ta_time\tc_time\tb_time\n')
            for u in user2:
                out_f.write('{}\t{}\t{}\t{}\t{}\n'.format(u.split('/')[-1], *user2[u]))
            out_f.write('\nFrom lastlog:\n')
            out_f.write('User\tuid\tLast login\tIP\n')
            for username, uid in users.items():
                record = self.getrecord(llfile, int(uid[0]))
                if record and record[0] > 0:
                    out_f.write('{}\t{}\t{}\t{}\n'.format(
                        username, uid[0],
                        # Bug fix: strftime must be applied to the datetime object, not to
                        # the int timestamp (original had the closing paren misplaced)
                        datetime.datetime.fromtimestamp(int(record[0])).strftime('%Y-%m-%dT%H:%M:%SZ'),
                        record[2].decode()))
                elif record:
                    out_f.write('{}\t{}\t{}\t{}\n'.format(username, uid[0], " ", record[2].decode()))
        llfile.close()
class Recycle(base.job.BaseModule):
    """ Obtain a summary of all files found in the Recycle Bin.

    Output file fields description:
        * Date: original file deletion date
        * Size: original deleted file size in bytes
        * File: path to file in Recycle Bin
        * OriginalName: original deleted file path
        * Inode: Inode number of the deleted file (it may not be allocated)
        * Status: allocation status of the Recycle Bin file
        * User: user the recycle bin belongs to. If not found a SID is shown
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.disk = getSourceImage(self.myconfig)
        self.image = os.path.join(self.myconfig('imagedir'),
                                  self.disk.disknumber)
        self.vss = self.myflag('vss')
        # Associate a partition name ('pXX') with a partition object or a loop device
        self.partitions = {
            ''.join(['p', p.partition]): p
            for p in self.disk.partitions if p.isMountable
        }
        if not self.partitions:
            self.logger().error('No partitions found in image {}'.format(
                self.disk.imagefile))
            exit(1)
        # Volume-shadow-copy partitions: vss name -> loop device (empty devices skipped)
        self.vss_partitions = {
            v: dev
            for p in self.partitions.values() for v, dev in p.vss.items()
            if dev
        }
        self.logger().debug('Partitions: {}'.format(self.partitions))
        self.logger().debug('Vss Partitions: {}'.format(self.vss_partitions))
        self.mountdir = self.myconfig('mountdir')
        if not os.path.isdir(self.mountdir):
            self.logger().error("Mount directory {} does not exist".format(
                self.mountdir))
            exit(1)
        # Default timeline BODY file for the whole disk; overwritten per vss in parse_RecycleBin
        self.timeline_file = os.path.join(
            self.myconfig('timelinesdir'),
            '{}_BODY.csv'.format(self.disk.disknumber))

    def run(self, path=""):
        """ Main function to extract $Recycle.bin files.

        Parses the Recycle Bin of every partition (or every vss volume when the
        'vss' flag is set) and writes one CSV per partition. Returns [] always.
        """
        if self.vss:
            output_path = self.myconfig('voutdir')
        else:
            output_path = self.myconfig('outdir')
        try:
            check_file(self.timeline_file, error_missing=True)
        except base.job.RVTError:
            # Without a timeline there is nothing to parse; exit quietly
            return []
        check_directory(output_path, create=True)
        self.filesystem = FileSystem(self.config)

        # Get the users associated with each SID for every partition
        self.sid_user = {}
        if self.vss:
            for p in self.vss_partitions:
                self.sid_user[p] = self.generate_SID_user(p)
        else:
            for p in self.partitions:
                self.sid_user[p] = self.generate_SID_user(p)

        self.logger().info('Starting to parse RecycleBin')
        # RB_codes relates a six digit recyclebin code with a path for a file.
        # NOTE(review): shared across partitions/vss volumes — confirm codes
        # are not expected to collide between volumes.
        self.RB_codes = {}

        if self.vss:
            for partition in self.vss_partitions:
                self.logger().info(
                    'Processing Recycle Bin in partition {}'.format(partition))
                try:
                    self.parse_RecycleBin(partition)
                except Exception as exc:
                    if self.myflag('stop_on_error'):
                        raise exc
                    continue
                output_file = os.path.join(
                    output_path, "{}_recycle_bin.csv".format(partition))
                self.save_recycle_files(output_file, partition, sorting=True)
        else:
            try:
                self.parse_RecycleBin()
            except Exception as exc:
                if self.myflag('stop_on_error'):
                    raise exc
                return []
            output_file = os.path.join(output_path, "recycle_bin.csv")
            self.save_recycle_files(output_file, sorting=True)
        self.logger().info("Done parsing Recycle Bin!")
        return []

    def parse_RecycleBin(self, partition=None):
        """ Search all Recycle.Bin files found on the timeline. Both allocated and deleted.

        Args:
            partition (str): vss volume name when self.vss is set; None otherwise
        Raises:
            base.job.RVTError: when the timeline BODY file is missing or empty
        """
        # Find the $I files first so a list of codes associated to RecycleBin files can be created
        # Then uses that list to assign names and data to $R files found later.
        self.i_files = {}
        self.r_files = []
        if self.vss:
            self.timeline_file = os.path.join(self.myconfig('vtimelinesdir'),
                                              '{}_BODY.csv'.format(partition))
            try:
                check_file(self.timeline_file, error_missing=True)
            except base.job.RVTError as e:
                self.logger().warning('{}. Skipping vss {}'.format(
                    e, partition))
                return
        self.logger().debug('Timeline file: {}'.format(self.timeline_file))

        # grep template handed to the RegexFilter module below
        search_command = 'grep -P "{regex}" "{path}"'

        # Parse $I files in RecycleBin:
        self.logger().info('Searching RecycleBin $I files')
        # Realloc files have metadata pointing to new allocated data that does not match the filename.
        # They cannot be recovered, but the reference to an older name can give some usefull information, so they are included
        regex = [r'\$Recycle\.Bin.*\$I', r'\$RECYCLE\.BIN.*\$I']
        module = base.job.load_module(self.config,
                                      'base.commands.RegexFilter',
                                      extra_config=dict(cmd=search_command,
                                                        keyword_list=regex))
        if not os.path.exists(self.timeline_file) or os.path.getsize(
                self.timeline_file) == 0:
            self.logger().error(
                'Timeline BODY file not found or empty for partition {}. Run fs_timeline job before executing winRecycle'
                .format(partition))
            raise base.job.RVTError(
                'Timeline BODY file not found or empty for partition {}. Run fs_timeline job before executing winRecycle'
                .format(partition))
        for line in module.run(self.timeline_file):
            self._process_I_file(line['match'], partition)

        # Parse $R files in RecycleBin:
        self.logger().info('Searching RecycleBin $R files')
        regex = [r'\$Recycle\.Bin.*\$R', r'\$RECYCLE\.BIN.*\$R']
        module = base.job.load_module(self.config,
                                      'base.commands.RegexFilter',
                                      extra_config=dict(cmd=search_command,
                                                        keyword_list=regex))
        for line in module.run(self.timeline_file):
            self._process_R_file(line['match'], partition)

    def _process_timeline_record(self, body_record):
        """ Extract and modify relevant information of each timeline_BODY record supplied.

        Returns:
            tuple: (filename, size, inode, partition, user, file_status),
            or None when the record must be skipped ($FILE_NAME entries or
            unknown partitions). Callers rely on the None -> TypeError unpack.
        """
        # Timeline BODY fields: "file_md5|path|file_inode|file_mode|file_uid|file_gid|file_size|file_access|file_modified|file_changerecord|file_birth"
        _, filename, inode, _, _, _, size, _, _, change_time, _ = body_record.split(
            '|')
        # filename format for vss: 'vYpXX/path' or 'vYYpXX/path' if more than 9 vss in a partition
        # filename format for regular timeline: 'source/mnt/pXX/path' or 'source/mnt/p0/path' if single partition in image
        if filename.find('$FILE_NAME') > 0:  # Skip $FILE_NAME files
            return
        fn_splitted = filename.split('/')
        # Mark status of the file [allocated, deleted, realloc]. In realloc entries extraction makes no sense
        file_status = 'realloc' if filename[-9:] == '-realloc)' else (
            'deleted' if filename[-9:] == '(deleted)' else 'allocated')
        if self.vss:
            partition, SID = fn_splitted[2], fn_splitted[4]
            if partition not in self.partitions:
                self.logger().warning(
                    'Partition number {} obtained from timeline does not match any partition'
                    .format(partition))
                return
            # Clean filename stripping the '(deleted)' ending
            filename = filter_deleted_ending(filename)
            user = self.get_user_from_SID(SID, partition)
        else:
            part, SID = fn_splitted[2], fn_splitted[4]
            try:
                # Find partition object associated to selected partition number
                partition = self.partitions[part]
            except KeyError:
                self.logger().warning(
                    'Partition number {} obtained from timeline does not match any partition'
                    .format(part))
                return
            # Clean filename stripping the '(deleted)' ending
            filename = filter_deleted_ending(filename)
            user = self.get_user_from_SID(SID, part)
        size = int(size)
        # inode may come as 'inode-seq'; keep only the inode part
        inode = int(inode.split('-')[0])
        return filename, size, inode, partition, user, file_status

    def _process_I_file(self, line, p_name):
        """ Extract metadata from every $I file and store it in self.i_files. """
        try:
            filename, size, inode, partition, user, file_status = self._process_timeline_record(
                line)
        except TypeError:
            # _process_timeline_record returned None: record skipped
            return
        if size == 0 or size > 4096:
            # Standard size of $I file is 544 bytes. Avoid empty or corrupted files.
            self.logger().debug(
                'Wrong $I file size ({}). Not parsing {}'.format(
                    size, filename))
            return
        # For allocated files, search the file in mounted disk. In case of deleted recover from inode
        if file_status == 'allocated':
            if self.vss:
                record = os.path.join(
                    self.myconfig('casedir'),
                    filename.replace(p_name[p_name.find('p'):], p_name, 1))
            else:
                record = os.path.join(self.myconfig('casedir'), filename)
        elif file_status == 'deleted':
            # icat returns the raw file content recovered from the inode
            if self.vss:
                record = self.filesystem.icat(inode, p_name, vss=True)
            else:
                record = self.filesystem.icat(inode, p_name)
            # subprocess.run('icat -o {} {}.dd {} > {}'.format(offset, self.image, inode, tempfile), shell=True)
        else:  # realloc. Not even try to parse
            return

        try:
            i_data = self.get_data(record,
                                   filename,
                                   status=file_status,
                                   user=user)
        except Exception as e:
            self.logger().error(e)
            return
        if i_data:
            rb_code = self.get_bin_name(filename, I_file=True)
            if rb_code not in self.RB_codes:  # It should not be except for vss
                self.RB_codes[rb_code] = i_data['OriginalName']
            self.i_files[rb_code] = i_data

    def _process_R_file(self, line, p_name):
        """ List $R files not parsed as $I. Updates inode in $I files. """
        try:
            filename, size, inode, partition, user, file_status = self._process_timeline_record(
                line)
        except TypeError:
            # _process_timeline_record returned None: record skipped
            return
        bin_code = self.get_bin_name(filename, I_file=False)
        char_pos = filename.find(
            '$R{}'.format(bin_code)
        )  # First match of '$R' will be with '$Recycle', that's why '$Rcode' is looked for.
        # When a directory and its contents are sent to the Recycle Bin, only the dir has an associated $Icode file.
        # Subfiles inside are stored as $Rcode{ending}/somesubfolder/somefile
        # Detect if $R file belongs to a directory sent to Bin
        try:
            sep_char = filename[char_pos + 8:].find('/')
            subfile = True if sep_char != -1 else False
            # subfile = True if filename[char_pos + 8] == '/' else False
        except IndexError:
            subfile = False
        if file_status == 'realloc':
            inode = 0  # Makes no sense to recover from inode, since it has been reallocated
        if bin_code in self.RB_codes:
            if not subfile:  # Already parsed as $I, only lacks inode
                self.update_inode(inode, bin_code, file_status)
                return
            else:  # Subfiles in the directory
                # Take the first part of the path from the corresponding $I file, append the rest
                original_name = os.path.join(
                    self.i_files[bin_code]['OriginalName'],
                    filename[char_pos + 9 + sep_char:])
                # Containing folder and all subfiles were deleted at the same time, otherwise another recycle code would have been generated
                del_time = self.i_files[bin_code]['Date']
        else:
            # TODO: search inode in vss_fls and get name
            original_name = ''  # Can't determine original name
            del_time = datetime.datetime(1970, 1,
                                         1).strftime("%Y-%m-%d %H:%M:%S")

        r_data = OrderedDict([('Date', del_time), ('Size', size),
                              ('File', filename),
                              ('OriginalName', original_name),
                              ('Inode', inode), ('Status', file_status),
                              ('User', user)])
        if r_data:
            self.r_files.append(r_data)

    @staticmethod
    def get_bin_name(fname, I_file=True):
        """ Extract the 6 characters name assigned by the Recycle Bin.

        Args:
            fname (str): path containing a '$Ixxxxxx' or '$Rxxxxxx' component
            I_file (bool): True to extract from the $I marker, False for $R
        Returns:
            str: the six-character recycle bin code
        """
        if I_file:
            pos = fname.find("$I")
            return fname[pos + 2:pos + 8]
        else:
            # Skip the first '$R' match ('$Recycle.Bin') and use the second one
            start = fname.find("$R")
            pos = fname[start + 2:].find("$R")
            return fname[start + pos + 4:start + pos + 10]

    def update_inode(self, inode, bin_code, file_status):
        """ Set the Inode of an already-parsed $I entry from its matching $R file.

        Note: file_status is currently unused here.
        """
        ino = self.i_files[bin_code].get('Inode', 0)
        if not ino and inode:  # Update only when new inode is different than 0 and Inode key was 0
            self.i_files[bin_code]['Inode'] = inode

    def get_data(self, file, filepath, status='allocated', inode=0, user=''):
        """ Return a new record parsing file's metadata.

        Args:
            file (str or bytes): $I url or byte-string containing the data
            filepath (str): name of the mount path to $I file
            status (str): allocated, deleted, realloc
            inode (int): inode of the $R file
            user (str): user (or SID) owning the recycle bin entry
        Returns:
            dict: keys = [Date, Size, File, OriginalName, Inode, Status, User]
        """
        try:
            with BytesIO(file) as f:  # file is a byte-string
                data = self.get_metadata(f, filepath)
        except TypeError:
            # BytesIO raised TypeError: file is an url str of a path location
            with open(file, 'rb') as f:
                data = self.get_metadata(f, filepath)
        if data:
            data.update([('Inode', inode), ('Status', status), ('User', user)])
        return data

    def get_metadata(self, f, filepath):
        """ Parse $I file and obtain metadata.

        Args:
            f: $I file object (binary)
            filepath (str): name of the mount path to $I file
        Returns:
            dict: keys = [Date, Size, File, OriginalName]; empty dict on failure
        """
        # For information about $I files structure:
        # https://df-stream.com/2016/04/fun-with-recycle-bin-i-files-windows-10/
        try:
            data = f.read()
            header = struct.unpack_from('B', data)[0]
        except Exception:
            self.logger().warning(
                'Unrecognized $I header for file: {}'.format(filepath))
            return {}
        try:
            if header == 2:  # windows 10
                # Name length (in UTF-16 code units) stored at offset 24
                name_length = struct.unpack_from('<i', data, 24)[0]
                file_name = data[28:28 + name_length *
                                 2].decode('utf-16').rstrip('\x00').replace(
                                     '\\', '/')
            elif header == 1:
                # Pre-Windows-10 layout: fixed 520-byte name field at offset 24
                file_name = data[24:24 +
                                 520].decode('utf-16').rstrip('\x00').replace(
                                     '\\', '/')
            else:
                self.logger().warning(
                    'Unrecognized $I header for file: {}'.format(filepath))
                return {}
        except Exception:
            self.logger().warning(
                'Problems getting filename for file: {}'.format(filepath))
            file_name = ''
        try:
            # Original file size: little-endian int64 at offset 8
            size = struct.unpack_from('<q', data, 8)[0]
        except Exception:
            self.logger().warning(
                'Problems getting file size for file: {}'.format(filepath))
            size = 0
        try:
            # Deletion time: MS FILETIME (int64) at offset 16, converted to unix
            deleted_time = ms_time_to_unix(
                struct.unpack_from('<q', data, 16)[0])
        except Exception as exc:
            self.logger().warning(
                'Problems getting deleted timestamp for file: {}. Err: {}'.
                format(filepath, exc))
            deleted_time = datetime.datetime(1970, 1,
                                             1).strftime("%Y-%m-%d %H:%M:%S")
        try:
            return OrderedDict([('Date', deleted_time), ('Size', size),
                                ('File', filepath),
                                ('OriginalName', file_name)])
        except Exception:
            self.logger().info(
                'Wrong $I format or missing field: {}'.format(filepath))
            return {}

    def save_recycle_files(self, output_file, partition=None, sorting=True):
        """ Sort recycle bin files by date and save to 'output_file' csv. """
        if not (len(self.i_files) or len(self.r_files)):
            self.logger().info('No RecycleBin files found{}.'.format(
                ' in partition {}'.format(partition if partition else '')))
            return
        if sorting:
            self.RB_files = list(self.i_files.values()) + self.r_files
            self.RB_files = sorted(self.RB_files, key=lambda it: it['Date'])
        else:
            # Lazy concatenation; order is $I entries followed by $R entries
            self.RB_files = chain(self.i_files.values(), self.r_files)
        check_file(output_file, delete_exists=True)
        save_csv(self.RB_files,
                 outfile=output_file,
                 quoting=0,
                 file_exists='OVERWRITE')

    def generate_SID_user(self, partition):
        """ Build a SID -> username map for a partition using regripper's 'profilelist'.

        Returns:
            dict: SID to username; empty dict when no SOFTWARE hive is found
        """
        rip = self.config.get('plugins.common', 'rip',
                              '/opt/regripper/rip.pl')
        try:
            software = self.locate_hives(partition)['software']
            # software = GetFiles(self.config, vss=self.myflag("vss")).search('{}/windows/system32/config/SOFTWARE$'.format(partition))[0]
            # software = os.path.join(self.myconfig('casedir'), software)
        except (KeyError, TypeError):
            # locate_hives returned None or had no 'software' entry
            self.logger().warning(
                'No Software registry file found for partition {}'.format(
                    partition))
            return {}
        output_profilelist = subprocess.check_output(
            [rip, "-r", software, "-p", 'profilelist']).decode()
        # output_samparse = subprocess.check_output([rip, "-r", sam, "-p", 'samparse']).decode()

        us = {}
        # is_path tracks that a 'Path' line was seen, so the next 'SID' line
        # can be associated with the user extracted from it
        is_path = False
        for i in output_profilelist.split('\n'):
            if i.startswith("Path"):
                mo = re.search("Users.(.*)", i)
                if mo is not None:
                    user = mo.group(1)
                    is_path = True
                else:
                    mo = re.search("Documents and Settings.([^\n]*)", i)
                    if mo is not None:
                        user = mo.group(1)
                        is_path = True
            else:
                if i.startswith("SID") and is_path:
                    sid = i.split(':')[1][1:]
                    is_path = False
                    us[sid] = user
        return us

    def get_user_from_SID(self, SID, partition):
        """ Return the user associated with a SID.

        Search in other partitions and vss for a user with same SID if not
        found in current partition. Falls back to returning the SID itself.
        """
        try:
            return self.sid_user[partition][SID]
        except (TypeError, KeyError):
            self.logger().debug(
                'SID {} does not have an associated user in partition {}'.
                format(SID, partition))
        for p in {**self.partitions, **self.vss_partitions}:
            if p != partition:
                try:
                    return self.sid_user[p][SID]
                except (TypeError, KeyError):
                    continue
        return SID

    def locate_hives(self, partition):
        """ Return the path to the main hives, as a dictionary.

        Tries every capitalization combination of windows/system32/config.
        Returns None when no config directory exists for the partition.
        """
        # it can also be done with GetFiles
        part_dir = os.path.join(self.mountdir, partition)
        folder_combinations = product(
            *((c.capitalize(), c.upper(), c)
              for c in ['windows', 'system32', 'config']))
        for dir in (os.path.join(*i) for i in folder_combinations):
            config_dir = os.path.join(part_dir, dir)
            if os.path.exists(config_dir):
                break
        else:  # Config folder not found
            self.logger().info(
                'No config directory found for partition {}'.format(partition))
            return
        hives = {}
        for j in os.listdir(config_dir):
            if j.lower() in ["software", "sam", "system", "security"]:
                hives[j.lower()] = os.path.join(config_dir, j)
                continue
        return hives
class LnkExtract(base.job.BaseModule):
    """ Extract and parse Windows recent-file artifacts: .lnk shortcuts,
    automaticDestinations-ms and customDestinations-ms JumpLists.

    Writes one CSV per user and artifact type.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # dicID maps JumpList application IDs to human-readable names
        self.dicID = load_appID(myconfig=self.myconfig)
        self.vss = self.myflag('vss')
        self.encoding = self.myconfig('encoding', 'cp1252')

    def read_config(self):
        super().read_config()
        # appid is a file relating applications id with names. https://github.com/EricZimmerman/JumpList/blob/master/JumpList/Resources/AppIDs.txt
        self.set_default_config(
            'appid',
            os.path.join(self.config.config['windows']['plugindir'],
                         'appID.txt'))

    def run(self, path=""):
        """ Parses lnk files, jumlists and customdestinations """
        self.logger().info("Extraction of lnk files")
        self.Files = GetFiles(self.config, vss=self.myflag("vss"))
        self.filesystem = FileSystem(self.config)
        self.mountdir = self.myconfig('mountdir')

        lnk_path = self.myconfig('{}outdir'.format('v' if self.vss else ''))
        check_folder(lnk_path)

        users = get_user_list(self.mountdir, self.vss)
        # One output file, search regex and parser function per artifact type
        artifacts = {
            'lnk': {
                'filename': "{}_lnk.csv",
                'regex': r"{}/.*\.lnk$",
                'function': self.lnk_parser
            },
            'autodest': {
                'filename': "{}_jl.csv",
                'regex': r"{}/.*\.automaticDestinations-ms$",
                'function': self.automaticDest_parser
            },
            'customdest': {
                'filename': "{}_jlcustom.csv",
                'regex': r"{}/.*\.customDestinations-ms$",
                'function': self.customDest_parser
            }
        }

        for user in users:
            # NOTE(review): format string below has no '{}' placeholders, so
            # the arguments are ignored and every output name gets the literal
            # '******' prefix — looks like a redacted '{}_{}' template; confirm
            # against the repository history.
            usr = "******".format(user.split("/")[0], user.split("/")[2])
            for a_name, artifact in artifacts.items():
                out_file = os.path.join(lnk_path,
                                        artifact['filename'].format(usr))
                files_list = list(
                    self.Files.search(artifact['regex'].format(user)))
                self.logger().info(
                    "Founded {} {} files for user {} at {}".format(
                        len(files_list), a_name,
                        user.split("/")[-1],
                        user.split("/")[0]))
                if len(files_list) > 0:
                    save_csv(artifact['function'](files_list),
                             config=self.config,
                             outfile=out_file,
                             quoting=0,
                             file_exists='OVERWRITE')
                    self.logger().info(
                        "{} extraction done for user {} at {}".format(
                            a_name,
                            user.split("/")[-1],
                            user.split("/")[0]))
        self.logger().info("RecentFiles extraction done")
        return []

    def lnk_parser(self, files_list):
        """ Parses all '.lnk' files found for a user.

        Parameters:
            files_list (list): list of lnk files to parse (relative to casedir)
        Yields:
            OrderedDict: one CSV row per lnk file; empty fields on parse failure
        """
        headers = [
            "mtime", "atime", "ctime", "btime", "drive_type", "drive_sn",
            "machine_id", "path", "network_path", "size", "atributes",
            "description", "command line arguments", "file_id", "volume_id",
            "birth_file_id", "birth_volume_id", "f_mtime", "f_atime",
            "f_ctime", "file"
        ]

        # macb timestamps of each lnk file itself, taken from the filesystem
        data = self.filesystem.get_macb(files_list, vss=self.vss)
        for file in files_list:
            lnk = Lnk(os.path.join(self.myconfig('casedir'), file),
                      self.encoding,
                      logger=self.logger())
            lnk = lnk.get_lnk_info()

            if lnk == -1:
                # Parsing failed: emit a row with the macb times and empty fields
                self.logger().warning("Problems with file {}".format(file))
                yield OrderedDict(
                    zip(
                        headers, data[file] + [
                            "", "", "", "", "", "", "", "", "", "", "", "",
                            "", "", "", "", file
                        ]))
            else:
                yield OrderedDict(zip(headers, data[file] + lnk + [file]))

    def automaticDest_parser(self, files_list):
        """ Parses automaticDest files.

        Parameters:
            files_list (list): list of automaticDestinations-ms files to parse
        Yields:
            OrderedDict: one CSV row per DestList entry
        """
        # TODO: Get the default Windows encoding and avoid trying many
        # TODO: Parse the files without DestList
        # Differences in DestList between versions at:
        # https://cyberforensicator.com/wp-content/uploads/2017/01/1-s2.0-S1742287616300202-main.2-14.pdf

        # Obtain the JumpList version from the header of DestList entry
        for jl in files_list:
            try:
                ole = olefile.OleFileIO(
                    os.path.join(self.myconfig('casedir'), jl))
            except Exception as exc:
                self.logger().warning(
                    "Problems creating OleFileIO with file {}\n{}".format(
                        jl, exc))
                continue
            try:
                data = ole.openstream('DestList').read()
                header_version, = struct.unpack('<L', data[0:4])
                # DestList header version >= 3 means Windows 10 layout
                version = 'w10' if header_version >= 3 else 'w7'
                self.logger().info(
                    "Windows version of Jumplists: {}".format(version))
                break
            except Exception:
                continue
            finally:
                ole.close()
        if 'version' not in locals():
            self.logger().warning(
                "Can't determine windows version. Assuming w10")
            version = 'w10'  # default

        # Offsets for diferent versions
        entry_ofs = {'w10': 130, 'w7': 114}  # DestList entry size
        id_entry_ofs = {'w10': ['<L', 88, 92], 'w7': ['<Q', 88, 96]}
        sz_ofs = {'w10': [128, 130], 'w7': [112, 114]}  # path length field
        final_ofs = {'w10': 4, 'w7': 0}  # padding after the path string

        headers = [
            "Open date", "Application", "drive_type", "drive_sn",
            "machine_id", "path", "network_path", "size", "atributes",
            "description", "command line arguments", "file_id", "volume_id",
            "birth_file_id", "birth_volume_id", "f_mtime", "f_atime",
            "f_ctime", "file"
        ]

        # Main loop
        for jl in files_list:
            self.logger().info("Processing Jump list : {}".format(
                jl.split('/')[-1]))
            try:
                ole = olefile.OleFileIO(
                    os.path.join(self.myconfig('casedir'), jl))
            except Exception as exc:
                self.logger().warning(
                    "Problems creating OleFileIO with file {}\n{}".format(
                        jl, exc))
                continue
            if not ole.exists('DestList'):
                self.logger().warning(
                    "File {} does not have a DestList entry and can't be parsed"
                    .format(jl))
                ole.close()
                continue
            else:
                # listdir() includes DestList itself, hence the '- 1'
                if not (len(ole.listdir()) - 1):
                    self.logger().warning(
                        "Olefile has detected 0 entries in file {}\nFile will be skipped"
                        .format(jl))
                    ole.close()
                    continue
                dest = ole.openstream('DestList')
                data = dest.read()
                if len(data) == 0:
                    self.logger().warning(
                        "No DestList data in file {}\nFile will be skipped".
                        format(jl))
                    ole.close()
                    continue
            self.logger().debug("DestList lenght: {}".format(
                ole.get_size("DestList")))
            try:
                # Double check number of entries
                current_entries, pinned_entries = struct.unpack(
                    "<LL", data[4:12])
                self.logger().debug(
                    "Current entries: {}".format(current_entries))
            except Exception as exc:
                self.logger().warning(
                    "Problems unpacking header Destlist with file {}\n{}".
                    format(jl, exc))
                # continue

            ofs = 32  # Header offset
            while ofs < len(data):
                stream = data[ofs:ofs + entry_ofs[version]]
                name = ""
                try:
                    # NetBIOS machine name at fixed offset 72
                    name = stream[72:88].decode()
                except Exception:
                    self.logger().info("utf-8 decoding failed")
                    try:
                        name = stream[72:88].decode("cp1252")
                    except Exception as exc:
                        self.logger().info("cp1252 decoding failed")
                        self.logger().warning(
                            "Problems decoding name with file {}\n{}".
                            format(jl, exc))
                name = name.replace("\00", "")

                # Get id_entry of next entry
                try:
                    id_entry, = struct.unpack(
                        id_entry_ofs[version][0],
                        stream[id_entry_ofs[version][1]:
                               id_entry_ofs[version][2]])
                except Exception as exc:
                    self.logger().warning(
                        "Problems unpacking id_entry with file {}\n{}".
                        format(jl, exc))
                    # self.logger().debug(stream[id_entry_ofs[version][1]:id_entry_ofs[version][2]])
                    break
                # OLE stream names are the hex representation of the entry id
                id_entry = format(id_entry, '0x')

                # Get MSFILETIME
                try:
                    time0, time1 = struct.unpack("II", stream[100:108])
                except Exception as exc:
                    self.logger().warning(
                        "Problems unpacking MSFILETIME with file {}\n{}".
                        format(jl, exc))
                    break
                timestamp = getFileTime(time0, time1)

                # sz: Length of Unicodestring data
                try:
                    sz, = struct.unpack(
                        "h", stream[sz_ofs[version][0]:sz_ofs[version][1]])
                    # self.logger().debug("sz: {}".format(sz))
                except Exception as exc:
                    self.logger().warning(
                        "Problems unpaking unicode string size with file {}\n{}"
                        .format(jl, exc))
                    # self.logger().debug(stream[sz_ofs[version][0]:sz_ofs[version][1]])
                    break

                ofs += entry_ofs[version]
                sz2 = sz * 2  # Unicode 2 bytes

                # Get unicode path
                path = ""
                try:
                    path = data[ofs:ofs + sz2].decode()
                except UnicodeDecodeError:
                    try:
                        path = data[ofs:ofs + sz2].decode("iso8859-15")
                    except Exception as exc:
                        self.logger().warning(
                            "Problems decoding path with file {}\n{}".
                            format(jl, exc))
                path = path.replace("\00", "")

                temp = tempfile.NamedTemporaryFile()

                # Move to the next entry
                ofs += sz2 + final_ofs[version]

                try:
                    aux = ole.openstream(id_entry)
                except Exception as exc:
                    self.logger().warning(
                        "Problems with file {}\n{}".format(jl, exc))
                    self.logger().warning("ole.openstream failed")
                    temp.close()
                    break
                # Dump the lnk stream to a temp file so Lnk() can read it by path
                datos = aux.read()
                temp.write(datos)
                temp.flush()

                # Extract lnk data
                lnk = Lnk(temp.name, self.encoding, logger=self.logger())
                lnk = lnk.get_lnk_info()
                temp.close()

                # The application id is the file basename without extension
                n_hash = os.path.basename(jl).split(".")[0]
                if lnk == -1:
                    yield OrderedDict(
                        zip(headers, [
                            time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                          time.gmtime(timestamp)),
                            self.dicID.get(n_hash, n_hash), "", "", "", "",
                            "", "", "", "", "", "", "", "", "", "", "", jl
                        ]))
                else:
                    yield OrderedDict(
                        zip(headers, [
                            time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                          time.gmtime(timestamp)),
                            self.dicID.get(n_hash, n_hash)
                        ] + lnk + [jl]))
            ole.close()
        self.logger().info("Jumlists parsed")

    def customDest_parser(self, files_list):
        """ Parses customDest files.

        Parameters:
            files_list (list): list of customDestinations-ms files to parse
        Yields:
            OrderedDict: one CSV row per embedded lnk structure
        """
        # regex = re.compile("\x4C\x00\x00\x00\x01\x14\x02\x00")
        # Embedded lnk structures start with the standard lnk header magic
        split_str = b"\x4C\x00\x00\x00\x01\x14\x02\x00"
        headers = [
            "Application", "drive_type", "drive_sn", "machine_id", "path",
            "network_path", "size", "atributes", "description",
            "command line arguments", "file_id", "volume_id",
            "birth_file_id", "birth_volume_id", "f_mtime", "f_atime",
            "f_ctime", "file"
        ]
        for jl in files_list:
            with open(os.path.join(self.myconfig('casedir'), jl), "rb") as f:
                data = f.read()
            lnks = data.split(split_str)
            # lnks[0] is the data before the first lnk header; skip it
            for lnk_b in lnks[1:]:
                f_temp = tempfile.NamedTemporaryFile()
                # Restore the header magic stripped by split()
                f_temp.write(b"\x4C\x00\x00\x00\x01\x14\x02\x00" + lnk_b)
                f_temp.flush()
                lnk = Lnk(f_temp.name, self.encoding, logger=self.logger())
                lnk = lnk.get_lnk_info()
                f_temp.close()
                n_hash = os.path.basename(jl).split(".")[0]
                if lnk == -1:
                    yield OrderedDict(
                        zip(headers, [
                            self.dicID.get(n_hash, n_hash), "", "", "", "",
                            "", "", "", "", "", "", "", "", jl
                        ]))
                else:
                    yield OrderedDict(
                        zip(headers,
                            [self.dicID.get(n_hash, n_hash)] + lnk + [jl]))
        self.logger().info("customDestinations parsed")
class UsnJrnl(base.job.BaseModule):
    """ Extract and parse the NTFS USN change journal ($Extend/$UsnJrnl:$J)
    of every mountable partition, writing a full dump CSV and a filtered
    summary CSV per partition. """

    def run(self, path=""):
        """ Parse UsnJrnl files of a disk """
        self.vss = self.myflag('vss')
        disk = getSourceImage(self.myconfig)
        self.usn_path = self.myconfig(
            'voutdir') if self.vss else self.myconfig('outdir')
        check_folder(self.usn_path)
        # Temporary file where the raw $J stream is dumped; removed at the end
        self.usn_jrnl_file = os.path.join(self.usn_path, "UsnJrnl")
        self.filesystem = FileSystem(self.config, disk=disk)

        for p in disk.partitions:
            if not p.isMountable:
                continue
            if not self.vss:
                pname = ''.join(['p', p.partition])
                self._parse_usnjrnl(pname)
            else:
                # Parse the journal of every non-empty vss volume of the partition
                for v, dev in p.vss.items():
                    if dev == "":
                        continue
                    self._parse_usnjrnl(v)

        # Delete the temporal UsnJrnl dumped file
        if os.path.exists(self.usn_jrnl_file):
            os.remove(self.usn_jrnl_file)
        return []

    def _parse_usnjrnl(self, pname):
        """ Get and parses UsnJrnl file for a partition.

        Args:
            pname (str): partition ('pXX') or vss volume name
        """
        inode = self.filesystem.get_inode_from_path('/$Extend/$UsnJrnl:$J',
                                                    pname)
        if inode == -1:
            self.logger().warning(
                "Problem getting UsnJrnl from partition {}. File may not exist"
                .format(pname))
            return

        # Dumps UsnJrnl file from the data stream $J
        self.logger().info(
            "Dumping journal file of partition {}".format(pname))
        if self.vss:
            self.filesystem.icat(inode,
                                 pname,
                                 output_filename=self.usn_jrnl_file,
                                 attribute="$J",
                                 vss=True)
        else:
            self.filesystem.icat(inode,
                                 pname,
                                 output_filename=self.usn_jrnl_file,
                                 attribute="$J")
        self.logger().info(
            "Extraction of journal file completed for partition {}".format(
                pname))

        self.logger().info("Creating file {}".format(
            os.path.join(self.usn_path, "UsnJrnl_{}.csv".format(pname))))
        if os.stat(self.usn_jrnl_file).st_size > 0:
            # Create dump file
            records = self.parseUsn(infile=self.usn_jrnl_file,
                                    partition=pname)
            outfile = os.path.join(self.usn_path,
                                   "UsnJrnl_dump_{}.csv".format(pname))
            save_csv(records,
                     outfile=outfile,
                     file_exists='OVERWRITE',
                     quoting=0)

            # Create summary file from dump file
            filtered_records = self.summaryUsn(infile=outfile,
                                               partition=pname)
            out_summary = os.path.join(self.usn_path,
                                       "UsnJrnl_{}.csv".format(pname))
            save_csv(filtered_records,
                     outfile=out_summary,
                     file_exists='OVERWRITE',
                     quoting=0)

    def parseUsn(self, infile, partition):
        """ Generator that returns a dictionary for every parsed record in UsnJrnl file.

        Also populates self.folders with directory entries, used later by
        summaryUsn/complete_dir to reconstruct full paths.

        Args:
            infile (str): path to UsnJrnl file
            partition (str): partition name
        """
        journalSize = os.path.getsize(infile)
        self.folders = dict()  # Stores filenames associated to directories
        with open(infile, "rb") as f:
            dataPointer = self.findFirstRecord(f)
            f.seek(dataPointer)

            # Estimate number of entries in UsnJrnl for progressBar.
            # Since 96 is a pessimistic average, process should terminate before progressBar reaches 100%.
            estimated_entries = int((journalSize - dataPointer) / 96)
            with tqdm(total=estimated_entries,
                      desc='Parse_UsnJrnl dump_{}'.format(partition)) as pbar:
                total_entries_found = 0
                while True:
                    nextRecord = self.findNextRecord(f, journalSize)
                    total_entries_found += 1
                    if not nextRecord:
                        # End of journal: complete the progress bar and stop
                        pbar.update(estimated_entries - total_entries_found)
                        break
                    u = Usn(f)
                    f.seek(nextRecord)
                    try:
                        parent_mft = str(u.parentMftEntryNumber)
                    except Exception:
                        parent_mft = -1
                    # Remember directories so child paths can be rebuilt later
                    if str(u.fileAttributes).find(
                            "DIRECTORY") > -1 and u.mftEntryNumber != -1:
                        self.folders[u.mftEntryNumber] = [
                            u.filename, u.parentMftEntryNumber
                        ]
                    if u.mftEntryNumber != -1:
                        yield OrderedDict([('Date', u.timestamp),
                                           ('MFT Entry', u.mftEntryNumber),
                                           ('Parent MFT Entry', parent_mft),
                                           ('Filename', u.filename),
                                           ('File Attributes',
                                            u.fileAttributes),
                                           ('Reason', u.reason)])
                    pbar.update()
                self.logger().info(
                    '{} journal entries found in partition {}'.format(
                        total_entries_found, partition))

    def summaryUsn(self, infile, partition):
        """ Return the relevant records from the UsnJrnl, adding full_path to filename """
        partition = infile.split(
            '_')[-1][:-4]  # infile in format 'UsnJrnl_dump_p06.csv'
        self.inode_fls = self.filesystem.load_path_from_inode(
            partition=partition, vss=self.vss)
        self.logger().debug(
            'Correctly loaded inode-name relation file for partiton {}'.format(
                partition))
        # Resolve every directory inode seen in the journal to an absolute path
        folders = self.complete_dir(self.folders, partition)

        # Fields to filter
        fields = "(RENAME_OLD_NAME|RENAME_NEW_NAME|FILE_DELETE CLOSE|FILE_CREATE CLOSE)"
        out_fields = [
            'Date', 'Filename', 'Full Path', 'File Attributes', 'Reason',
            'MFT Entry', 'Parent MFT Entry', 'Reliable Path'
        ]
        base_dir = os.path.join(self.myconfig('source'), 'mnt', partition)
        for record in base.job.run_job(self.config,
                                       'base.input.CSVReader',
                                       path=[infile]):
            if re.search(fields, record['Reason']):
                try:
                    # Give priority to folders already found in journal
                    record['Full Path'] = os.path.join(
                        base_dir,
                        folders[int(record['Parent MFT Entry'])][0],
                        record['Filename'])
                    record['Reliable Path'] = folders[int(
                        record['Parent MFT Entry'])][1]
                except Exception:
                    # parent inode not found in journal, inode info is used to complete path
                    record['Full Path'] = os.path.join(
                        self.inode_fls[record['Parent MFT Entry']][0],
                        record['Filename'])
                    record['Reliable Path'] = False
                yield OrderedDict([(i, record[i]) for i in out_fields])

    @staticmethod
    def findFirstRecord(infile):
        """ Returns a pointer to the first USN record found.

        Modified version of Dave Lassalle's "parseusn.py"
        https://github.com/sans-dfir/sift-files/blob/master/scripts/parseusn.py

        Args:
            infile: open binary file object positioned at the journal start
        """
        while True:
            # The journal usually starts with a large run of null bytes;
            # read in chunks and skip them until real data appears
            data = infile.read(6553600).lstrip(b'\x00')
            if data:
                return infile.tell() - len(data)

    @staticmethod
    def findNextRecord(infile, journalSize):
        """Often there are runs of null bytes between USN records.
        This function reads through them and returns a pointer to the start of the next USN record.

        Args:
            infile: open binary file object
            journalSize (int): size of journal file
        """
        while True:
            try:
                recordLength = struct.unpack_from("I", infile.read(4))[0]
                if recordLength:
                    # Rewind the 4 bytes just read; record starts here
                    infile.seek(-4, 1)
                    return (infile.tell() + recordLength)
            except struct.error:
                # Short read past the end of the journal
                if infile.tell() >= journalSize:
                    return False

    def complete_dir(self, folders, partition):
        """ Reconstructs absolute paths of inodes from information of UsnJrnl.

        If it's not possible to reach root folder (inode 5), it uses $MFT
        entry. Such files are marked as unreliable.

        Args:
            folders (dict): inode -> [filename, parent_inode] from parseUsn
            partition (str): partition name
        Returns:
            dict: inode -> (path, reliable_flag)
        """
        final_folders = {}  # keys:inode; values:(filename, reliable)
        final_folders[5] = ""  # Root directory
        for entr in folders.keys():
            name = ""
            parent = folders[entr][1]
            actual = entr
            # Walk up the parent chain until root, a known folder, or a dead end
            while True:
                if parent == 5:
                    final_folders[entr] = (name, True)
                    break
                if parent in final_folders.keys():
                    # Parent already resolved: reuse its path and reliability
                    final_folders[entr] = (os.path.join(
                        final_folders[parent][0], folders[actual][0], name),
                                           final_folders[parent][1])
                    break
                name = os.path.join(folders[actual][0], name)
                actual = parent
                try:
                    parent = folders[parent][1]
                    continue
                except Exception:
                    # Use MFT to complete the path
                    try:
                        final_folders[entr] = (os.path.join(
                            self.inode_fls[str(parent)][0], name), False)
                        break
                    except Exception:
                        # Unknown ancestor: mark with '*' placeholder
                        final_folders[entr] = (os.path.join("*", name),
                                               False)
                        break
        return final_folders