def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.disk = getSourceImage(self.myconfig)
    self.image = os.path.join(self.myconfig('imagedir'), self.disk.disknumber)
    self.vss = self.myflag('vss')
    # Associate a partition name with a partition object or a loop device
    self.partitions = {''.join(['p', p.partition]): p for p in self.disk.partitions if p.isMountable}
    if not self.partitions:
        self.logger().error('No partitions found in image {}'.format(self.disk.imagefile))
        exit(1)
    self.vss_partitions = {v: dev for p in self.partitions.values() for v, dev in p.vss.items() if dev}
    self.logger().debug('Partitions: {}'.format(self.partitions))
    self.logger().debug('Vss Partitions: {}'.format(self.vss_partitions))
    self.mountdir = self.myconfig('mountdir')
    if not os.path.isdir(self.mountdir):
        self.logger().error("Mount directory {} does not exist".format(self.mountdir))
        exit(1)
    self.timeline_file = os.path.join(self.myconfig('timelinesdir'), '{}_BODY.csv'.format(self.disk.disknumber))
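# Usage sketch (illustrative, not part of the module): the two comprehensions above build a
# name -> partition map and a snapshot -> device map. The partition objects below are
# hypothetical stand-ins exposing only the attributes used here (partition, isMountable, vss).
from types import SimpleNamespace

p1 = SimpleNamespace(partition='01', isMountable=False, vss={})
p2 = SimpleNamespace(partition='02', isMountable=True, vss={'v1p02': '/dev/loop3', 'v2p02': ''})

partitions = {''.join(['p', p.partition]): p for p in (p1, p2) if p.isMountable}
vss_partitions = {v: dev for p in partitions.values() for v, dev in p.vss.items() if dev}

print(list(partitions))   # ['p02']  (non-mountable partitions are skipped)
print(vss_partitions)     # {'v1p02': '/dev/loop3'}  (snapshots without a device are skipped)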
def __init__(self, *args, disk=None, **kwargs):
    super().__init__(*args, **kwargs)
    self.disk = disk
    if disk is None:
        self.disk = getSourceImage(self.myconfig)
    self.string_path = self.myconfig('outdir')
    check_directory(self.string_path, create=True)
def run(self, path=None):
    if self.from_module:
        for data in self.from_module.run(path):
            yield data
    disk = getSourceImage(self.myconfig)
    disk.umount()
    return []
def run(self, path=""): """ Parse UsnJrnl files of a disk """ self.vss = self.myflag('vss') disk = getSourceImage(self.myconfig) self.usn_path = self.myconfig( 'voutdir') if self.vss else self.myconfig('outdir') check_folder(self.usn_path) self.usn_jrnl_file = os.path.join(self.usn_path, "UsnJrnl") self.filesystem = FileSystem(self.config, disk=disk) for p in disk.partitions: if not p.isMountable: continue if not self.vss: pname = ''.join(['p', p.partition]) self._parse_usnjrnl(pname) else: for v, dev in p.vss.items(): if dev == "": continue self._parse_usnjrnl(v) # Delete the temporal UsnJrnl dumped file if os.path.exists(self.usn_jrnl_file): os.remove(self.usn_jrnl_file) return []
def run(self, path=""): """ Characterizes a disk image """ self.disk = getSourceImage(self.myconfig) self.filesystem = FileSystem(self.config, disk=self.disk) self.characterize_Linux() return []
def run(self, path=None):
    """ The output dictionaries with disk information are expected to be sent to a mako template """
    disk = getSourceImage(self.myconfig)
    disk_info = self.get_image_information(disk)
    os_info = self.characterize_Windows()
    self.logger().info('Disk characterization finished')
    return [dict(disk_info=disk_info, os_info=os_info, source=self.myconfig('source'))]
def run(self, path=""): self.vss = self.myflag('vss') if not self.vss: evtx_path = self.myconfig('outdir') self.generate(evtx_path) else: disk = getSourceImage(self.myconfig) evtx_path = self.myconfig('voutdir') for p in disk.partitions: for v, mp in p.vss.items(): if mp != "": self.generate(os.path.join(evtx_path, v)) self.logger().info("Evtx Done") return []
def run(self, path=""): self.disk = getSourceImage(self.myconfig) keyfile = path self.logger().debug('Testing existance of {}'.format(keyfile)) if not keyfile: keyfile = self.myconfig('keyfile') check_file(keyfile, error_missing=True) # Get string files or generate them if not found self.string_path = self.myconfig('strings_dir') if not (check_directory(self.string_path) and os.listdir(self.string_path)): self.logger().debug("No string files found. Generating them") StringGenerate(config=self.config, disk=self.disk).generate_strings() self.search_path = self.myconfig('outdir') check_directory(self.search_path, create=True) self.keywords = getSearchItems( keyfile) # Get kw:regex dictionary reading keyfile self.blocks = { } # Store set of blocks for kw and partition. Ex: {'my_kw': {'p02': set(1234, 1235, ...)}} self.block_status = defaultdict( dict ) # Store status for blocks with search hits in a partition. Ex:{'03':{4547:'Allocated', 1354536:'Not Allocated'}} self.fs_object = FileSystem(self.config, disk=self.disk) # Generate or load 'hits_' and 'blocks_' files for kname in tqdm(self.keywords, total=len(self.keywords), desc='Searching keywords in strings'): kw = kname.strip() self.get_blocks(kw, self.keywords[kname]) # Generate 'all_' files self.get_cluster() self.logger().info("StringSearch done") return []
def _generate_allocfiles_vss(self):
    """ Generate alloc files from mounted vshadow snapshots """
    disk = getSourceImage(self.myconfig)

    mountdir = self.myconfig('mountdir')
    if not base.utils.check_directory(mountdir):
        self.logger().warning('Disk not mounted')
        return

    outdir = self.config.get(self.section, 'voutdir')
    base.utils.check_directory(outdir, create=True)
    find = self.myconfig('find', 'find')

    for p in disk.partitions:
        for v, dev in p.vss.items():
            if dev != "":
                with open(os.path.join(outdir, "alloc_{}.txt".format(v)), 'wb') as f:
                    relative_v_mountdir = base.utils.relative_path(os.path.join(mountdir, v), self.myconfig('casedir'))
                    base.commands.run_command([find, '-P', relative_v_mountdir + '/'], stdout=f, logger=self.logger(), from_dir=self.myconfig('casedir'))
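# Standalone sketch (illustrative, not part of the module): the run_command call above amounts
# to running `find -P <relative mount dir>/` from the case directory and redirecting its output
# to the alloc file. Both paths below are hypothetical; '-P' keeps find from following symlinks.
import subprocess

casedir = '/cases/example_case'      # hypothetical case directory
relative_v_mountdir = 'mnt/v1p02'    # hypothetical snapshot mount point, relative to casedir

with open('alloc_v1p02.txt', 'wb') as f:
    subprocess.run(['find', '-P', relative_v_mountdir + '/'], stdout=f, cwd=casedir, check=True)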
def run(self, path=""): """ Generator of INDX entries as dictionaries. Also writes to csv files""" self.disk = getSourceImage(self.myconfig) self.sector_size = self.disk.sectorsize self.parseINDX_ROOTFiles = self.myflag( 'root', False) # Parse also INDX_ROOT records if set self.skip_short_filenames = self.myflag('skip_short', False) self.only_slack = self.myflag('only_slack', False) outdir = self.myconfig('outdir') check_directory(outdir, create=True) for p in self.disk.partitions: if not p.isMountable: continue # Get a dictionary {inode: list of names} from 'fls' to later relate inodes to a path. 'inode' keys are strings, not int. part_name = ''.join(['p', p.partition]) try: self.inode_fls = FileSystem( self.config).load_path_from_inode(partition=part_name) self.logger().debug( 'Correctly loaded inode-name relation file for partiton {}' .format(part_name)) except Exception as e: self.logger().error(e) continue # Start the carving at next to last execution block parsed outfile = os.path.join( outdir, '{}{}_INDX_timeline.csv'.format( part_name, '_slack' if self.only_slack else '')) self.lastParsedBlk = 0 if self.myflag('use_localstore'): self.lastParsedBlk = int( self.config.store_get( 'last_{}_block_parsed'.format(part_name), 0)) self.logger().debug('lastParsedBlk: {}'.format(self.lastParsedBlk)) csv_args = {'file_exists': 'APPEND', 'write_header': True} if self.lastParsedBlk: if not os.path.exists(outfile): self.logger().warning( 'Starting new file {0} at an advanced offset. Set "last_{0}_block_parsed" at 0 in "store.ini" if a fresh start is desired' .format(outfile)) else: csv_args['write_header'] = False else: if os.path.exists(outfile): self.logger().warning( 'Overwriting file {}'.format(outfile)) csv_args['file_exists'] = 'OVERWRITE' # Write the parsed entries to a csv file for each partition. save_csv(self.parse_INDX(p), config=self.config, outfile=outfile, quoting=0, **csv_args) return []
def run(self, path=""): self.disk = getSourceImage(self.myconfig) if not self.disk.exists(): self.logger().error(self.disk) return self.source = self.myconfig('source') self.outFolder = self.myconfig('deleteddir') check_directory(self.outFolder, create=True) # Set maximal dates for later update self.firstDate = datetime.date.today() + datetime.timedelta(days=365) self.lastDate = datetime.date(1970, 1, 1) # Process Timeline deleted files self.timelineBodyFile = os.path.join(self.myconfig('timelinesdir'), '{}_BODY.csv'.format(self.source)) check_file(self.timelineBodyFile, error_missing=True) # cmd = r"grep '(deleted' {} | grep -v FILE_NAME | cut -d'|' -f2 | sed 's_^[0-9-][0-9-]*/mnt/\(.*\) (deleted.*$_\1_' | sort -u".format(self.timelineBodyFile) cmd = r"grep '(deleted' {} | grep -v '\$FILE_NAME' | cut -d'|' -f2,3,7".format(self.timelineBodyFile) deletedTimelineFiles = shell_command(cmd) df_timeline = self.get_dataframe(deletedTimelineFiles, 'timeline') # Process Recycle self.recycleFile = os.path.join(self.myconfig('recycledir'), 'recycle_bin.csv') check_file(self.recycleFile, error_missing=True) df_recycle = self.get_dataframe(self.recycleFile, 'recycle') # Process UsnJrnl and INDX df_usnjrnl = pd.DataFrame() df_indx = pd.DataFrame() for p in self.disk.partitions: self.partName = ''.join(['p', p.partition]) if p.isMountable: self.usnJrnlFile = os.path.join(self.myconfig('journaldir'), 'UsnJrnl_{}.csv'.format(p.partition)) check_file(self.usnJrnlFile, error_missing=True) df_u = self.get_dataframe(shell_command(r"grep 'DELETE CLOSE' {} | cut -d',' -f 1,2,4".format(self.usnJrnlFile)), 'usnjrnl') self.indxFile = os.path.join(self.myconfig('timelinesdir'), '{}_INDX_timeline.csv'.format(p.partition)) if not check_file(self.indxFile): df_i = pd.DataFrame() # cmd = "grep -v 'SHORT FILENAME FORMAT' {} | grep -v 'NOT OBTAINED' | grep -v 'invalid MFTReference' | cut -d ';' -f 3,4,5,7".format(self.indxFile) # real # cmd = r"tail -n +2 {} | grep -va 'SHORT FILENAME FORMAT' | grep -va 'NOT OBTAINED' | grep -va 'invalid MFTReference' | cut -d ';' -f 2,5,9,14 ".format(self.indxFile) # unsorted # cmd = r"tail -n +2 {} | grep -va 'SHORT FILENAME FORMAT' | grep -va 'NOT OBTAINED' | cut -d ';' -f 2,5,9,14 ".format(self.indxFile) # unsorted cmd = r"tail -n +2 {} | grep -va 'SHORT FILENAME FORMAT' | grep -va 'NOT OBTAINED' | cut -d ';' -f 3,4,6,7,9 ".format(self.indxFile) # real df_i = self.get_dataframe(shell_command(cmd), 'indx') df_usnjrnl = self.join_dataframes(df_usnjrnl, df_u) df_indx = self.join_dataframes(df_indx, df_i) # TODO: timeline_all does not need columns source or reliable # Compare Timeline against INDX to extract unique (assuming deleted) files in INDX cmd = r"cut -d'|' -f2 {} | grep -v '\$FILE_NAME'".format(self.timelineBodyFile) df_all_timeline = self.get_dataframe(shell_command(cmd), 'timeline_all') self.logger().debug('Obtaining unique files in INDX') df_indx = self.get_deleted_in_INDX(df_all_timeline, df_indx) # Create a global dataframe with all artifacts self.logger().info('Combining artifacts to create a full list of deleted files') df_global = self.combine_artifacts([df_usnjrnl, df_recycle, df_timeline, df_indx]) print(df_global.shape, df_global.columns) duplicated_bin = df_global.duplicated('Filename', keep='first') # First sources have precedence self.logger().info('Found {} duplicated files merging sources'.format(duplicated_bin.sum())) print('before dropping', df_global.shape) df_global = df_global[~duplicated_bin] # df_global.drop_duplicates('Filename', keep='first', 
inplace=True) print('after dropping', df_global.shape) print(df_global.columns) print(df_global.head()) # Save global DataFrame # outfile = os.path.join(self.outFolder, '{}_deleted.csv'.format(self.source)) outfile = '/home/pgarcia/global_deleted.csv' with open(outfile, 'w') as f: f.write(df_global.to_csv(index=False)) # exit() # Create number of files summary based on day, hour and partition self.get_stats(self.join_dataframes(df_usnjrnl, df_recycle), 'all')
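# Illustrative sketch (made-up rows, not part of the module): assuming combine_artifacts
# concatenates the frames in the order given, duplicated(..., keep='first') makes earlier
# artifacts win when the same Filename is reported by several sources.
import pandas as pd

df_usnjrnl = pd.DataFrame({'Filename': ['Users/bob/secret.docx'], 'Source': ['usnjrnl']})
df_timeline = pd.DataFrame({'Filename': ['Users/bob/secret.docx'], 'Source': ['timeline']})

df_global = pd.concat([df_usnjrnl, df_timeline], ignore_index=True)
duplicated_bin = df_global.duplicated('Filename', keep='first')
df_global = df_global[~duplicated_bin]

print(df_global)  # only the 'usnjrnl' row survives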
def run(self, path=None):
    disk = getSourceImage(self.myconfig)
    disk.mount(partitions=self.myconfig('partitions'), vss=self.myconfig('vss'), unzip_path=self.myconfig('unzip_path'))
    if self.from_module:
        for data in self.from_module.run(path):
            yield data
def run(self, path=None):
    """ The path is ignored, and the source image is used. """
    vss = self.myflag('vss')
    fls = self.myconfig('fls', 'fls')
    apfs_fls = self.myconfig('apfs_fls', 'fls')
    mactime = self.myconfig('mactime', 'mactime')
    disk = getSourceImage(self.myconfig)

    tl_path = self.myconfig('outdir')
    if vss:
        tl_path = self.myconfig('voutdir')
    check_folder(tl_path)

    if not vss:
        self.logger().info("Generating BODY file for %s", disk.disknumber)
        body = os.path.join(tl_path, "{}_BODY.csv".format(disk.disknumber))
        # Create the body file
        with open(body, "wb") as f:
            for p in disk.partitions:
                mountpath = base.utils.relative_path(p.mountpath, self.myconfig('casedir'))
                if not p.isMountable:
                    continue
                if not disk.sectorsize:
                    # Unknown sector size
                    run_command([fls, "-s", "0", "-m", mountpath, "-r", "-o", str(p.osects), "-i", "raw", disk.imagefile], stdout=f, logger=self.logger())
                elif p.filesystem == "NoName":
                    # APFS filesystems are identified as NoName, according to our experience
                    try:
                        run_command([apfs_fls, "-B", str(p.block_number), "-s", "0", "-m", mountpath, "-r", "-o", str(p.osects), "-b", str(disk.sectorsize), "-i", "raw", disk.imagefile], stdout=f, logger=self.logger())
                    except Exception:
                        # Sometimes APFS filesystems report a wrong offset. Try again with offset*8
                        run_command([apfs_fls, "-B", str(p.block_number), "-s", "0", "-m", mountpath, "-r", "-o", str(p.osects * 8), "-b", str(disk.sectorsize), "-i", "raw", disk.imagefile], stdout=f, logger=self.logger())
                else:
                    # The sector size is known
                    if p.encrypted:
                        run_command([fls, "-s", "0", "-m", mountpath, "-r", "-b", str(disk.sectorsize), p.loop], stdout=f, logger=self.logger())
                    else:
                        run_command([fls, "-s", "0", "-m", mountpath, "-r", "-o", str(p.osects), "-b", str(disk.sectorsize), disk.imagefile], stdout=f, logger=self.logger())

        # Create the timeline using mactime
        self.logger().info("Creating timeline of {}".format(disk.disknumber))
        hsum = os.path.join(tl_path, "%s_hour_sum.csv" % disk.disknumber)
        fcsv = os.path.join(tl_path, "%s_TL.csv" % disk.disknumber)
        with open(fcsv, "wb") as f:
            run_command([mactime, "-b", body, "-m", "-y", "-d", "-i", "hour", hsum], stdout=f, logger=self.logger())
        run_command(['sed', '-i', '1,2d', hsum])  # Delete the header because the full path is included
    else:
        # Generate a body file and a timeline for each VSS in the disk
        for p in disk.partitions:
            for v, dev in p.vss.items():
                if dev != "":
                    self.logger().info("Generating BODY file for {}".format(v))
                    body = os.path.join(tl_path, "{}_BODY.csv".format(v))
                    with open(body, "wb") as f:
                        mountpath = base.utils.relative_path(p.mountpath, self.myconfig('casedir'))
                        run_command([fls, "-s", "0", "-m", mountpath, "-r", dev], stdout=f, logger=self.logger())
                    self.logger().info("Creating timeline for {}".format(v))
                    hsum = os.path.join(tl_path, "%s_hour_sum.csv" % v)
                    fcsv = os.path.join(tl_path, "%s_TL.csv" % v)
                    with open(fcsv, "wb") as f:
                        run_command([mactime, "-b", body, "-m", "-y", "-d", "-i", "hour", hsum], stdout=f, logger=self.logger())
                    run_command(['sed', '-i', '1,2d', hsum])  # Delete the header because the full path is included

    self.logger().info("Timelines generation done!")
    return []
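# Stripped-down sketch (illustrative, not part of the module): the same fls -> mactime pipeline
# for a single partition of a raw image, using the flags from the code above. Requires The
# Sleuth Kit installed; the image name, offset and output file names are hypothetical.
import subprocess

image = 'image.dd'        # hypothetical raw image
offset_sectors = '2048'   # hypothetical partition offset, in sectors
sector_size = '512'

# Body file: fls walks the filesystem and emits body-format lines
with open('p01_BODY.csv', 'wb') as body_file:
    subprocess.run(['fls', '-s', '0', '-m', 'p01/', '-r',
                    '-o', offset_sectors, '-b', sector_size, '-i', 'raw', image],
                   stdout=body_file, check=True)

# Timeline: mactime turns the body file into a CSV timeline plus an hourly summary
with open('p01_TL.csv', 'wb') as timeline_file:
    subprocess.run(['mactime', '-b', 'p01_BODY.csv', '-m', '-y', '-d',
                    '-i', 'hour', 'p01_hour_sum.csv'],
                   stdout=timeline_file, check=True)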