Example 1
    def parse_SysCache_hive(self):
        outfolder = self.myconfig('voutdir') if self.vss else self.myconfig(
            'outdir')
        # self.tl_file = os.path.join(self.myconfig('timelinesdir'), "%s_BODY.csv" % self.myconfig('source'))
        check_directory(outfolder, create=True)
        SYSC = self.search.search(r"/System Volume Information/SysCache.hve$")

        ripcmd = self.config.get('plugins.common', 'rip',
                                 '/opt/regripper/rip.pl')

        for f in SYSC:
            p = f.split('/')[2]
            output_text = run_command([
                ripcmd, "-r",
                os.path.join(self.myconfig('casedir'), f), "-p", "syscache_csv"
            ],
                                      logger=self.logger())
            output_file = os.path.join(outfolder, "syscache_%s.csv" % p)

            self.path_from_inode = FileSystem(
                config=self.config).load_path_from_inode(self.myconfig,
                                                         p,
                                                         vss=self.vss)

            save_csv(self.parse_syscache_csv(p, output_text),
                     outfile=output_file,
                     file_exists='OVERWRITE')

        self.logger().info("Finished extraction from SysCache")
Example 2
    def run(self, path=""):
        """ Parse UsnJrnl files of a disk """
        self.vss = self.myflag('vss')
        disk = getSourceImage(self.myconfig)

        self.usn_path = self.myconfig(
            'voutdir') if self.vss else self.myconfig('outdir')
        check_folder(self.usn_path)
        self.usn_jrnl_file = os.path.join(self.usn_path, "UsnJrnl")
        self.filesystem = FileSystem(self.config, disk=disk)

        for p in disk.partitions:
            if not p.isMountable:
                continue
            if not self.vss:
                pname = ''.join(['p', p.partition])
                self._parse_usnjrnl(pname)
            else:
                for v, dev in p.vss.items():
                    if dev == "":
                        continue
                    self._parse_usnjrnl(v)

        # Delete the temporary dumped UsnJrnl file
        if os.path.exists(self.usn_jrnl_file):
            os.remove(self.usn_jrnl_file)
        return []
Example 3
    def run(self, path=""):
        """ Parses lnk files, jumlists and customdestinations

        """
        self.logger().info("Extraction of lnk files")

        self.Files = GetFiles(self.config, vss=self.myflag("vss"))
        self.filesystem = FileSystem(self.config)
        self.mountdir = self.myconfig('mountdir')

        lnk_path = self.myconfig('{}outdir'.format('v' if self.vss else ''))
        check_folder(lnk_path)

        users = get_user_list(self.mountdir, self.vss)
        artifacts = {
            'lnk': {
                'filename': "{}_lnk.csv",
                'regex': r"{}/.*\.lnk$",
                'function': self.lnk_parser
            },
            'autodest': {
                'filename': "{}_jl.csv",
                'regex': r"{}/.*\.automaticDestinations-ms$",
                'function': self.automaticDest_parser
            },
            'customdest': {
                'filename': "{}_jlcustom.csv",
                'regex': r"{}/.*\.customDestinations-ms$",
                'function': self.customDest_parser
            }
        }

        for user in users:
            usr = "******".format(user.split("/")[0], user.split("/")[2])

            for a_name, artifact in artifacts.items():
                out_file = os.path.join(lnk_path,
                                        artifact['filename'].format(usr))
                files_list = list(
                    self.Files.search(artifact['regex'].format(user)))
                self.logger().info(
                    "Founded {} {} files for user {} at {}".format(
                        len(files_list), a_name,
                        user.split("/")[-1],
                        user.split("/")[0]))
                if len(files_list) > 0:
                    save_csv(artifact['function'](files_list),
                             config=self.config,
                             outfile=out_file,
                             quoting=0,
                             file_exists='OVERWRITE')
                    self.logger().info(
                        "{} extraction done for user {} at {}".format(
                            a_name,
                            user.split("/")[-1],
                            user.split("/")[0]))

        self.logger().info("RecentFiles extraction done")
        return []
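
Note: the per-user loop above is driven entirely by the `artifacts` dictionary, where each entry supplies an output filename pattern, a search regex and the parser generator to call. A minimal self-contained sketch of that dispatch pattern follows; the file list, user string and `parse_lnk` stand-in are invented for illustration and are not part of the module.

import re


def parse_lnk(files):
    # Stand-in for self.lnk_parser: return one row per matched file
    return [{'file': f} for f in files]


artifacts = {
    'lnk': {'filename': '{}_lnk.csv', 'regex': r'{}/.*\.lnk$', 'function': parse_lnk},
}

candidate_files = ['p03/Users/john/Desktop/report.lnk', 'p03/Users/john/notes.txt']
user = 'p03/Users/john'

for name, art in artifacts.items():
    pattern = re.compile(art['regex'].format(user))
    matches = [f for f in candidate_files if pattern.search(f)]
    if matches:
        out_file = art['filename'].format(user.replace('/', '_'))
        print(out_file, art['function'](matches))  # the real module saves these rows with save_csv()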
Example 4
    def run(self, path=""):
        """ Characterizes a disk image

        """

        self.disk = getSourceImage(self.myconfig)
        self.filesystem = FileSystem(self.config, disk=self.disk)
        self.characterize_Linux()
        return []
Example 5
    def run(self, path=""):
        """ Main function to extract $Recycle.bin files. """
        if self.vss:
            output_path = self.myconfig('voutdir')
        else:
            output_path = self.myconfig('outdir')
            try:
                check_file(self.timeline_file, error_missing=True)
            except base.job.RVTError:
                return []

        check_directory(output_path, create=True)
        self.filesystem = FileSystem(self.config)

        # Get the users associated with each SID for every partition
        self.sid_user = {}
        if self.vss:
            for p in self.vss_partitions:
                self.sid_user[p] = self.generate_SID_user(p)
        else:
            for p in self.partitions:
                self.sid_user[p] = self.generate_SID_user(p)

        self.logger().info('Starting to parse RecycleBin')
        # RB_codes relates a six-digit Recycle Bin code to a file path. Is it updated for each partition or vss?
        self.RB_codes = {}
        if self.vss:
            for partition in self.vss_partitions:
                self.logger().info(
                    'Processing Recycle Bin in partition {}'.format(partition))
                try:
                    self.parse_RecycleBin(partition)
                except Exception as exc:
                    if self.myflag('stop_on_error'):
                        raise exc
                    continue
                output_file = os.path.join(
                    output_path, "{}_recycle_bin.csv".format(partition))
                self.save_recycle_files(output_file, partition, sorting=True)
        else:
            try:
                self.parse_RecycleBin()
            except Exception as exc:
                if self.myflag('stop_on_error'):
                    raise exc
                return []
            output_file = os.path.join(output_path, "recycle_bin.csv")
            self.save_recycle_files(output_file, sorting=True)
        self.logger().info("Done parsing Recycle Bin!")

        return []
Example 6
    def run(self, path=""):
        self.disk = getSourceImage(self.myconfig)

        keyfile = path
        self.logger().debug('Testing existence of {}'.format(keyfile))
        if not keyfile:
            keyfile = self.myconfig('keyfile')
        check_file(keyfile, error_missing=True)

        # Get string files or generate them if not found
        self.string_path = self.myconfig('strings_dir')
        if not (check_directory(self.string_path)
                and os.listdir(self.string_path)):
            self.logger().debug("No string files found. Generating them")
            StringGenerate(config=self.config,
                           disk=self.disk).generate_strings()

        self.search_path = self.myconfig('outdir')
        check_directory(self.search_path, create=True)

        self.keywords = getSearchItems(
            keyfile)  # Get kw:regex dictionary reading keyfile
        self.blocks = {
        }  # Store set of blocks for kw and partition. Ex: {'my_kw': {'p02': set(1234, 1235, ...)}}
        self.block_status = defaultdict(
            dict
        )  # Store status for blocks with search hits in a partition. Ex:{'03':{4547:'Allocated', 1354536:'Not Allocated'}}

        self.fs_object = FileSystem(self.config, disk=self.disk)

        # Generate or load 'hits_' and 'blocks_' files
        for kname in tqdm(self.keywords,
                          total=len(self.keywords),
                          desc='Searching keywords in strings'):
            kw = kname.strip()
            self.get_blocks(kw, self.keywords[kname])

        # Generate 'all_' files
        self.get_cluster()

        self.logger().info("StringSearch done")
        return []
Example 7
class SysCache(base.job.BaseModule):
    def run(self, path=""):
        self.search = GetFiles(self.config, vss=self.myflag("vss"))
        self.vss = self.myflag('vss')
        self.logger().info("Parsing Syscache from registry")
        self.parse_SysCache_hive()
        return []

    def parse_SysCache_hive(self):
        outfolder = self.myconfig('voutdir') if self.vss else self.myconfig(
            'outdir')
        # self.tl_file = os.path.join(self.myconfig('timelinesdir'), "%s_BODY.csv" % self.myconfig('source'))
        check_directory(outfolder, create=True)
        SYSC = self.search.search(r"/System Volume Information/SysCache.hve$")

        ripcmd = self.config.get('plugins.common', 'rip',
                                 '/opt/regripper/rip.pl')

        for f in SYSC:
            p = f.split('/')[2]
            output_text = run_command([
                ripcmd, "-r",
                os.path.join(self.myconfig('casedir'), f), "-p", "syscache_csv"
            ],
                                      logger=self.logger())
            output_file = os.path.join(outfolder, "syscache_%s.csv" % p)

            self.path_from_inode = FileSystem(
                config=self.config).load_path_from_inode(self.myconfig,
                                                         p,
                                                         vss=self.vss)

            save_csv(self.parse_syscache_csv(p, output_text),
                     outfile=output_file,
                     file_exists='OVERWRITE')

        self.logger().info("Finished extraction from SysCache")

    def parse_syscache_csv(self, partition, text):
        for line in text.split('\n')[:-1]:
            line = line.split(",")
            fileID = line[1]
            inode = line[1].split('/')[0]
            name = self.path_from_inode.get(inode, [''])[0]
            try:
                yield OrderedDict([("Date", dateutil.parser.parse(
                    line[0]).strftime("%Y-%m-%dT%H:%M%SZ")), ("Name", name),
                                   ("FileID", fileID), ("Sha1", line[2])])
            except Exception:
                yield OrderedDict([("Date", dateutil.parser.parse(
                    line[0]).strftime("%Y-%m-%dT%H:%M%SZ")), ("Name", name),
                                   ("FileID", fileID), ("Sha1", "")])
Example 8
class StringSearch(base.job.BaseModule):
    """ Find strings that matches regular expression.
    There are three different output files types:
     - *hits_somekeyword*: For every hit in the search of 'somekeyword' in strings, show:
        Partition;Offset;Block;Status;String
     - *blocks_somekeyword*: All blocks (clusters) associated with a hit for a partition.
        It is an intermediate file, kept only for performance reasons
     - *all_somekeyword*: Displays every block where somekeyword has been found, along with the next information:
        Partition;Block;Inode;InodeStatus;PossibleFilename

    Parameter:
        path (str): filename with keywords to seek (same as keyfile in configuration)

    Configuration:
        - **keyfile**: default filename with keywords in case path is not specified
        - **outdir**: path to directory where generated match files will be stored
        - **strings_dir**: path to directory where string files are generated.

    Warning: if a keyword spans two consecutive blocks, the result won't be shown.
    """
    def run(self, path=""):
        self.disk = getSourceImage(self.myconfig)

        keyfile = path
        self.logger().debug('Testing existence of {}'.format(keyfile))
        if not keyfile:
            keyfile = self.myconfig('keyfile')
        check_file(keyfile, error_missing=True)

        # Get string files or generate them if not found
        self.string_path = self.myconfig('strings_dir')
        if not (check_directory(self.string_path)
                and os.listdir(self.string_path)):
            self.logger().debug("No string files found. Generating them")
            StringGenerate(config=self.config,
                           disk=self.disk).generate_strings()

        self.search_path = self.myconfig('outdir')
        check_directory(self.search_path, create=True)

        self.keywords = getSearchItems(
            keyfile)  # Get kw:regex dictionary reading keyfile
        self.blocks = {
        }  # Store set of blocks for kw and partition. Ex: {'my_kw': {'p02': set(1234, 1235, ...)}}
        self.block_status = defaultdict(
            dict
        )  # Store status for blocks with search hits in a partition. Ex:{'03':{4547:'Allocated', 1354536:'Not Allocated'}}

        self.fs_object = FileSystem(self.config, disk=self.disk)

        # Generate or load 'hits_' and 'blocks_' files
        for kname in tqdm(self.keywords,
                          total=len(self.keywords),
                          desc='Searching keywords in strings'):
            kw = kname.strip()
            self.get_blocks(kw, self.keywords[kname])

        # Generate 'all_' files
        self.get_cluster()

        self.logger().info("StringSearch done")
        return []

    def get_blocks(self, kw, regex):
        """ Updates variable self.blocks, that stores set of blocks for kw and partition, creating new 'block' and 'hits' files """
        self.blocks_file_path = os.path.join(self.search_path,
                                             "blocks_{}".format(kw))
        hits_file = os.path.join(self.search_path, "hits_%s" % kw)

        # Create hits file if not found
        if not check_file(hits_file) or os.path.getsize(hits_file) == 0:
            self.logger().debug('Creating {} file'.format("hits_%s" % kw))
            extra_args = {'write_header': True, 'file_exists': 'OVERWRITE'}
            save_csv(self.search_strings(kw, regex),
                     config=self.config,
                     outfile=hits_file,
                     **extra_args)

        # Create or load blocks file if not found
        if not check_file(self.blocks_file_path) or os.path.getsize(
                self.blocks_file_path) == 0:
            self.blocks[kw] = defaultdict(list)
            cmd = "sed -n '1!p' {} | cut -d ';' -f1,3 | sort | uniq".format(
                hits_file)
            for line in yield_command(cmd, logger=self.logger()):
                part, blk = line.split(';')
                part = part.strip('"')
                self.blocks[kw][part].append(int(blk.strip('"').rstrip('\n')))
            self.save_blocks_file(self.blocks[kw], kw)
        else:
            self.logger().info('Loading {} file'.format("blocks_%s" % kw))
            try:
                with open(self.blocks_file_path, "r") as block_file:
                    self.blocks[kw] = json.load(block_file)
            except Exception as exc:
                self.logger().error('Cannot load {}'.format(
                    self.blocks_file_path))
                raise exc

    def search_strings(self, kw, regex):
        """ Generates a string search and yields hits. Also stores blocks where there's a match for the keyword 'kw'.

        Parameters:
            kw (str): keyword name
            regex (str): regular expression associated to keyword

        Yields:
            Dictionaries containing partition, block, offset and string match
        """
        self.logger().info('Searching keyword {} with regex {}'.format(
            kw, regex))

        partitions = {
            p.partition: [p.loop if p.loop != "" else "", p.clustersize]
            for p in self.disk.partitions
        }
        blocks = {}
        for p in self.disk.partitions:
            blocks.update({''.join(['p', p.partition]): set()})

        # In the string files to search, all characters are lowercase, so the '-i' option is not needed
        grep = self.myconfig('grep', '/bin/grep')
        args = "-H" if kw == regex else "-HP"
        regex_search = [regex] if regex else [kw]
        search_command = '{} {} '.format(grep, args) + '"{regex}" "{path}"'
        module = base.job.load_module(self.config,
                                      'base.commands.RegexFilter',
                                      extra_config=dict(
                                          cmd=search_command,
                                          keyword_list=regex_search,
                                          from_dir=self.string_path))

        srch = re.compile(r"(p\d{1,2})_strings_?[\w.]+:\s*(\d+)\s+(.*)")
        for f in os.listdir(self.string_path):
            for match in module.run(os.path.join(self.string_path, f)):
                line = match['match']
                aux = srch.match(line)
                if not aux:
                    continue

                pname, offset, string = aux.group(1), aux.group(2), aux.group(
                    3)
                pt = pname[1:]
                bsize = int(partitions[pt][1])

                try:
                    blk = int(offset) // bsize
                    if blk not in self.block_status[pt]:
                        self.block_status[pt][
                            blk] = self.fs_object.cluster_allocation_status(
                                pname, str(blk))
                    status = self.block_status[pt].get(blk)
                except Exception as exc:
                    self.logger().error('Error searching {} in line {}'.format(
                        srch, line))
                    raise exc

                if blk not in blocks[pname]:  # new block
                    blocks[pname].add(blk)

                yield OrderedDict([('Partition', pname),
                                   ('Offset', int(offset)), ('Block', blk),
                                   ('Status', status), ('String', string)])

        # Save blocks where a kw has been found
        if not check_file(self.blocks_file_path):
            self.save_blocks_file(blocks, kw)

    def save_blocks_file(self, blocks, kw):
        self.logger().info('Creating {} file'.format("blocks_%s" % kw))
        blocks = {p: list(b)
                  for p, b in blocks.items()
                  }  # json does not accept set structure
        outfile = os.path.join(self.search_path, "blocks_%s" % kw)
        save_json((lambda: (yield blocks))(),
                  config=self.config,
                  outfile=outfile,
                  file_exists='OVERWRITE')

    def get_cluster(self):
        """ Generates report files containing information about the block where a hit is found, along with the contents of the block itself. """
        self.inode_from_block = {}
        self.inode_status = {}
        self.path_from_inode = {}
        self.path_from_inode_del = {}

        # Creating the relation between every inode and its blocks takes a long time.
        # Searching only the required blocks, although slower one by one, could be faster if the list is short
        blocks_threshold = 20000  # it takes about an hour
        sum_blocks = 0
        for kw, parts in self.blocks.items():
            for p in parts:
                sum_blocks += len(parts[p])
        if sum_blocks > blocks_threshold:
            for p in self.disk.partitions:
                if not p.isMountable or p.filesystem == "NoName":
                    continue
                self.inode_from_block['p{}'.format(
                    p.partition)] = self.fs_object.load_inode_from_block(
                        partition='p{}'.format(p.partition))

        # Get the necessary files relating inodes with paths and status
        for p in self.disk.partitions:
            if not p.isMountable or p.filesystem == "NoName":
                continue
            part_name = 'p{}'.format(p.partition)
            self.inode_status[part_name] = self.fs_object.load_inode_status(
                partition=part_name)
            self.path_from_inode[
                part_name] = self.fs_object.load_path_from_inode(
                    partition=part_name)
            self.path_from_inode_del[
                part_name] = self.fs_object.load_path_from_inode(
                    partition=part_name, deleted=True)

        self.used_blocks = defaultdict(set)
        self.block_inodes = defaultdict(dict)

        for kw in self.blocks:
            all_file = os.path.join(self.search_path, "all_{}".format(kw))
            if check_file(all_file) and os.path.getsize(all_file) != 0:
                self.logger().info(
                    'File {} already generated'.format(all_file))
                continue
            with open(all_file, "wb") as all_stream:
                for entry in self.all_info(self.blocks[kw], kw):
                    all_stream.write(entry)

    def all_info(self, kw_blocks, kw=''):
        """ Yields partition, block, inode, status, file and block content for each block where there is a match for 'kw'

        Parameters:
            kw_blocks (dict): mapping between partition and blocks with a hit for a keyword
            kw (str): keyword name
        """

        for p_name, blks in kw_blocks.items():
            # p_name = ''.join(['p', pt])
            for blk in tqdm(
                    blks,
                    total=len(blks),
                    desc='Dumping searches for {} in partition {}'.format(
                        kw, p_name)):
                self.used_blocks[p_name].add(blk)

                if blk not in self.block_inodes[p_name]:
                    inodes = self.fs_object.inode_from_cluster(
                        p_name, blk, self.inode_from_block.get(p_name, None))
                    self.block_inodes[p_name][blk] = inodes
                else:
                    inodes = self.block_inodes[p_name][blk]

                if not inodes:
                    yield "Pt: {}; Blk: {}; Inode: {} {}; File: {}\n".format(
                        p_name, blk, '', 'Not Allocated', '').encode()

                for inode in inodes:
                    status = self.inode_status[p_name].get(inode, "f")
                    try:
                        paths = self.path_from_inode[p_name][inode]
                    except KeyError:
                        paths = self.path_from_inode_del[p_name].get(
                            inode, [""])

                    for name in paths:
                        alloc = 'Allocated' if status == 'a' else 'Not Allocated'
                        yield "Pt: {}; Blk: {}; Inode: {} {}; File: {}\n".format(
                            p_name, blk, inode, alloc, name).encode()

                yield b"\n"
                yield self.fs_object.cluster_extract(p_name, str(blk))
                yield '\n\n{}\n'.format('-' * 42).encode()
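
Note: `save_blocks_file` above persists the per-partition block sets as JSON, converting each set to a list first because JSON has no set type, and `get_blocks` later reloads the same structure. A minimal sketch of that round trip using the standard json module follows; the keyword name and block numbers are invented.

import json

blocks = {'p02': {1234, 1235, 9870}, 'p03': set()}
serializable = {part: sorted(b) for part, b in blocks.items()}  # JSON has no set type

with open('blocks_my_kw', 'w') as out:
    json.dump(serializable, out)

# get_blocks() reloads the same structure later
with open('blocks_my_kw') as f:
    print(json.load(f))  # {'p02': [1234, 1235, 9870], 'p03': []}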
Example 9
    def run(self, path=""):
        """ Generator of INDX entries as dictionaries. Also writes to csv files"""
        self.disk = getSourceImage(self.myconfig)
        self.sector_size = self.disk.sectorsize

        self.parseINDX_ROOTFiles = self.myflag(
            'root', False)  # Parse also INDX_ROOT records if set
        self.skip_short_filenames = self.myflag('skip_short', False)
        self.only_slack = self.myflag('only_slack', False)

        outdir = self.myconfig('outdir')
        check_directory(outdir, create=True)

        for p in self.disk.partitions:
            if not p.isMountable:
                continue

            # Get a dictionary {inode: list of names} from 'fls' to later relate inodes to a path. 'inode' keys are strings, not int.
            part_name = ''.join(['p', p.partition])
            try:
                self.inode_fls = FileSystem(
                    self.config).load_path_from_inode(partition=part_name)
                self.logger().debug(
                    'Correctly loaded inode-name relation file for partition {}'
                    .format(part_name))
            except Exception as e:
                self.logger().error(e)
                continue

            # Resume the carving at the block following the last one parsed in the previous execution
            outfile = os.path.join(
                outdir, '{}{}_INDX_timeline.csv'.format(
                    part_name, '_slack' if self.only_slack else ''))
            self.lastParsedBlk = 0
            if self.myflag('use_localstore'):
                self.lastParsedBlk = int(
                    self.config.store_get(
                        'last_{}_block_parsed'.format(part_name), 0))
            self.logger().debug('lastParsedBlk: {}'.format(self.lastParsedBlk))

            csv_args = {'file_exists': 'APPEND', 'write_header': True}
            if self.lastParsedBlk:
                if not os.path.exists(outfile):
                    self.logger().warning(
                        'Starting new file {0} at an advanced offset. Set "last_{0}_block_parsed" at 0 in "store.ini" if a fresh start is desired'
                        .format(outfile))
                else:
                    csv_args['write_header'] = False
            else:
                if os.path.exists(outfile):
                    self.logger().warning(
                        'Overwriting file {}'.format(outfile))
                    csv_args['file_exists'] = 'OVERWRITE'

            # Write the parsed entries to a csv file for each partition.
            save_csv(self.parse_INDX(p),
                     config=self.config,
                     outfile=outfile,
                     quoting=0,
                     **csv_args)
        return []
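
Note: the resume logic above chooses how `save_csv` opens the per-partition CSV depending on whether a previous run stored a last parsed block and whether the output file already exists. A small sketch of that decision as a pure function follows; the filename is invented.

import os


def csv_open_args(last_parsed_blk, outfile):
    # Mirrors the branches in run(): append on resume, overwrite on a fresh start
    args = {'file_exists': 'APPEND', 'write_header': True}
    if last_parsed_blk:
        if os.path.exists(outfile):
            args['write_header'] = False  # resuming: keep appending without a second header
    elif os.path.exists(outfile):
        args['file_exists'] = 'OVERWRITE'  # fresh start: discard any previous file
    return args


print(csv_open_args(0, 'p01_INDX_timeline.csv'))
print(csv_open_args(52344, 'p01_INDX_timeline.csv'))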
Example 10
class Characterize(base.job.BaseModule):
    def run(self, path=""):
        """ Characterizes a disk image

        """

        self.disk = getSourceImage(self.myconfig)
        self.filesystem = FileSystem(self.config, disk=self.disk)
        self.characterize_Linux()
        return []

        # disk_info = self.get_image_information(self.disk)
        # os_info = self.characterize_Windows(self.disk)

        # env = Environment(loader=FileSystemLoader(os.path.abspath(os.path.dirname(__file__))))
        # template = env.get_template("templates/characterize.md")

        # analysisdir = self.myconfig('analysisdir')
        # with open(os.path.join(analysisdir, "characterize.md"), "w") as f:
        #     output_text = template.render(disk_info=disk_info, os_info=os_info, source=self.myconfig('source'))
        #     f.write(output_text)

    # def get_image_information(self, disk):

    #     disk_info = {}

    #     disk_info["Size"] = sizeof_fmt(os.stat(disk.imagefile).st_size)
    #     disk_info["npart"] = disk.getPartitionNumber()

    #     logfile = "{}.LOG".format(disk.imagefile[:-2])

    #     if not os.path.isfile(logfile):
    #         logfile = "{}.LOG".format(disk.imagefile[:6])

    #     if os.path.isfile(logfile):
    #         with open(logfile, "r") as f1:
    #             for linea in f1:
    #                 aux = re.search("\*\s*(Model\s*:\s*[^\|]*)\|\s*Model\s*:", linea)
    #                 if aux:
    #                     disk_info["model"] = aux.group(1)
    #                 aux = re.search("\*\s*(Serial\s*:\s*[^\|]*)\|\s*Serial\s*:", linea)
    #                 if aux:
    #                     disk_info["serial_number"] = aux.group(1)
    #     disk_info["partition"] = []

    #     for p in disk.partitions:
    #         if p.filesystem != "Unallocated" and not p.filesystem.startswith("Primary Table"):
    #             disk_info["partition"].append({"pnumber": p.partition, "size": sizeof_fmt(p.size), "type": p.filesystem})
    #     return disk_info

    def characterize_Linux(self):
        """

        """

        self.outfile = self.myconfig('outfile')
        check_directory(os.path.dirname(self.outfile), create=True)

        for p in self.disk.partitions:
            part_path = os.path.join(self.myconfig('mountdir'),
                                     "p%s" % p.partition)
            if not os.path.isdir(os.path.join(part_path, "etc")):
                continue
            releas_f = ""
            if os.path.isfile(os.path.join(
                    part_path, "etc/lsb-release")) or os.path.islink(
                        os.path.join(part_path, "etc/lsb-release")):
                releas_f = os.path.join(part_path, "etc/lsb-release")
                if os.path.islink(releas_f):
                    releas_f = os.path.join(part_path,
                                            os.path.realpath(releas_f)[1:])
            else:
                for f in os.listdir(os.path.join(part_path, "etc")):
                    if f.endswith("-release"):
                        releas_f = os.path.join(part_path, "etc", f)

            with open(self.outfile, 'w') as out_f:
                if releas_f != "":
                    out_f.write("Information of partition {}\n\n".format(
                        p.partition))
                    f_rel = open(releas_f, "r")
                    dist_id = f_rel.readline().split("=")[-1].rstrip()
                    dist_rel = f_rel.readline().split("=")[-1].rstrip()
                    dist_coden = f_rel.readline().split("=")[-1].rstrip()
                    dist_desc = f_rel.readline().split("=")[-1].rstrip()
                    kernel_v = ""
                    f_hostname = open(os.path.join(part_path, "etc/hostname"),
                                      "r")
                    hostname = f_hostname.read().rstrip()
                    f_hostname.close()
                    f_rel.close()
                    if os.path.isfile(os.path.join(part_path,
                                                   "var/log/dmesg")):
                        f_dmesg = open(
                            os.path.join(part_path, "var/log/dmesg"), "r")
                        for linea in f_dmesg:
                            aux = re.search(r"(Linux version [^\s]*)", linea)
                            if aux:
                                kernel_v = aux.group(1)
                                break
                        f_dmesg.close()
                out_f.write(
                    "Distribution ID:\t\t{}\nDistribution Release:\t\t{}\nDistribution codename:\t\t{}\nDistribution description:\t{}\nKernel version:\t{}\nHostname:\t{}\n"
                    .format(dist_id, dist_rel, dist_coden, dist_desc, kernel_v,
                            hostname))

                install_date = ""

                if os.path.isdir(
                        os.path.join(self.myconfig('mountdir'),
                                     "p%s" % p.partition, "root")):
                    item = os.path.join(self.myconfig('source'), 'mnt',
                                        "p%s" % p.partition, "root")
                    install_date = self.filesystem.get_macb([item])[item][3]

                for f in [
                        "root/install.log", "var/log/installer/syslog",
                        "root/anaconda-ks.cfg"
                ]:
                    if os.path.isfile(
                            os.path.join(self.myconfig('mountdir'),
                                         "p%s" % p.partition, f)):
                        item = os.path.join(self.myconfig('source'), 'mnt',
                                            "p%s" % p.partition, f)
                        install_date = self.filesystem.get_macb([item
                                                                 ])[item][3]
                        break

                if install_date != "":
                    out_f.write("Install date:\t{}\n\n".format(install_date))

            # users
            self.get_linux_lastlog(p.partition)

            temp = self.get_linux_wtmp(os.path.join(part_path, "var/log"))

            # temp = subprocess.check_output('last -f {} --time-format iso'.format(os.path.join(part_path, "var/log/wtmp")), shell=True).decode("utf-8")
            with open(self.outfile, 'a') as out_f:
                out_f.write("\nLogins:\n\n{}".format(temp))

    # Auxiliary functions
    def getrecord(self, file, uid, preserve=False):
        """
        Returns [int(unix_time), string(device), string(host)] from the lastlog formatted file object; set preserve=True to preserve your position within the file

        """

        position = file.tell()
        recordsize = struct.calcsize('=L32s256s')
        file.seek(recordsize * uid)
        data = file.read(recordsize)
        if preserve:
            file.seek(position)
        try:
            returnlist = list(struct.unpack('=L32s256s', data))
            returnlist[1] = returnlist[1][:returnlist[1].decode().index('\x00')]
            returnlist[2] = returnlist[2][:returnlist[2].decode().index('\x00')]
            return returnlist
        except Exception:
            # Fall back to the native struct layout; return False if the record still cannot be parsed
            try:
                returnlist = list(struct.unpack('L32s256s', data))
                returnlist[1] = returnlist[1][:returnlist[1].decode().index('\x00')]
                returnlist[2] = returnlist[2][:returnlist[2].decode().index('\x00')]
                return returnlist
            except Exception:
                return False

    def get_linux_wtmp(self, log_path):
        """ Extrats login information """

        output = ""

        for fichero in os.listdir(log_path):
            if fichero == "wtmp":
                temp = subprocess.check_output([
                    'last', '-f',
                    os.path.join(log_path, fichero), '--time-format', 'iso'
                ])
                output += temp.decode()
            elif re.search(r"wtmp.*\.gz", fichero):
                temp_f = open("/tmp/wtmp.temp", "wb")
                with gzip.open(os.path.join(log_path, fichero), 'rb') as f:
                    temp_f.write(f.read())
                temp_f.close()
                temp = subprocess.check_output(
                    ['last', '-f', '/tmp/wtmp.temp', '--time-format', 'iso'])
                output += temp.decode()
        return output

    def get_linux_lastlog(self, partition):
        # Function to extract the last logins table
        # TODO: extract the UUID of loop devices with blkid and compare with the UUID of /home from /etc/fstab
        try:
            llfile = open(
                os.path.join(self.myconfig('mountdir'), "p%s" % partition,
                             "var/log/lastlog"), 'rb')
        except Exception as exc:
            self.logger().error("Unable to open %s" %
                                os.path.join(self.myconfig('mountdir'), "p%s" %
                                             partition, "var/log/lastlog"))
            raise exc

        user = dict()

        f_shadow = open(
            os.path.join(self.myconfig('mountdir'), "p%s" % partition,
                         "etc/shadow"), "r")
        for linea in f_shadow:
            linea = linea.split(":")
            if len(linea[1]) > 1:  # user with password
                user[linea[0]] = []
        f_shadow.close()

        f_passwd = open(
            os.path.join(self.myconfig('mountdir'), "p%s" % partition,
                         "etc/passwd"), "r")
        for linea in f_passwd:
            linea = linea.split(":")
            if linea[0] in user.keys():
                user[linea[0]].append(linea[2])
        f_passwd.close()

        lista = []
        for k in user.keys():
            lista.append(
                os.path.join(self.myconfig('source'), 'mnt', "p%s" % partition,
                             "home", k))

        user2 = self.filesystem.get_macb(lista)
        with open(self.outfile, 'a') as out_f:
            out_f.write('From timeline:\n')
            out_f.write('User\tm_time\ta_time\tc_time\tb_time\n')
            for u in user2:
                out_f.write('{}\t{}\t{}\t{}\t{}\n'.format(
                    u.split('/')[-1], *user2[u]))

            out_f.write('\nFrom lastlog:\n')
            out_f.write('User\tuid\tLast login\tIP\n')
            for user, uid in user.items():
                record = self.getrecord(llfile, int(uid[0]))
                if record and record[0] > 0:
                    out_f.write('{}\t{}\t{}\t{}\n'.format(
                        user, uid[0],
                        datetime.datetime.fromtimestamp(int(
                            record[0])).strftime('%Y-%m-%dT%H:%M:%SZ'),
                        record[2].decode()))
                elif record:
                    out_f.write('{}\t{}\t{}\t{}\n'.format(
                        user, uid[0], " ", record[2].decode()))
                else:
                    pass
        llfile.close()
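
Note: `getrecord` above reads fixed-size lastlog slots addressed by UID, each laid out as struct '=L32s256s' (a 4-byte login timestamp, a 32-byte device and a 256-byte host, NUL-padded, 292 bytes in total). A self-contained sketch that builds a fake lastlog in memory and reads back one record follows; the UID, device and host values are invented.

import datetime
import io
import struct

RECORD = '=L32s256s'
record_size = struct.calcsize(RECORD)  # 4 + 32 + 256 = 292 bytes per UID

# Build a fake lastlog with a single populated entry for uid 1000
fake = bytearray(record_size * 1001)
ts = int(datetime.datetime(2020, 3, 4, 10, 22, 31).timestamp())
struct.pack_into(RECORD, fake, record_size * 1000, ts, b'pts/0', b'192.168.1.50')

f = io.BytesIO(bytes(fake))
f.seek(record_size * 1000)
when, device, host = struct.unpack(RECORD, f.read(record_size))
print(datetime.datetime.fromtimestamp(when).strftime('%Y-%m-%dT%H:%M:%SZ'),
      device.rstrip(b'\x00').decode(), host.rstrip(b'\x00').decode())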
Example 11
class Recycle(base.job.BaseModule):
    """ Obtain a summary of all files found in the Recycle Bin

    Output file fields description:
        * Date: original file deletion date
        * Size: original deleted file size in bytes
        * File: path to file in Recycle Bin
        * OriginalName: original deleted file path
        * Inode: Inode number of the deleted file (it may not be allocated)
        * Status: allocation status of the Recycle Bin file.
        * User: user the recycle bin belongs to. If not found, the SID is shown
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.disk = getSourceImage(self.myconfig)
        self.image = os.path.join(self.myconfig('imagedir'),
                                  self.disk.disknumber)
        self.vss = self.myflag('vss')

        # Associate a partition name with a partition object or a loop device
        self.partitions = {
            ''.join(['p', p.partition]): p
            for p in self.disk.partitions if p.isMountable
        }
        if not self.partitions:
            self.logger().error('No partitions found in image {}'.format(
                self.disk.imagefile))
            exit(1)
        self.vss_partitions = {
            v: dev
            for p in self.partitions.values() for v, dev in p.vss.items()
            if dev
        }
        self.logger().debug('Partitions: {}'.format(self.partitions))
        self.logger().debug('Vss Partitions: {}'.format(self.vss_partitions))

        self.mountdir = self.myconfig('mountdir')
        if not os.path.isdir(self.mountdir):
            self.logger().error("Mount directory {} does not exist".format(
                self.mountdir))
            exit(1)

        self.timeline_file = os.path.join(
            self.myconfig('timelinesdir'),
            '{}_BODY.csv'.format(self.disk.disknumber))

    def run(self, path=""):
        """ Main function to extract $Recycle.bin files. """
        if self.vss:
            output_path = self.myconfig('voutdir')
        else:
            output_path = self.myconfig('outdir')
            try:
                check_file(self.timeline_file, error_missing=True)
            except base.job.RVTError:
                return []

        check_directory(output_path, create=True)
        self.filesystem = FileSystem(self.config)

        # Get the users associated with each SID for every partition
        self.sid_user = {}
        if self.vss:
            for p in self.vss_partitions:
                self.sid_user[p] = self.generate_SID_user(p)
        else:
            for p in self.partitions:
                self.sid_user[p] = self.generate_SID_user(p)

        self.logger().info('Starting to parse RecycleBin')
        # RB_codes relates a six-digit Recycle Bin code to a file path. Is it updated for each partition or vss?
        self.RB_codes = {}
        if self.vss:
            for partition in self.vss_partitions:
                self.logger().info(
                    'Processing Recycle Bin in partition {}'.format(partition))
                try:
                    self.parse_RecycleBin(partition)
                except Exception as exc:
                    if self.myflag('stop_on_error'):
                        raise exc
                    continue
                output_file = os.path.join(
                    output_path, "{}_recycle_bin.csv".format(partition))
                self.save_recycle_files(output_file, partition, sorting=True)
        else:
            try:
                self.parse_RecycleBin()
            except Exception as exc:
                if self.myflag('stop_on_error'):
                    raise exc
                return []
            output_file = os.path.join(output_path, "recycle_bin.csv")
            self.save_recycle_files(output_file, sorting=True)
        self.logger().info("Done parsing Recycle Bin!")

        return []

    def parse_RecycleBin(self, partition=None):
        """ Search all Recycle.Bin files found on the timeline. Both allocated and deleted. """
        # Find the $I files first so that a list of codes associated with RecycleBin files can be created.
        # Then use that list to assign names and data to $R files found later.
        self.i_files = {}
        self.r_files = []

        if self.vss:
            self.timeline_file = os.path.join(self.myconfig('vtimelinesdir'),
                                              '{}_BODY.csv'.format(partition))
            try:
                check_file(self.timeline_file, error_missing=True)
            except base.job.RVTError as e:
                self.logger().warning('{}. Skipping vss {}'.format(
                    e, partition))
                return
        self.logger().debug('Timeline file: {}'.format(self.timeline_file))

        search_command = 'grep -P "{regex}" "{path}"'

        # Parse $I files in RecycleBin:
        self.logger().info('Searching RecycleBin $I files')
        # Realloc files have metadata pointing to newly allocated data that does not match the filename.
        # They cannot be recovered, but the reference to an older name can give some useful information, so they are included
        regex = [r'\$Recycle\.Bin.*\$I', r'\$RECYCLE\.BIN.*\$I']
        module = base.job.load_module(self.config,
                                      'base.commands.RegexFilter',
                                      extra_config=dict(cmd=search_command,
                                                        keyword_list=regex))

        if not os.path.exists(self.timeline_file) or os.path.getsize(
                self.timeline_file) == 0:
            self.logger().error(
                'Timeline BODY file not found or empty for partition {}. Run fs_timeline job before executing winRecycle'
                .format(partition))
            raise base.job.RVTError(
                'Timeline BODY file not found or empty for partition {}. Run fs_timeline job before executing winRecycle'
                .format(partition))

        for line in module.run(self.timeline_file):
            self._process_I_file(line['match'], partition)

        # Parse $R files in RecycleBin:
        self.logger().info('Searching RecycleBin $R files')
        regex = [r'\$Recycle\.Bin.*\$R', r'\$RECYCLE\.BIN.*\$R']
        module = base.job.load_module(self.config,
                                      'base.commands.RegexFilter',
                                      extra_config=dict(cmd=search_command,
                                                        keyword_list=regex))

        for line in module.run(self.timeline_file):
            self._process_R_file(line['match'], partition)

    def _process_timeline_record(self, body_record):
        """ Extract and modify relevant information of each timeline_BODY record supplied. """
        # Timeline BODY fields: "file_md5|path|file_inode|file_mode|file_uid|file_gid|file_size|file_access|file_modified|file_changerecord|file_birth"
        _, filename, inode, _, _, _, size, _, _, change_time, _ = body_record.split(
            '|')
        # filename format for vss:  'vYpXX/path' or 'vYYpXX/path' if more than 9 vss in a partition
        # filename format for regular timeline:  'source/mnt/pXX/path' or 'source/mnt/p0/path' if single partition in image

        if filename.find('$FILE_NAME') > 0:  # Skip $FILE_NAME files
            return

        fn_splitted = filename.split('/')
        # Mark status of the file [allocated, deleted, realloc]. In realloc entries extraction makes no sense
        file_status = 'realloc' if filename[-9:] == '-realloc)' else (
            'deleted' if filename[-9:] == '(deleted)' else 'allocated')

        if self.vss:
            partition, SID = fn_splitted[2], fn_splitted[4]
            if partition not in self.partitions:
                self.logger().warning(
                    'Partition number {} obtained from timeline does not match any partition'
                    .format(partition))
                return
            # Clean filename stripping the '(deleted)' ending
            filename = filter_deleted_ending(filename)
            user = self.get_user_from_SID(SID, partition)
        else:
            part, SID = fn_splitted[2], fn_splitted[4]
            try:  # Find partition object associated to selected partition number
                partition = self.partitions[part]
            except KeyError:
                self.logger().warning(
                    'Partition number {} obtained from timeline does not match any partition'
                    .format(part))
                return
            # Clean filename stripping the '(deleted)' ending
            filename = filter_deleted_ending(filename)
            user = self.get_user_from_SID(SID, part)

        size = int(size)
        inode = int(inode.split('-')[0])

        return filename, size, inode, partition, user, file_status

    def _process_I_file(self, line, p_name):
        """ Extract metadata from every $I files and store it. """
        try:
            filename, size, inode, partition, user, file_status = self._process_timeline_record(
                line)
        except TypeError:
            return

        if size == 0 or size > 4096:  # Standard size of $I file is 544 bytes. Avoid empty or corrupted files.
            self.logger().debug(
                'Wrong $I file size ({}). Not parsing {}'.format(
                    size, filename))
            return

        # For allocated files, search the file in mounted disk. In case of deleted recover from inode
        if file_status == 'allocated':
            if self.vss:
                record = os.path.join(
                    self.myconfig('casedir'),
                    filename.replace(p_name[p_name.find('p'):], p_name, 1))
            else:
                record = os.path.join(self.myconfig('casedir'), filename)
        elif file_status == 'deleted':
            if self.vss:
                record = self.filesystem.icat(inode, p_name, vss=True)
            else:
                record = self.filesystem.icat(inode, p_name)
                # subprocess.run('icat -o {} {}.dd {} > {}'.format(offset, self.image, inode, tempfile), shell=True)
        else:  # realloc. Not even try to parse
            return

        try:
            i_data = self.get_data(record,
                                   filename,
                                   status=file_status,
                                   user=user)
        except Exception as e:
            self.logger().error(e)
            return
        if i_data:
            rb_code = self.get_bin_name(filename, I_file=True)
            if rb_code not in self.RB_codes:  # It should not be except for vss
                self.RB_codes[rb_code] = i_data['OriginalName']
            self.i_files[rb_code] = i_data

    def _process_R_file(self, line, p_name):
        """ List $R files not parsed as $I. Updates inode in $I files"""
        try:
            filename, size, inode, partition, user, file_status = self._process_timeline_record(
                line)
        except TypeError:
            return

        bin_code = self.get_bin_name(filename, I_file=False)
        char_pos = filename.find(
            '$R{}'.format(bin_code)
        )  # First match of '$R' would be '$Recycle', that's why '$R{code}' is looked for.
        # When a directory and its contents are sent to the Recycle Bin, only the dir has an associated $Icode file. Subfiles inside are stored as $Rcode{ending}/somesubfolder/somefile
        # Detect if $R file belongs to a directory sent to Bin
        try:
            sep_char = filename[char_pos + 8:].find('/')
            subfile = True if sep_char != -1 else False
            # subfile = True if filename[char_pos + 8] == '/' else False
        except IndexError:
            subfile = False

        if file_status == 'realloc':
            inode = 0  # Makes no sense to recover from inode, since it has been reallocated
        if bin_code in self.RB_codes:
            if not subfile:  # Already parsed as $I, only lacks inode
                self.update_inode(inode, bin_code, file_status)
                return
            else:  # Subfiles in the directory
                # Take the first part of the path from the corresponding $I file, append the rest
                original_name = os.path.join(
                    self.i_files[bin_code]['OriginalName'],
                    filename[char_pos + 9 + sep_char:])
                # Containing folder and all subfiles were deleted at the same time, otherwise another recycle code would have been generated
                del_time = self.i_files[bin_code]['Date']
        else:
            # TODO: search inode in vss_fls and get name
            original_name = ''  # Can't determine original name
            del_time = datetime.datetime(1970, 1,
                                         1).strftime("%Y-%m-%d %H:%M:%S")

        r_data = OrderedDict([('Date', del_time), ('Size', size),
                              ('File', filename),
                              ('OriginalName', original_name),
                              ('Inode', inode), ('Status', file_status),
                              ('User', user)])
        if r_data:
            self.r_files.append(r_data)

    @staticmethod
    def get_bin_name(fname, I_file=True):
        """ Extract the 6 characters name assigned by the Recycle Bin """
        if I_file:
            pos = fname.find("$I")
            return fname[pos + 2:pos + 8]
        else:
            start = fname.find("$R")
            pos = fname[start + 2:].find("$R")
            return fname[start + pos + 4:start + pos + 10]

    def update_inode(self, inode, bin_code, file_status):
        ino = self.i_files[bin_code].get('Inode', 0)
        if not ino and inode:  # Update only when the new inode is non-zero and the stored Inode was 0
            self.i_files[bin_code]['Inode'] = inode

    def get_data(self, file, filepath, status='allocated', inode=0, user=''):
        """ Return a new record parsing file's metadata.
        Args:
            file (str or bytes): $I url or byte-string containing the data
            filepath (str): name of the mount path to $I file
            status (str): allocated, deleted, realloc
            inode (int): inode of the $R file
        Returns:
            dict: keys = [Date, Size, File, OriginalName, Inode, Status, User]
        """
        try:
            with BytesIO(file) as f:  # file is a byte-string
                data = self.get_metadata(f, filepath)
        except TypeError:
            with open(file,
                      'rb') as f:  # file is a path string
                data = self.get_metadata(f, filepath)
        if data:
            data.update([('Inode', inode), ('Status', status), ('User', user)])
        return data

    def get_metadata(self, f, filepath):
        """ Parse $I file and obtain metadata
        Args:
            f (str): $I file_object
            filepath (str): name of the mount path to $I file
        Returns:
            dict: keys = [Date, Size, File, OriginalName]
        """
        # For information about $I files structure:
        # https://df-stream.com/2016/04/fun-with-recycle-bin-i-files-windows-10/
        try:
            data = f.read()
            header = struct.unpack_from('B', data)[0]
        except Exception:
            self.logger().warning(
                'Unrecognized $I header for file: {}'.format(filepath))
            return {}
        try:
            if header == 2:  # windows 10
                name_length = struct.unpack_from('<i', data, 24)[0]
                file_name = data[28:28 + name_length *
                                 2].decode('utf-16').rstrip('\x00').replace(
                                     '\\', '/')
            elif header == 1:
                file_name = data[24:24 +
                                 520].decode('utf-16').rstrip('\x00').replace(
                                     '\\', '/')
            else:
                self.logger().warning(
                    'Unrecognized $I header for file: {}'.format(filepath))
                return {}
        except Exception:
            self.logger().warning(
                'Problems getting filename for file: {}'.format(filepath))
            file_name = ''
        try:
            size = struct.unpack_from('<q', data, 8)[0]
        except Exception:
            self.logger().warning(
                'Problems getting file size for file: {}'.format(filepath))
            size = 0
        try:
            deleted_time = ms_time_to_unix(
                struct.unpack_from('<q', data, 16)[0])
        except Exception as exc:
            self.logger().warning(
                'Problems getting deleted timestamp for file: {}. Err: {}'.
                format(filepath, exc))
            deleted_time = datetime.datetime(1970, 1,
                                             1).strftime("%Y-%m-%d %H:%M:%S")

        try:
            return OrderedDict([('Date', deleted_time), ('Size', size),
                                ('File', filepath),
                                ('OriginalName', file_name)])
        except Exception:
            self.logger().info(
                'Wrong $I format or missing field: {}'.format(filepath))
            return {}

    def save_recycle_files(self, output_file, partition=None, sorting=True):
        """ Sort recycle bin files by date and save to 'output_file' csv. """
        if not (len(self.i_files) or len(self.r_files)):
            self.logger().info('No RecycleBin files found{}.'.format(
                ' in partition {}'.format(partition) if partition else ''))
            return
        if sorting:
            self.RB_files = list(self.i_files.values()) + self.r_files
            self.RB_files = sorted(self.RB_files, key=lambda it: it['Date'])
        else:
            self.RB_files = chain(self.i_files.values(), self.r_files)

        check_file(output_file, delete_exists=True)
        save_csv(self.RB_files,
                 outfile=output_file,
                 quoting=0,
                 file_exists='OVERWRITE')

    def generate_SID_user(self, partition):
        rip = self.config.get('plugins.common', 'rip', '/opt/regripper/rip.pl')

        try:
            software = self.locate_hives(partition)['software']
            # software = GetFiles(self.config, vss=self.myflag("vss")).search('{}/windows/system32/config/SOFTWARE$'.format(partition))[0]
            # software = os.path.join(self.myconfig('casedir'), software)
        except (KeyError, TypeError):
            self.logger().warning(
                'No Software registry file found for partition {}'.format(
                    partition))
            return {}

        output_profilelist = subprocess.check_output(
            [rip, "-r", software, "-p", 'profilelist']).decode()
        # output_samparse = subprocess.check_output([rip, "-r", sam, "-p", 'samparse']).decode()

        us = {}
        is_path = False
        for i in output_profilelist.split('\n'):
            if i.startswith("Path"):
                mo = re.search("Users.(.*)", i)
                if mo is not None:
                    user = mo.group(1)
                    is_path = True
                else:
                    mo = re.search("Documents and Settings.([^\n]*)", i)
                    if mo is not None:
                        user = mo.group(1)
                        is_path = True
            else:
                if i.startswith("SID") and is_path:
                    sid = i.split(':')[1][1:]
                    is_path = False
                    us[sid] = user
        return us

    def get_user_from_SID(self, SID, partition):
        """ Return the user associated with a SID.
        Search in other partitions and vss for a user with same SID if not found in current partition. """
        try:
            return self.sid_user[partition][SID]
        except (TypeError, KeyError):
            self.logger().debug(
                'SID {} does not have an associated user in partition {}'.
                format(SID, partition))
        for p in {**self.partitions, **self.vss_partitions}:
            if p != partition:
                try:
                    return self.sid_user[p][SID]
                except (TypeError, KeyError):
                    continue
        return SID

    def locate_hives(self, partition):
        """ Return the path to the main hives, as a dictionary. """
        # it can also be done with GetFiles
        part_dir = os.path.join(self.mountdir, partition)
        folder_combinations = product(
            *((c.capitalize(), c.upper(), c)
              for c in ['windows', 'system32', 'config']))
        for dir in (os.path.join(*i) for i in folder_combinations):
            config_dir = os.path.join(part_dir, dir)
            if os.path.exists(config_dir):
                break
        else:  # Config folder not found
            self.logger().info(
                'No config directory found for partition {}'.format(partition))
            return

        hives = {}
        for j in os.listdir(config_dir):
            if j.lower() in ["software", "sam", "system", "security"]:
                hives[j.lower()] = os.path.join(config_dir, j)
                continue

        return hives
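
The $I parsing shown above relies on fixed offsets in the RecycleBin metadata record; below is a minimal, self-contained sketch of that layout. The FILETIME conversion stands in for ms_time_to_unix, which is not included in this snippet, and the Win10 branch is an assumption based on the header-version check performed above.

# Hedged sketch: standalone parse of a $I RecycleBin record, mirroring the
# unpack offsets used above (size at offset 8, deletion FILETIME at offset 16,
# UTF-16 original name). ms_time_to_unix is not part of this snippet, so the
# conversion below is an assumption; adapt it to the module's own helper.
import datetime
import struct


def parse_i_record(data):
    """Return (deleted_time, size, original_path) from raw $I bytes."""
    version, = struct.unpack_from('<q', data, 0)    # 1 = Vista/7, 2 = Win10+
    size, = struct.unpack_from('<q', data, 8)       # original file size in bytes
    filetime, = struct.unpack_from('<q', data, 16)  # 100 ns ticks since 1601-01-01
    deleted = datetime.datetime(1601, 1, 1) + datetime.timedelta(
        microseconds=filetime / 10)
    if version == 1:    # Vista/7: fixed 520-byte name field at offset 24
        name = data[24:24 + 520].decode('utf-16').rstrip('\x00')
    else:               # Win10: 4-byte name length (in characters) at offset 24
        name_len, = struct.unpack_from('<i', data, 24)
        name = data[28:28 + name_len * 2].decode('utf-16').rstrip('\x00')
    return (deleted.strftime("%Y-%m-%d %H:%M:%S"), size, name.replace('\\', '/'))
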
Esempio n. 12
0
class LnkExtract(base.job.BaseModule):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.dicID = load_appID(myconfig=self.myconfig)
        self.vss = self.myflag('vss')
        self.encoding = self.myconfig('encoding', 'cp1252')

    def read_config(self):
        super().read_config()
        # appid is a file relating application IDs with names. https://github.com/EricZimmerman/JumpList/blob/master/JumpList/Resources/AppIDs.txt
        self.set_default_config(
            'appid',
            os.path.join(self.config.config['windows']['plugindir'],
                         'appID.txt'))

    def run(self, path=""):
        """ Parses lnk files, jumlists and customdestinations

        """
        self.logger().info("Extraction of lnk files")

        self.Files = GetFiles(self.config, vss=self.myflag("vss"))
        self.filesystem = FileSystem(self.config)
        self.mountdir = self.myconfig('mountdir')

        lnk_path = self.myconfig('{}outdir'.format('v' if self.vss else ''))
        check_folder(lnk_path)

        users = get_user_list(self.mountdir, self.vss)
        artifacts = {
            'lnk': {
                'filename': "{}_lnk.csv",
                'regex': r"{}/.*\.lnk$",
                'function': self.lnk_parser
            },
            'autodest': {
                'filename': "{}_jl.csv",
                'regex': r"{}/.*\.automaticDestinations-ms$",
                'function': self.automaticDest_parser
            },
            'customdest': {
                'filename': "{}_jlcustom.csv",
                'regex': r"{}/.*\.customDestinations-ms$",
                'function': self.customDest_parser
            }
        }

        for user in users:
            usr = "******".format(user.split("/")[0], user.split("/")[2])

            for a_name, artifact in artifacts.items():
                out_file = os.path.join(lnk_path,
                                        artifact['filename'].format(usr))
                files_list = list(
                    self.Files.search(artifact['regex'].format(user)))
                self.logger().info(
                    "Founded {} {} files for user {} at {}".format(
                        len(files_list), a_name,
                        user.split("/")[-1],
                        user.split("/")[0]))
                if len(files_list) > 0:
                    save_csv(artifact['function'](files_list),
                             config=self.config,
                             outfile=out_file,
                             quoting=0,
                             file_exists='OVERWRITE')
                    self.logger().info(
                        "{} extraction done for user {} at {}".format(
                            a_name,
                            user.split("/")[-1],
                            user.split("/")[0]))

        self.logger().info("RecentFiles extraction done")
        return []

    def lnk_parser(self, files_list):
        """ Parses all '.lnk' files found for a user.

        Parameters:
            files_list (list): list of lnk files to parse (relative to casedir)
        """

        headers = [
            "mtime", "atime", "ctime", "btime", "drive_type", "drive_sn",
            "machine_id", "path", "network_path", "size", "atributes",
            "description", "command line arguments", "file_id", "volume_id",
            "birth_file_id", "birth_volume_id", "f_mtime", "f_atime",
            "f_ctime", "file"
        ]

        data = self.filesystem.get_macb(files_list, vss=self.vss)

        for file in files_list:
            lnk = Lnk(os.path.join(self.myconfig('casedir'), file),
                      self.encoding,
                      logger=self.logger())

            lnk = lnk.get_lnk_info()

            if lnk == -1:
                self.logger().warning("Problems with file {}".format(file))
                yield OrderedDict(
                    zip(
                        headers, data[file] + [
                            "", "", "", "", "", "", "", "", "", "", "", "", "",
                            "", "", "", file
                        ]))
            else:
                yield OrderedDict(zip(headers, data[file] + lnk + [file]))

    def automaticDest_parser(self, files_list):
        """ Parses automaticDest files

        Parameters:
            files_list (list): list of automaticDestinations-ms files to parse
        """

        # TODO: Get the default Windows encoding and avoid trying several encodings
        # TODO: Parse the files without DestList

        # Differences in DestList between versions at:
        # https://cyberforensicator.com/wp-content/uploads/2017/01/1-s2.0-S1742287616300202-main.2-14.pdf
        # Obtain the JumpList version from the header of DestList entry
        for jl in files_list:
            try:
                ole = olefile.OleFileIO(
                    os.path.join(self.myconfig('casedir'), jl))
            except Exception as exc:
                self.logger().warning(
                    "Problems creating OleFileIO with file {}\n{}".format(
                        jl, exc))
                continue
            try:
                data = ole.openstream('DestList').read()
                header_version, = struct.unpack('<L', data[0:4])
                version = 'w10' if header_version >= 3 else 'w7'
                self.logger().info(
                    "Windows version of Jumplists: {}".format(version))
                break
            except Exception:
                continue
            finally:
                ole.close()
        if 'version' not in locals():
            self.logger().warning(
                "Can't determine windows version. Assuming w10")
            version = 'w10'  # default

        # Offsets for different versions
        entry_ofs = {'w10': 130, 'w7': 114}
        id_entry_ofs = {'w10': ['<L', 88, 92], 'w7': ['<Q', 88, 96]}
        sz_ofs = {'w10': [128, 130], 'w7': [112, 114]}
        final_ofs = {'w10': 4, 'w7': 0}

        headers = [
            "Open date", "Application", "drive_type", "drive_sn", "machine_id",
            "path", "network_path", "size", "atributes", "description",
            "command line arguments", "file_id", "volume_id", "birth_file_id",
            "birth_volume_id", "f_mtime", "f_atime", "f_ctime", "file"
        ]

        # Main loop
        for jl in files_list:
            self.logger().info("Processing Jump list : {}".format(
                jl.split('/')[-1]))
            try:
                ole = olefile.OleFileIO(
                    os.path.join(self.myconfig('casedir'), jl))
            except Exception as exc:
                self.logger().warning(
                    "Problems creating OleFileIO with file {}\n{}".format(
                        jl, exc))
                continue

            if not ole.exists('DestList'):
                self.logger().warning(
                    "File {} does not have a DestList entry and can't be parsed"
                    .format(jl))
                ole.close()
                continue
            else:
                if not (len(ole.listdir()) - 1):
                    self.logger().warning(
                        "Olefile has detected 0 entries in file {}\nFile will be skipped"
                        .format(jl))
                    ole.close()
                    continue

                dest = ole.openstream('DestList')
                data = dest.read()
                if len(data) == 0:
                    self.logger().warning(
                        "No DestList data in file {}\nFile will be skipped".
                        format(jl))
                    ole.close()
                    continue
                self.logger().debug("DestList lenght: {}".format(
                    ole.get_size("DestList")))

                try:
                    # Double check number of entries
                    current_entries, pinned_entries = struct.unpack(
                        "<LL", data[4:12])
                    self.logger().debug(
                        "Current entries: {}".format(current_entries))
                except Exception as exc:
                    self.logger().warning(
                        "Problems unpacking header Destlist with file {}\n{}".
                        format(jl, exc))
                    # continue

                ofs = 32  # Header offset
                while ofs < len(data):
                    stream = data[ofs:ofs + entry_ofs[version]]
                    name = ""
                    try:
                        name = stream[72:88].decode()
                    except Exception:
                        self.logger().info("utf-8 decoding failed")
                        try:
                            name = stream[72:88].decode("cp1252")
                        except Exception as exc:
                            self.logger().info("cp1252 decoding failed")
                            self.logger().warning(
                                "Problems decoding name with file {}\n{}".
                                format(jl, exc))

                    name = name.replace("\00", "")

                    # Get id_entry of next entry
                    try:
                        id_entry, = struct.unpack(
                            id_entry_ofs[version][0],
                            stream[id_entry_ofs[version][1]:
                                   id_entry_ofs[version][2]])
                    except Exception as exc:
                        self.logger().warning(
                            "Problems unpacking id_entry with file {}\n{}".
                            format(jl, exc))
                        # self.logger().debug(stream[id_entry_ofs[version][1]:id_entry_ofs[version][2]])
                        break
                    id_entry = format(id_entry, '0x')

                    # Get MSFILETIME
                    try:
                        time0, time1 = struct.unpack("II", stream[100:108])
                    except Exception as exc:
                        self.logger().warning(
                            "Problems unpacking MSFILETIME with file {}\n{}".
                            format(jl, exc))
                        break

                    timestamp = getFileTime(time0, time1)

                    # sz: length of the Unicode string data
                    try:
                        sz, = struct.unpack(
                            "h", stream[sz_ofs[version][0]:sz_ofs[version][1]])
                        # self.logger().debug("sz: {}".format(sz))
                    except Exception as exc:
                        self.logger().warning(
                            "Problems unpaking unicode string size with file {}\n{}"
                            .format(jl, exc))
                        # self.logger().debug(stream[sz_ofs[version][0]:sz_ofs[version][1]])
                        break

                    ofs += entry_ofs[version]
                    sz2 = sz * 2  # Unicode 2 bytes

                    # Get unicode path
                    path = ""
                    try:
                        path = data[ofs:ofs + sz2].decode()
                    except UnicodeDecodeError:
                        try:
                            path = data[ofs:ofs + sz2].decode("iso8859-15")
                        except Exception as exc:
                            self.logger().warning(
                                "Problems decoding path with file {}\n{}".
                                format(jl, exc))
                    path = path.replace("\00", "")

                    temp = tempfile.NamedTemporaryFile()
                    # Move to the next entry
                    ofs += sz2 + final_ofs[version]
                    try:
                        aux = ole.openstream(id_entry)
                    except Exception as exc:
                        self.logger().warning(
                            "Problems with file {}\n{}".format(jl, exc))
                        self.logger().warning("ole.openstream failed")
                        temp.close()
                        break
                    datos = aux.read()
                    temp.write(datos)
                    temp.flush()

                    # Extract lnk data
                    lnk = Lnk(temp.name, self.encoding, logger=self.logger())
                    lnk = lnk.get_lnk_info()

                    temp.close()

                    n_hash = os.path.basename(jl).split(".")[0]
                    if lnk == -1:
                        yield OrderedDict(
                            zip(headers, [
                                time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                              time.gmtime(timestamp)),
                                self.dicID.get(n_hash, n_hash), "", "", "", "",
                                "", "", "", "", "", "", "", "", "", "", "", "",
                                jl
                            ]))
                    else:
                        yield OrderedDict(
                            zip(headers, [
                                time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                              time.gmtime(timestamp)),
                                self.dicID.get(n_hash, n_hash)
                            ] + lnk + [jl]))

            ole.close()

        self.logger().info("Jumlists parsed")

    def customDest_parser(self, files_list):
        """ Parses customDest files

        Parameters:
            files_list (list): list of customDestinations-ms files to parse
        """
        # regex = re.compile("\x4C\x00\x00\x00\x01\x14\x02\x00")
        split_str = b"\x4C\x00\x00\x00\x01\x14\x02\x00"

        headers = [
            "Application", "drive_type", "drive_sn", "machine_id", "path",
            "network_path", "size", "atributes", "description",
            "command line arguments", "file_id", "volume_id", "birth_file_id",
            "birth_volume_id", "f_mtime", "f_atime", "f_ctime", "file"
        ]

        for jl in files_list:
            with open(os.path.join(self.myconfig('casedir'), jl), "rb") as f:
                data = f.read()

            lnks = data.split(split_str)
            for lnk_b in lnks[1:]:
                f_temp = tempfile.NamedTemporaryFile()
                f_temp.write(b"\x4C\x00\x00\x00\x01\x14\x02\x00" + lnk_b)
                f_temp.flush()
                lnk = Lnk(f_temp.name, self.encoding, logger=self.logger())
                lnk = lnk.get_lnk_info()
                f_temp.close()

                n_hash = os.path.basename(jl).split(".")[0]
                if lnk == -1:
                    yield OrderedDict(
                        zip(headers, [
                            self.dicID.get(n_hash, n_hash), "", "", "", "", "",
                            "", "", "", "", "", "", "", "", "", "", "", jl
                        ]))
                else:
                    yield OrderedDict(
                        zip(headers,
                            [self.dicID.get(n_hash, n_hash)] + lnk + [jl]))

        self.logger().info("customDestinations parsed")
Esempio n. 13
0
class UsnJrnl(base.job.BaseModule):
    def run(self, path=""):
        """ Parse UsnJrnl files of a disk """
        self.vss = self.myflag('vss')
        disk = getSourceImage(self.myconfig)

        self.usn_path = self.myconfig(
            'voutdir') if self.vss else self.myconfig('outdir')
        check_folder(self.usn_path)
        self.usn_jrnl_file = os.path.join(self.usn_path, "UsnJrnl")
        self.filesystem = FileSystem(self.config, disk=disk)

        for p in disk.partitions:
            if not p.isMountable:
                continue
            if not self.vss:
                pname = ''.join(['p', p.partition])
                self._parse_usnjrnl(pname)
            else:
                for v, dev in p.vss.items():
                    if dev == "":
                        continue
                    self._parse_usnjrnl(v)

        # Delete the temporary dumped UsnJrnl file
        if os.path.exists(self.usn_jrnl_file):
            os.remove(self.usn_jrnl_file)
        return []

    def _parse_usnjrnl(self, pname):
        """ Get and parses UsnJrnl file for a partition """
        inode = self.filesystem.get_inode_from_path('/$Extend/$UsnJrnl:$J',
                                                    pname)

        if inode == -1:
            self.logger().warning(
                "Problem getting UsnJrnl from partition {}. File may not exist"
                .format(pname))
            return

        # Dumps UsnJrnl file from the data stream $J
        self.logger().info(
            "Dumping journal file of partition {}".format(pname))
        if self.vss:
            self.filesystem.icat(inode,
                                 pname,
                                 output_filename=self.usn_jrnl_file,
                                 attribute="$J",
                                 vss=True)
        else:
            self.filesystem.icat(inode,
                                 pname,
                                 output_filename=self.usn_jrnl_file,
                                 attribute="$J")
        self.logger().info(
            "Extraction of journal file completed for partition {}".format(
                pname))

        self.logger().info("Creating file {}".format(
            os.path.join(self.usn_path, "UsnJrnl_{}.csv".format(pname))))
        if os.stat(self.usn_jrnl_file).st_size > 0:
            # Create dump file
            records = self.parseUsn(infile=self.usn_jrnl_file, partition=pname)
            outfile = os.path.join(self.usn_path,
                                   "UsnJrnl_dump_{}.csv".format(pname))
            save_csv(records,
                     outfile=outfile,
                     file_exists='OVERWRITE',
                     quoting=0)
            # Create summary file from dump file
            filtered_records = self.summaryUsn(infile=outfile, partition=pname)
            out_summary = os.path.join(self.usn_path,
                                       "UsnJrnl_{}.csv".format(pname))
            save_csv(filtered_records,
                     outfile=out_summary,
                     file_exists='OVERWRITE',
                     quoting=0)

    def parseUsn(self, infile, partition):
        """ Generator that returns a dictionary for every parsed record in UsnJrnl file.

        Args:
            infile (str): path to UsnJrnl file
            partition (str): partition name
        """
        journalSize = os.path.getsize(infile)
        self.folders = dict()  # Stores filenames associated to directories

        with open(infile, "rb") as f:
            dataPointer = self.findFirstRecord(f)
            f.seek(dataPointer)

            # Estimate number of entries in UsnJrnl for progressBar.
            # Since 96 is a pessimistic average, process should terminate before progressBar reaches 100%.
            estimated_entries = int((journalSize - dataPointer) / 96)
            with tqdm(total=estimated_entries,
                      desc='Parse_UsnJrnl dump_{}'.format(partition)) as pbar:

                total_entries_found = 0
                while True:
                    nextRecord = self.findNextRecord(f, journalSize)
                    total_entries_found += 1
                    if not nextRecord:
                        pbar.update(estimated_entries - total_entries_found)
                        break
                    u = Usn(f)
                    f.seek(nextRecord)
                    try:
                        parent_mft = str(u.parentMftEntryNumber)
                    except Exception:
                        parent_mft = -1

                    if str(u.fileAttributes).find(
                            "DIRECTORY") > -1 and u.mftEntryNumber != -1:
                        self.folders[u.mftEntryNumber] = [
                            u.filename, u.parentMftEntryNumber
                        ]

                    if u.mftEntryNumber != -1:
                        yield OrderedDict([('Date', u.timestamp),
                                           ('MFT Entry', u.mftEntryNumber),
                                           ('Parent MFT Entry', parent_mft),
                                           ('Filename', u.filename),
                                           ('File Attributes',
                                            u.fileAttributes),
                                           ('Reason', u.reason)])
                    pbar.update()
                self.logger().info(
                    '{} journal entries found in partition {}'.format(
                        total_entries_found, partition))

    def summaryUsn(self, infile, partition):
        """ Return the relevant records from the UsnJrnl, adding full_path to filename """
        partition = infile.split(
            '_')[-1][:-4]  # infile in format 'UsnJrnl_dump_p06.csv'
        self.inode_fls = self.filesystem.load_path_from_inode(
            partition=partition, vss=self.vss)
        self.logger().debug(
            'Correctly loaded inode-name relation file for partition {}'.format(
                partition))

        folders = self.complete_dir(self.folders, partition)

        # Fields to filter
        fields = "(RENAME_OLD_NAME|RENAME_NEW_NAME|FILE_DELETE CLOSE|FILE_CREATE CLOSE)"
        out_fields = [
            'Date', 'Filename', 'Full Path', 'File Attributes', 'Reason',
            'MFT Entry', 'Parent MFT Entry', 'Reliable Path'
        ]

        base_dir = os.path.join(self.myconfig('source'), 'mnt', partition)
        for record in base.job.run_job(self.config,
                                       'base.input.CSVReader',
                                       path=[infile]):
            if re.search(fields, record['Reason']):
                try:
                    # Give priority to folders already found in journal
                    record['Full Path'] = os.path.join(
                        base_dir, folders[int(record['Parent MFT Entry'])][0],
                        record['Filename'])
                    record['Reliable Path'] = folders[int(
                        record['Parent MFT Entry'])][1]
                except Exception:
                    # parent inode not found in journal, inode info is used to complete path
                    record['Full Path'] = os.path.join(
                        self.inode_fls[record['Parent MFT Entry']][0],
                        record['Filename'])
                    record['Reliable Path'] = False

                yield OrderedDict([(i, record[i]) for i in out_fields])

    @staticmethod
    def findFirstRecord(infile):
        """ Returns a pointer to the first USN record found

        Modified version of Dave Lassalle's "parseusn.py"
        https://github.com/sans-dfir/sift-files/blob/master/scripts/parseusn.py

        Args:
            infile (file): open UsnJrnl file object
        """
        while True:
            data = infile.read(6553600)
            if not data:  # EOF reached without finding a record
                return 0
            data = data.lstrip(b'\x00')
            if data:
                return infile.tell() - len(data)

    @staticmethod
    def findNextRecord(infile, journalSize):
        """Often there are runs of null bytes between USN records

        This function reads through them and returns a pointer to the start of the next USN record

        Args:
            infile (file): open UsnJrnl file object
            journalSize (int): size of journal file
        """
        while True:
            try:
                recordLength = struct.unpack_from("I", infile.read(4))[0]
                if recordLength:
                    infile.seek(-4, 1)
                    return (infile.tell() + recordLength)
            except struct.error:
                if infile.tell() >= journalSize:
                    return False

    def complete_dir(self, folders, partition):
        """ Reconstructs absolutepaths of inodes from information of UsnJrnl.
        If it's not possible to reach root folder (inode 5), it uses $MFT entry. Such files are marked as unreliable

        Args:
            folders (list): folders
            partition (str): partiton name
        """

        final_folders = {}  # keys:inode; values:(filename, reliable)
        final_folders[5] = ""  # Root directory
        for entr in folders.keys():
            name = ""
            parent = folders[entr][1]
            actual = entr

            while True:
                if parent == 5:
                    final_folders[entr] = (name, True)
                    break
                if parent in final_folders.keys():
                    final_folders[entr] = (os.path.join(
                        final_folders[parent][0], folders[actual][0],
                        name), final_folders[parent][1])
                    break

                name = os.path.join(folders[actual][0], name)
                actual = parent
                try:
                    parent = folders[parent][1]
                    continue
                except Exception:
                    # Use MFT to complete the path
                    try:
                        final_folders[entr] = (os.path.join(
                            self.inode_fls[str(parent)][0], name), False)
                        break
                    except Exception:
                        final_folders[entr] = (os.path.join("*", name), False)
                        break

        return final_folders
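
A minimal standalone illustration of the parent-chain walk that complete_dir performs, using a hypothetical folders dict (MFT entry -> [name, parent entry]); inode 5 stands for the NTFS root, and all entries below are made up for the example.

# Hedged sketch: the same parent-chain reconstruction as complete_dir, outside
# the class and without the $MFT fallback. All entries here are hypothetical.
import os

folders = {40: ['Users', 5], 41: ['john', 40], 42: ['Desktop', 41]}

paths = {5: ('', True)}                      # root directory
for entry, (name, parent) in folders.items():
    chain, current, reliable = [name], parent, True
    while current != 5:
        if current not in folders:           # broken chain: path is unreliable
            chain.insert(0, '*')
            reliable = False
            break
        chain.insert(0, folders[current][0])
        current = folders[current][1]
    paths[entry] = (os.path.join(*chain), reliable)

print(paths[42])    # ('Users/john/Desktop', True)
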