Beispiel #1
0
    def _skip_file(self, d, files):
        '''
        The function passed into shutil.copytree to ignore certain patterns and filetypes
        Currently Skipped
        Directories - handled by copytree
        Symlinks - handled by copytree
        Write-only files (stuff in /proc)
        Binaries (can't scan them)
        '''
        skip_list = []
        for f in files:
            f_full = os.path.join(d, f)
            if not os.path.isdir(f_full):
                if not os.path.islink(f_full):
                    #mode = oct(os.stat(f_full).st_mode)[-3:]
                    # executing as root makes this first if clause useless.
                    # i thought i'd already removed it. - jduncan
                    #if mode == '200' or mode == '444' or mode == '400':
                    #    skip_list.append(f)
                    mime_type = content_type.from_file(f_full)
                    if f == 'insights_archive.txt':
                        # don't exclude this file! we need it to parse core collection archives
                        continue
                    if 'text' not in mime_type and 'json' not in mime_type:
                        skip_list.append(f)

        return skip_list
Beispiel #2
0
    def _assert_type(self, _input, is_buffer=False):
        '''
        Validate that ``_input`` is a compressed tar archive.

        The detected outer content type is stored on ``self.content_type``
        and must be one of the keys of ``self.TAR_FLAGS``; the inner content
        type must be 'application/x-tar'.

        ``is_buffer`` is accepted but not used by this implementation.

        Raises InvalidContentType when the outer type is not supported and
        InvalidArchive when the payload is not a tar archive.
        '''
        outer_type = content_type.from_file(_input)
        self.content_type = outer_type
        if outer_type not in self.TAR_FLAGS:
            raise InvalidContentType(outer_type)

        # Only inspect the payload once the outer container is known-good.
        if content_type.from_file_inner(_input) != 'application/x-tar':
            raise InvalidArchive('No compressed tar archive')
Beispiel #3
0
    def _extract_sosreport(self, path):
        '''
        Unpack the data source at ``path`` and return the directory to work on.

        What happens depends on the (lower-cased) type string reported by
        ``content_type.from_file``:

        * contains 'directory' - nothing to extract; ``self.origin_path`` is
          reset to None and ``path`` itself is returned.
        * equals 'xz compressed data' - extracted into ``self.origin_path``
          with the external ``tar`` binary; the first entry listed in that
          directory is returned.
        * otherwise contains 'compressed data' - extracted with ``tarfile``
          into ``self.origin_path``; returns ``self.origin_path`` joined with
          the common prefix of the archive member names.

        Raises Exception when the type is none of the above or when
        extraction fails (the underlying error is logged first).
        '''
        # Imported here so the method does not depend on a module-level
        # import that may not be present in the file.
        import subprocess

        self.logger.con_out("Beginning SOSReport Extraction")
        compression_sig = content_type.from_file(path).lower()
        if 'directory' in compression_sig:
            self.logger.info('%s appears to be a %s - continuing', path,
                             compression_sig)
            # Clear out origin_path as we don't have one
            self.origin_path = None
            return path

        if 'compressed data' not in compression_sig:  # pragma: no cover
            raise Exception(
                'CompressionError: Unable To Determine Compression Type')

        if compression_sig == 'xz compressed data':
            # This is a hack to account for the fact that the tarfile library
            # doesn't handle lzma (XZ) compression until version 3.3 beta
            try:
                self.logger.info(
                    'Data Source Appears To Be LZMA Encrypted Data - decompressing into %s',
                    self.origin_path)
                self.logger.info('LZMA Hack - Creating %s',
                                 self.origin_path)
                if not os.path.isdir(self.origin_path):
                    os.makedirs(self.origin_path)
                # Argument list instead of a shell string: paths containing
                # spaces or shell metacharacters are passed through verbatim,
                # and a non-zero exit status raises instead of being ignored.
                subprocess.check_call(
                    ['tar', '-xJf', path, '-C', self.origin_path])
                return os.path.join(self.origin_path,
                                    os.listdir(self.origin_path)[0])

            except Exception as e:  # pragma: no cover
                self.logger.exception(e)
                raise Exception(
                    'DecompressionError, Unable to decrypt LZMA compressed file %s'
                    % path)

        # The context manager closes the archive handle on every exit path.
        with tarfile.open(path, 'r') as p:
            self.logger.info(
                'Data Source Appears To Be %s - decompressing into %s',
                compression_sig, self.origin_path)
            try:
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive; only feed this archives from a trusted source.
                p.extractall(self.origin_path)
                return os.path.join(
                    self.origin_path, os.path.commonprefix(p.getnames()))

            except Exception as e:  # pragma: no cover
                self.logger.exception(e)
                raise Exception(
                    "DeCompressionError: Unable to De-Compress %s into %s"
                    % (path, self.origin_path))
Beispiel #4
0
def extract(path, timeout=None, extract_dir=None):
    """Yield an ``Extraction`` for the archive at *path*, then clean up.

    A ``ZipExtractor`` is used when ``from_file(path)`` reports
    "application/zip"; every other type is handed to ``TarExtractor``.
    ``timeout`` is passed to the extractor and ``extract_dir`` to its
    ``from_path`` call.

    The yielded ``Extraction`` is built from the extractor's temporary
    directory and the content type the extractor itself reports. Once the
    generator is resumed or closed, that temporary directory is removed
    (if one was created).

    NOTE(review): this is a plain generator function as seen here; it is
    presumably wrapped as a context manager outside this view - confirm.
    """
    is_zip = from_file(path) == "application/zip"
    extractor_cls = ZipExtractor if is_zip else TarExtractor
    extractor = extractor_cls(timeout=timeout)

    tmp_dir = None
    try:
        extracted = extractor.from_path(path, extract_dir=extract_dir)
        tmp_dir = extracted.tmp_dir
        yield Extraction(tmp_dir, extractor.content_type)
    finally:
        # Nothing to remove when extraction failed before producing a dir.
        if tmp_dir:
            fs.remove(tmp_dir, chmod=True)
Beispiel #5
0
def analyze(paths, excludes=None):
    """Analyze one path or a list of paths and return a combined ``Result``.

    A single (non-list) argument is treated as a one-element list. Each path
    is handled according to the first rule that matches:

    * detected content type is "text/plain" - loaded directly with ``_load``
    * it is a directory - processed in place with ``_process``
    * anything else - extracted first, then the extracted temporary
      directory is processed with ``_process``

    ``excludes`` is forwarded unchanged to ``_process``.
    """
    targets = paths if isinstance(paths, list) else [paths]

    children = []
    for target in targets:
        if content_type.from_file(target) == "text/plain":
            children.append(_load(target))
            continue

        if os.path.isdir(target):
            children.extend(_process(target, excludes))
            continue

        # Everything else is assumed to be an archive.
        with extract(target) as ex:
            children.extend(_process(ex.tmp_dir, excludes))

    return Result(children=children)