def generate_and_store_file_objects(self, file_paths: List[Path], extraction_dir: Path, parent: FileObject):
    '''
    Create and store a FileObject for every non-empty extracted file.

    Each new object gets a virtual file path rooted at the parent firmware,
    the parent's MIME type in its temporary data, and is locked for
    unpacking and written to file storage. Duplicates (same uid extracted
    several times from one archive) only get an additional virtual path.

    Returns a dict mapping uid -> FileObject for all extracted files.
    '''
    extracted_files = {}
    root_uid = parent.get_root_uid()
    for path in file_paths:
        if file_is_empty(path):
            continue
        file_object = FileObject(file_path=str(path))
        virtual_path = join_virtual_path(
            get_base_of_virtual_path(parent.get_virtual_file_paths()[root_uid][0]),
            parent.uid,
            get_relative_object_path(path, extraction_dir),
        )
        file_object.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
        if file_object.uid in extracted_files:
            # the same file was extracted multiple times from one archive:
            # only record the additional virtual path
            extracted_files[file_object.uid].virtual_file_path[root_uid].append(virtual_path)
        else:
            self.db_interface.set_unpacking_lock(file_object.uid)
            self.file_storage_system.store_file(file_object)
            file_object.virtual_file_path = {root_uid: [virtual_path]}
            file_object.parent_firmware_uids.add(root_uid)
            extracted_files[file_object.uid] = file_object
    return extracted_files
def generate_and_store_file_objects(self, file_paths: List[Path], extractor_dir: str, parent: FileObject):
    '''
    Create and store a FileObject for every non-empty extracted file
    (legacy variant building '|'-separated virtual path strings).

    Returns a dict mapping uid -> FileObject for all extracted files.
    '''
    extracted_files = {}
    for path in file_paths:
        if file_is_empty(path):
            continue
        file_object = FileObject(file_path=str(path))
        virtual_path = '{}|{}|{}'.format(
            parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
            parent.get_uid(),
            get_object_path_excluding_fact_dirs(
                make_unicode_string(str(path)), str(Path(extractor_dir, 'files'))
            ),
        )
        file_object.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
        uid = file_object.get_uid()
        if uid in extracted_files:
            # the same file is extracted multiple times from one archive
            extracted_files[uid].virtual_file_path[parent.get_root_uid()].append(virtual_path)
        else:
            self.db_interface.set_unpacking_lock(file_object.uid)
            self.file_storage_system.store_file(file_object)
            file_object.virtual_file_path = {parent.get_root_uid(): [virtual_path]}
            file_object.parent_firmware_uids.add(parent.get_root_uid())
            extracted_files[uid] = file_object
    return extracted_files
def remove_false_positive_archives(self) -> str:
    '''
    Screen every carved file in the unpack directory and validate it with
    the tool matching its MIME type; returns the joined screening log.
    '''
    for file_path in self.unpack_directory.iterdir():
        mime = get_file_type_from_path(file_path)['mime']
        # If the carved file is the same as the original file, then we don't want to keep it.
        if self.check_file_size_same_as_original(file_path):
            continue
        if mime == 'application/x-tar' or self._is_possible_tar(mime, file_path):
            self.check_archives_validity(file_path, 'tar -tvf {}', 'does not look like a tar archive')
        elif mime == 'application/x-xz':
            self.check_archives_validity(file_path, 'xz -c -d {} | wc -c')
        elif mime == 'application/gzip':
            self.check_archives_validity(file_path, 'gzip -c -d {} | wc -c')
        elif mime in ('application/zip', 'application/x-7z-compressed', 'application/x-lzma'):
            self.check_archives_validity(file_path, '7z l {}', 'ERROR')
        elif mime in ('compression/zlib', 'application/zlib'):
            self.check_zlib_archive_validity(file_path)
    return '\n'.join(self.screening_logs)
def _find_relevant_files(self, extracted_files_dir: Path):
    '''
    Walk *extracted_files_dir* recursively and collect
    ('/relative/path', full type string) tuples for every regular,
    non-symlink file whose type is considered relevant.
    '''
    relevant = []
    for candidate in safe_rglob(extracted_files_dir):
        if not candidate.is_file() or candidate.is_symlink():
            continue
        type_info = get_file_type_from_path(candidate.absolute())
        if self._has_relevant_type(type_info):
            relevant.append(('/{}'.format(candidate.relative_to(Path(self.root_path))), type_info['full']))
    return relevant
def unpack_function(file_path, tmp_dir):
    '''
    Mount the filesystem(s) contained in *file_path* into *tmp_dir* and
    return the mount output wrapped in a dict.

    A DOS MBR image may contain several partitions, so it is handled by
    the boot-record path; everything else is mounted as one filesystem.
    '''
    mime = get_file_type_from_path(file_path)['mime']
    if mime == 'filesystem/dosmbr':
        return {'output': _mount_from_boot_record(file_path, tmp_dir)}
    return {'output': _mount_single_filesystem(file_path, mime, tmp_dir)}
def process_object(self, file_object):
    '''
    Run the file-type analysis on *file_object*.

    The detected type dict (and a summary derived from it) is stored in
    file_object.processed_analysis[self.NAME]; the object is returned.
    '''
    file_object.processed_analysis[self.NAME] = get_file_type_from_path(file_object.file_path)
    analysis = file_object.processed_analysis[self.NAME]
    analysis['summary'] = self._get_summary(analysis)
    return file_object
def _find_relevant_files(self, tmp_dir: TemporaryDirectory):
    '''
    Walk the temporary directory recursively and collect
    ('/relative/path', full type string) tuples for every regular,
    non-symlink file whose type is considered relevant.
    '''
    relevant = []
    for candidate in Path(tmp_dir.name).rglob('*'):
        if not candidate.is_file() or candidate.is_symlink():
            continue
        type_info = get_file_type_from_path(candidate.absolute())
        if self._has_relevant_type(type_info):
            relevant.append(('/{}'.format(candidate.relative_to(Path(self.root_path))), type_info['full']))
    return relevant
def get_unpack_status(file_path: str, binary: bytes, extracted_files: List[Path], meta_data: Dict, config: ConfigParser):
    '''
    Annotate *meta_data* (in place) with the file's entropy and a
    packed/unpacked summary.

    If files were extracted, delegate to _detect_unpack_loss; otherwise the
    file counts as 'unpacked' when its MIME type is a known compressed type
    or the entropy classifier says it is not compressed, and 'packed'
    otherwise.
    '''
    meta_data['summary'] = []
    meta_data['entropy'] = avg_entropy(binary)
    if extracted_files:
        _detect_unpack_loss(binary, extracted_files, meta_data, config.getint('ExpertSettings', 'header_overhead'))
        return
    is_known_compressed_type = (
        get_file_type_from_path(file_path)['mime']
        in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')
    )
    # is_compressed is only evaluated when the MIME check fails (short-circuit)
    if is_known_compressed_type or not is_compressed(
        binary,
        compress_entropy_threshold=config.getfloat('ExpertSettings', 'unpack_threshold'),
        classifier=avg_entropy,
    ):
        meta_data['summary'] = ['unpacked']
    else:
        meta_data['summary'] = ['packed']
def get_unpack_status(self, fo, extracted_fos):
    '''
    Store the file's entropy and a packed/unpacked summary in
    fo.processed_analysis['unpacker'].

    If files were extracted, delegate to _detect_unpack_loss; otherwise
    classify via the known-compressed MIME list and the entropy-based
    is_compressed check.
    '''
    # alias of the analysis dict -- writes through to fo.processed_analysis
    analysis = fo.processed_analysis['unpacker']
    analysis['summary'] = []
    analysis['entropy'] = avg_entropy(fo.binary)
    if len(fo.files_included) >= 1:
        self._detect_unpack_loss(fo, extracted_fos)
        return
    if get_file_type_from_path(fo.file_path)['mime'] in self.VALID_COMPRESSED_FILE_TYPES:
        analysis['summary'] = ['unpacked']
        return
    threshold = self.config['ExpertSettings'].getfloat('unpack_threshold', 0.7)
    if is_compressed(fo.binary, compress_entropy_threshold=threshold, classifier=avg_entropy):
        analysis['summary'] = ['packed']
    else:
        analysis['summary'] = ['unpacked']
def get_file_type(path):
    '''
    Return a file-type dict with ``mime`` and ``full`` keys for *path*.

    Special filesystem objects are answered directly without opening the
    file, since opening them would fail, hang, or have side effects;
    everything else is delegated to get_file_type_from_path.
    '''
    path = pathlib.Path(path)
    # Make sure symlinks aren't followed
    if path.is_symlink():
        return {"mime": "inode/symlink", "full": "symbolic link"}
    # get_file_type_from_path would raise IsADirectoryError
    if path.is_dir():
        return {"mime": "directory", "full": "directory"}
    # Attempting to open this would stay stuck forever
    if path.is_fifo():
        return {"mime": "inode/fifo", "full": "fifo"}
    # Don't attempt to open sockets
    if path.is_socket():
        return {"mime": "inode/socket", "full": "socket"}
    # Reading device nodes can block or trigger driver side effects, so
    # answer them here as well (MIME values follow file(1)'s inode/* scheme)
    if path.is_block_device():
        return {"mime": "inode/blockdevice", "full": "block special"}
    if path.is_char_device():
        return {"mime": "inode/chardevice", "full": "character special"}
    return get_file_type_from_path(path)
def remove_false_positive_archives(self) -> str:
    '''
    Validate every carved archive in the unpack directory with the tool
    matching its MIME type; returns the joined screening log.
    '''
    for file_path in self.unpack_directory.iterdir():
        mime = get_file_type_from_path(file_path)['mime']
        if mime == 'application/x-tar' or self._is_possible_tar(mime, file_path):
            self.check_archives_validity(file_path, 'tar -tvf {}', 'does not look like a tar archive')
        elif mime == 'application/x-xz':
            self.check_archives_validity(file_path, 'xz -c -d {} | wc -c')
        elif mime == 'application/gzip':
            self.check_archives_validity(file_path, 'gzip -c -d {} | wc -c')
        elif mime in ('application/zip', 'application/x-7z-compressed', 'application/x-lzma'):
            self.check_archives_validity(file_path, '7z l {}', 'ERROR')
    return '\n'.join(self.screening_logs)
def extract_files_from_file(self, file_path: str, tmp_dir) -> Tuple[List, Dict]:
    '''
    Select the unpacker matching the file's MIME type and run it on
    *file_path*, extracting into *tmp_dir*.
    '''
    mime = get_file_type_from_path(file_path)['mime']
    unpacker = self.get_unpacker(mime)
    return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, unpacker)
def extract_files_from_file(self, file_path, tmp_dir, file_depth=0):
    '''
    Select the unpacker matching the file's MIME type (taking the current
    unpack depth into account) and run it on *file_path*, extracting into
    *tmp_dir*.
    '''
    mime = get_file_type_from_path(file_path)['mime']
    unpacker = self.get_unpacker(mime, file_depth)
    return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, unpacker)