def generate_and_store_file_objects(self, file_paths, tmp_dir, parent):
    '''
    Create a FileObject for every non-empty path in file_paths, store each
    distinct file once and return the created objects keyed by their UID.

    A file that shows up several times (same UID) is stored only once; each
    further occurrence just appends its virtual path to the object that is
    already registered.

    :param file_paths: iterable of paths of the extracted files
    :param tmp_dir: extraction directory, handed to
                    get_chroot_path_excluding_extracted_dir to build the virtual path
    :param parent: file object the files were extracted from
    :return: dict mapping UID -> FileObject
    '''
    extracted_files = {}
    # Everything derived from the parent alone is identical for all extracted
    # files. It is resolved once, on the first non-empty file, so that an input
    # without any non-empty file still never touches the parent.
    parent_context = None
    for item in file_paths:
        if file_is_empty(item):
            continue
        if parent_context is None:
            root_uid = parent.get_root_uid()
            parent_base_path = parent.get_base_of_virtual_path(
                parent.get_virtual_file_paths()[root_uid][0])
            parent_context = (
                root_uid,
                '{}|{}'.format(parent_base_path, parent.get_uid()),
                get_file_type_from_path(parent.file_path)['mime'],
            )
        root_uid, virtual_path_prefix, parent_mime = parent_context

        current_file = FileObject(file_path=item)
        current_virtual_path = '{}|{}'.format(
            virtual_path_prefix,
            get_chroot_path_excluding_extracted_dir(make_unicode_string(item), tmp_dir))
        uid = current_file.get_uid()

        if uid in extracted_files:
            # the same file is extracted multiple times from one archive
            extracted_files[uid].virtual_file_path[root_uid].append(current_virtual_path)
            continue

        current_file.temporary_data['parent_fo_type'] = parent_mime
        # NOTE(review): the lock is keyed on the `uid` attribute as before;
        # presumably identical to the get_uid() result -- confirm.
        self.db_interface.set_unpacking_lock(current_file.uid)
        self.file_storage_system.store_file(current_file)
        current_file.virtual_file_path = {root_uid: [current_virtual_path]}
        current_file.parent_firmware_uids.add(root_uid)
        extracted_files[uid] = current_file
    return extracted_files
def _find_relevant_files(self, tmp_dir: TemporaryDirectory):
    '''
    Walk tmp_dir recursively and collect every regular, non-symlink file whose
    detected file type is accepted by self._has_relevant_type.

    :param tmp_dir: temporary directory object; its `name` is the search root
    :return: list of tuples ('/<path relative to self.root_path>', <'full' file type>)
    '''
    relevant = []
    for candidate in Path(tmp_dir.name).glob('**/*'):
        if not candidate.is_file() or candidate.is_symlink():
            continue
        detected_type = get_file_type_from_path(candidate.absolute())
        if not self._has_relevant_type(detected_type):
            continue
        relative_path = candidate.relative_to(Path(self.root_path))
        relevant.append(('/{}'.format(relative_path), detected_type['full']))
    return relevant
def process_object(self, file_object):
    '''
    Detect the file type of file_object.file_path and store the result in
    file_object.processed_analysis[self.NAME].

    The stored result is whatever get_file_type_from_path returns, extended by
    a 'summary' entry built by self._get_summary.

    :param file_object: object to analyse; modified in place
    :return: the same file_object
    '''
    analysis = file_object.processed_analysis
    analysis[self.NAME] = get_file_type_from_path(file_object.file_path)
    # The summary is derived from the entry that has just been stored.
    stored_result = analysis[self.NAME]
    stored_result['summary'] = self._get_summary(stored_result)
    return file_object
def get_unpack_status(self, fo, extracted_fos):
    '''
    Record entropy and unpack status of fo in fo.processed_analysis['unpacker'].

    If fo has included files, the evaluation is delegated to
    self._detect_unpack_loss. Otherwise the summary becomes ['unpacked'] when
    the mime type is listed in self.VALID_COMPRESSED_FILE_TYPES or when
    is_compressed does not flag the binary, and ['packed'] when it does.

    :param fo: file object whose 'unpacker' analysis entry is updated in place
    :param extracted_fos: extracted file objects, forwarded to _detect_unpack_loss
    '''
    unpacker_result = fo.processed_analysis['unpacker']
    unpacker_result['summary'] = []
    unpacker_result['entropy'] = avg_entropy(fo.binary)

    if len(fo.files_included) >= 1:
        self._detect_unpack_loss(fo, extracted_fos)
        return

    mime_type = get_file_type_from_path(fo.file_path)['mime']
    if mime_type in self.VALID_COMPRESSED_FILE_TYPES:
        status = 'unpacked'
    elif is_compressed(
            fo.binary,
            compress_entropy_threshold=self.config['ExpertSettings'].getfloat('unpack_threshold', 0.7),
            classifier=avg_entropy):
        status = 'packed'
    else:
        status = 'unpacked'
    unpacker_result['summary'] = [status]
def unpack_function(file_path, tmp_dir):
    '''
    Loop-mount the file system image read-only and copy its content to tmp_dir.

    file_path specifies the input file.
    tmp_dir should be used to store the extracted files.

    :return: dict whose 'output' entry holds the concatenated output of the
             mount, copy and umount commands
    '''
    from shlex import quote

    mime_type = get_file_type_from_path(file_path)['mime']
    type_parameter = '-t {}'.format(type_dict[mime_type]) if mime_type in type_dict else ''

    # The context manager removes the mount point even if a command raises.
    with TemporaryDirectory() as mount_point:
        # Paths are shell-quoted: names containing spaces or shell
        # metacharacters must not be split or interpreted by the shell.
        output = execute_shell_command('sudo mount {} -v -o ro,loop {} {}'.format(
            type_parameter, quote(str(file_path)), quote(mount_point)))
        try:
            # The glob stays outside the quotes so the shell still expands it.
            # NOTE: a plain `*` does not match top-level dot files.
            output += execute_shell_command('sudo cp -av {}/* {}/'.format(
                quote(mount_point), quote(str(tmp_dir))))
        finally:
            # Always unmount, otherwise the mount point cannot be removed.
            output += execute_shell_command('sudo umount -v {}'.format(quote(mount_point)))
    return {'output': output}
def extract_files_from_file(self, file_path, tmp_dir, file_depth=0):
    '''
    Select the unpacker matching the mime type of file_path (and file_depth)
    and use it to extract the file into tmp_dir.

    :param file_path: file to unpack
    :param tmp_dir: directory handed to the unpacker as extraction target
    :param file_depth: forwarded to self.get_unpacker
    :return: result of self._extract_files_from_file_using_specific_unpacker
    '''
    mime_type = get_file_type_from_path(file_path)['mime']
    unpacker = self.get_unpacker(mime_type, file_depth)
    return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, unpacker)
def test_get_file_type_of_internal_link_representation(self):
    # The fixture 'symbolic_link_representation' must be reported as a
    # symbolic link pointing to /tmp.
    fixture_path = os.path.join(get_test_data_dir(), 'symbolic_link_representation')
    detected = get_file_type_from_path(fixture_path)
    self.assertEqual(detected['full'], 'symbolic link to \'/tmp\'')
    self.assertEqual(detected['mime'], 'inode/symlink')
def test_get_file_type_custom_magic(self):
    # The ROS header fixture is expected to be identified as 'firmware/ros'.
    fixture_path = '{}/helperFunctions/ros_header'.format(get_test_data_dir())
    detected = get_file_type_from_path(fixture_path)
    self.assertEqual(detected['mime'], 'firmware/ros', 'mime type not correct')
    self.assertEqual(detected['full'], 'ROS Container', 'full type not correct')
def test_get_file_type_system_magic(self):
    # The zip fixture is expected to be identified as 'application/zip'.
    fixture_path = '{}/container/test.zip'.format(get_test_data_dir())
    detected = get_file_type_from_path(fixture_path)
    self.assertEqual(detected['mime'], 'application/zip', 'mime type not correct')
    self.assertEqual(detected['full'], 'Zip archive data, at least v2.0 to extract', 'full type not correct')