def _is_inside_directory(base_dir, member_name):
    """Tell whether ``member_name`` resolves to ``base_dir`` itself or a path below it."""
    base_dir = os.path.abspath(base_dir)
    target = os.path.abspath(os.path.join(base_dir, member_name))
    # Compare on a path-component boundary: a bare string-prefix test would
    # accept a sibling such as "/tmp/abcdef" for the base "/tmp/abc".
    return target == base_dir or target.startswith(base_dir.rstrip(os.sep) + os.sep)


def _single_root_directory(extract_dir, dirname, temp_files):
    """Return one path holding the extracted tree, wrapping loose entries in ``dirname``."""
    if not extract_dir or not os.path.isdir(extract_dir):
        return extract_dir
    entries = os.listdir(extract_dir)
    if len(entries) == 1:
        # a single top-level entry is used directly as the result
        return os.path.join(extract_dir, entries[0])
    # zero or several top-level entries: move them below a directory named
    # after the package, inside a fresh temp directory
    holder = mkdtemp()
    temp_files.add(holder)
    wrapped = os.path.join(holder, dirname)
    os.makedirs(wrapped)
    for name in entries:
        os.rename(os.path.join(extract_dir, name), os.path.join(wrapped, name))
    return wrapped


def deb_archive(element, open_file, filename, temp_files, uncompressed_path=None):
    """
    Uncompress files in a Debian .deb archive.

    A .deb is a .ar archive with control.tar.gz (metadata) and data.tar.gz
    (actual package files). Only a gzip-compressed ``data.tar.gz`` member is
    handled here.

    :param element: not used by this function
    :param open_file: file object on the .deb content, handed to ``ArFile``
    :param filename: name of the file; only names ending with ``.deb`` are processed
    :param temp_files: collection (needs ``add``) in which every temp directory
        created here is registered, so that it is eventually deleted
    :param uncompressed_path: not None when another filter has already
        uncompressed this file; the function then does nothing
    :return: None when the file is not handled (already uncompressed, not a
        ``.deb``, or no ``data.tar.gz`` member); otherwise the path of the
        extracted content: the single top-level entry when there is exactly
        one, else a directory named after the package (filename without
        ``.deb``) containing all entries
    """
    if uncompressed_path is not None:
        # another filter has uncompressed this file
        return None
    extension = os.path.splitext(filename)[1]
    if extension != '.deb':
        return None
    new_filename = filename[:-len(extension)]
    # allow to extract files from the .deb
    ar_file_obj = ArFile(name=new_filename, mode='r', fileobj=open_file)
    try:
        # file descriptor to the data.tar.gz file
        data_file_obj = (ar_file_obj.extractfile('data.tar.gz')
                         or ar_file_obj.extractfile('data.tar.gz/'))
        if data_file_obj is None:
            return None
        try:
            result_file = mkdtemp()  # create a new temp directory
            # register it first, so it is deleted even if extraction fails
            temp_files.add(result_file)
            # all these operations use the same original file descriptor
            # (no need to really extract the data.tar.gz file)
            with tarfile.open(name='data.tar.gz', fileobj=data_file_obj,
                              mode='r:gz') as tar_file_obj:
                # keep regular files and directories only, and drop members
                # that would be created outside the temp directory
                # (e.g. names beginning with / or containing ..)
                members = [
                    member for member in tar_file_obj.getmembers()
                    if member.type in (tarfile.REGTYPE, tarfile.DIRTYPE)
                    and _is_inside_directory(result_file, member.name)
                ]
                tar_file_obj.extractall(result_file, members)
        finally:
            data_file_obj.close()
    finally:
        ar_file_obj.close()
    return _single_root_directory(result_file, new_filename, temp_files)
def update_element(self, element):
    """
    Extract some information from element to prepare the repository.

    Reads the ``control`` file stored in the ``control.tar.*`` member of the
    .deb archive, which is what these commands would do by hand::

        ar -x <package.deb> control.tar.gz
        tar -xf control.tar.gz control

    The raw control text is stored in ``element.extra_data``; its parsed
    fields fill ``element.archive`` (Package), ``element.version`` (Version),
    ``element.official_link`` (Homepage, '' if absent) and
    ``element.long_description`` (Description, '' if absent).

    :param element: Element to add to the repository; its ``archive_key``
        and ``filename`` are read, the attributes listed above are written
    :return: None (the element is modified in place)
    :raises InvalidRepositoryException: if the archive has no
        ``control.tar.*`` member
    :raises KeyError: if the tarball has no ``control`` member, or if the
        control data has no ``Package`` or ``Version`` field
    """
    from contextlib import ExitStack, closing

    # Every handle is registered on the stack as soon as it is opened, so all
    # of them are closed (in reverse order) on the error paths as well.
    with ExitStack() as stack:
        archive_file = stack.enter_context(closing(
            storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key)))
        ar_file = stack.enter_context(closing(
            ArFile(element.filename, mode='r', fileobj=archive_file)))
        control_file, control_file_name = self.get_subfile(ar_file, 'control.tar.')
        if control_file is None:
            raise InvalidRepositoryException(
                'No control file found in .deb package')
        stack.callback(control_file.close)

        mode = 'r:*'
        tar_source = control_file
        if control_file_name.endswith(('.xz', '.lzma')):
            # decompress in memory and hand tarfile a plain, uncompressed tar
            tar_source = io.BytesIO(lzma.decompress(control_file.read()))
            mode = 'r'
        tar_file = stack.enter_context(
            tarfile.open(name='control', mode=mode, fileobj=tar_source))
        try:
            control_data = tar_file.extractfile('./control')
        except KeyError:
            # some archives store the member without the leading './'
            control_data = tar_file.extractfile('control')
        try:
            control_data_value = control_data.read().decode('utf-8')
        finally:
            control_data.close()

    # populating different information on the element
    element.extra_data = control_data_value
    parsed_data = parse_control_data(control_data_value)
    element.archive = parsed_data['Package']
    element.version = parsed_data['Version']
    element.official_link = parsed_data.get('Homepage', '')
    element.long_description = parsed_data.get('Description', '')
def file_list(self, element, uid):
    """
    Return the names of the regular files shipped in a .deb package.

    The list is cached in the cache storage under ``filelist_<sha256>``. On a
    cache miss it is computed from the ``data.tar.*`` member of the archive
    and stored (one UTF-8 encoded name per line) before being returned.

    :param element: Element whose ``sha256``, ``archive_key`` and
        ``filename`` are read
    :param uid: identifier used to locate and store the cache entry
    :return: list of file names, without the leading ``./`` of tar members
    :raises InvalidRepositoryException: if the archive has no ``data.tar.*``
        member
    """
    from contextlib import ExitStack, closing

    cache_filename = 'filelist_%s' % element.sha256
    key = storage(settings.STORAGE_CACHE).uid_to_key(uid)
    fileobj = storage(settings.STORAGE_CACHE).get_file(key, cache_filename)
    if fileobj is not None:
        try:
            # only remove the line terminator added when the cache was
            # written, so cached names equal the freshly computed ones
            return [line.rstrip(b'\n').decode('utf-8') for line in fileobj]
        finally:
            fileobj.close()

    # Every handle is registered on the stack as soon as it is opened, so all
    # of them are closed (in reverse order) on the error paths as well.
    with ExitStack() as stack:
        archive_file = stack.enter_context(closing(
            storage(settings.STORAGE_ARCHIVE).get_file(
                element.archive_key, sub_path='')))
        ar_file = stack.enter_context(closing(
            ArFile(element.filename, mode='r', fileobj=archive_file)))
        data_file, data_file_name = self.get_subfile(ar_file, 'data.tar.')
        if data_file is None:
            raise InvalidRepositoryException(
                'No data file found in .deb package')
        stack.callback(data_file.close)

        mode = 'r:*'
        tar_source = data_file
        if data_file_name.endswith(('.xz', '.lzma')):
            # decompress in memory and hand tarfile a plain, uncompressed tar
            tar_source = io.BytesIO(lzma.decompress(data_file.read()))
            mode = 'r'
        tar_file = stack.enter_context(
            tarfile.open(name='data', mode=mode, fileobj=tar_source))
        # member paths usually look like './usr/bin/foo'; drop the './'
        # prefix only when it is really there
        names = [
            member.path[2:] if member.path.startswith('./') else member.path
            for member in tar_file.getmembers() if member.isfile()
        ]

    with tempfile.NamedTemporaryFile(dir=settings.TEMP_ROOT) as tmpfile:
        tmpfile.write(''.join('%s\n' % name for name in names).encode('utf-8'))
        tmpfile.flush()
        # rewind so the storage backend reads the content from the start
        tmpfile.seek(0)
        storage(settings.STORAGE_CACHE).store_descriptor(
            uid, cache_filename, tmpfile)
    return names