def extract_rar(j):
    """
    Extract the first media file of a rar archive and retarget the job at it.

    On success the job is updated in place so that the extracted file is
    organized instead of the archive: ``j['s']`` becomes the path of the
    extracted file and ``j['t']`` is recomputed for that file with
    ``get_target_path`` inside the directory of the previous target.

    :param dict j: organization job; ``j['s']`` is the archive path and
        ``j['t']`` the current target path
    :return: extraction success; ``False`` when no archive member has an
        extension listed in ``MEDIA_EXTS``
    :rtype: bool
    """
    # The context manager releases the archive handle on every exit path.
    with RarFile(j['s']) as rf:
        media = None
        for f in rf.infolist():
            _, ext = os.path.splitext(f.filename)
            if ext in MEDIA_EXTS:
                media = f
                break
        if media is None:
            return False

        d, _ = os.path.split(j['s'])
        extract_path = os.path.join(d, media.filename)
        LOG.info(f"extract {j['s']} -> {extract_path}")
        # Extract the member selected above (not just the first archive
        # entry). ``path`` is the destination *directory*: the member keeps
        # its archive-relative name beneath it and so lands at
        # ``extract_path``. An empty directory part means "current directory",
        # which rarfile expresses as ``None``.
        rf.extract(media, path=d or None)

    j['s'] = extract_path
    target_dir, _ = os.path.split(j['t'])
    j['t'] = get_target_path(extract_path, target_dir)
    return True
class RarFileExtractor(Extractor):
    """Extractor that lists and extracts the members of a RAR archive."""

    def __init__(self, archive_path: str):
        """Open the archive at ``archive_path`` and index its file members."""
        self.archive_path = archive_path
        self.rarfile = RarFile(self.archive_path)
        # Names of all non-directory members, collected once at construction.
        self.namelist = {
            i.filename for i in self.rarfile.infolist() if not i.isdir()
        }

    def __exit__(self, *args):
        """Close the underlying archive when the ``with`` block is left."""
        # NOTE(review): ``__enter__`` is presumably provided by ``Extractor``
        # -- confirm against the base class.
        self.rarfile.close()

    def list_files(self) -> set:
        """Return the set of member file names gathered in ``__init__``."""
        return self.namelist

    def extract(self, file_name: str, dest_path: str,
                write_hook: Callable[[bytes], bytes] = None):
        """
        Copy one archive member to ``dest_path`` in ``CHUNKSIZE`` pieces.

        :param file_name: name of the member inside the archive
        :param dest_path: path of the file to write (opened in ``'wb'`` mode)
        :param write_hook: optional callable applied to every chunk; its
            return value is what gets written. ``None`` writes chunks as read.
        """
        with self.rarfile.open(file_name) as rf, open(dest_path, 'wb') as out:
            while True:
                chunk = rf.read(CHUNKSIZE)
                if not chunk:
                    # An empty read marks the end of the member.
                    break
                out.write(write_hook(chunk) if write_hook else chunk)
def extrair_rar(rf: "RarFile", caminho: str):
    """
    Extract the CSV files from a .rar archive.

    Every member whose name ends in ``csv`` is written directly into
    ``caminho`` under its base name (directories inside the archive are
    flattened), and a progress line is printed for each one.

    Parameters
    ----------
    rf: RarFile
        the opened compressed archive.
    caminho: str
        path of the folder where the files must be saved.
    """
    # The last path component is only a label for the progress message;
    # stripping trailing slashes keeps it from being empty for "folder/".
    ano = caminho.rstrip('/').split('/')[-1]
    for f in rf.infolist():
        # Keep only the compressed csv files
        if f.filename.endswith('csv'):
            filename = f.filename.split('/')[-1]
            print("\033[94m>> Baixando {}/{}\033[0m".format(ano, filename))
            with open(caminho + '/' + filename, "wb") as of:
                of.write(rf.read(f.filename))
def unRarFull(fias_object):
    """
    Unpack an object's XML file from the full FIAS database archive.

    The archive ``WORK_DIR + FIAS_XML_RAR`` is scanned for members whose name
    matches the regular expression ``fias_object.FILE``. The name and size of
    the last matching member are stored on ``fias_object`` as ``xml_file``
    and ``xml_file_size``; if the recorded size is positive, that member is
    extracted into ``WORK_DIR``.

    :param fias_object: object providing the ``FILE`` pattern; receives the
        ``xml_file`` / ``xml_file_size`` attributes
    """
    # The context manager closes the archive even if extraction raises.
    with RarFile(fiases.fias_data.WORK_DIR
                 + fiases.fias_data.FIAS_XML_RAR) as rf:
        objectMatcher = re.compile(fias_object.FILE)
        print('')
        for f in rf.infolist():
            if objectMatcher.match(f.filename):
                fias_object.xml_file = f.filename
                fias_object.xml_file_size = f.file_size

        if fias_object.xml_file_size > 0:
            rf.extract(fias_object.xml_file, fiases.fias_data.WORK_DIR)
def unrarUpdate(fias_object):
    """
    Unpack an object's XML file from the FIAS update (delta) archive.

    The archive ``WORK_DIR + FIAS_DELTA_XML_RAR`` is scanned for members
    whose name matches the regular expression ``fias_object.FILE``. The name
    and size of the last matching member are stored on ``fias_object`` as
    ``xml_delta_file`` and ``xml_delta_file_size``; if the recorded size is
    positive, that member is extracted into ``WORK_DIR``.

    :param fias_object: object providing the ``FILE`` pattern; receives the
        ``xml_delta_file`` / ``xml_delta_file_size`` attributes
    """
    # The context manager closes the archive even if extraction raises.
    with RarFile(fiases.fias_data.WORK_DIR
                 + fiases.fias_data.FIAS_DELTA_XML_RAR) as rf:
        fias_objectMatcher = re.compile(fias_object.FILE)
        for f in rf.infolist():
            if fias_objectMatcher.match(f.filename):
                fias_object.xml_delta_file = f.filename
                fias_object.xml_delta_file_size = f.file_size

        if fias_object.xml_delta_file_size > 0:
            rf.extract(fias_object.xml_delta_file, fiases.fias_data.WORK_DIR)
def scan_archive(path):
    """Build a video from the largest video file found inside a RAR archive.

    :param str path: existing path to the archive.
    :return: the scanned video.
    :rtype: :class:`~subliminal.video.Video`
    :raises ValueError: if the path does not exist, is not a RAR archive,
        requires a password, or contains no video file.

    """
    # the archive has to be present on disk
    if not os.path.exists(path):
        raise ValueError('Path does not exist')

    if not is_rarfile(path):
        extension = os.path.splitext(path)[1]
        raise ValueError("'{0}' is not a valid archive".format(extension))

    dir_path, filename = os.path.split(path)
    logger.info('Scanning archive %r in %r', filename, dir_path)

    rar = RarFile(path)

    # password-protected archives cannot be inspected
    if rar.needs_password():
        raise ValueError('Rar requires a password')

    # raises on a broken archive; must be called to avoid a potential
    # deadlock with some broken rars
    rar.testrar()

    candidates = [
        entry for entry in rar.infolist()
        if not entry.isdir() and entry.filename.endswith(VIDEO_EXTENSIONS)
    ]
    if not candidates:
        raise ValueError('No video in archive')

    # the largest video is the one we want, there may be samples or intros
    largest = max(candidates, key=operator.attrgetter('file_size'))

    # guess from the path the video would have next to the archive
    video_path = os.path.join(dir_path, largest.filename)
    video = Video.fromguess(video_path, guessit(video_path))
    video.size = largest.file_size

    return video
class RarArch(Arch):
    """Archive backed by a RAR file that is opened on construction."""

    def __init__(self, name, path=None):
        """Initialise the base ``Arch`` and open the archive right away."""
        Arch.__init__(self, name, path)
        self.open()

    def open(self):
        """Open ``self.name`` in read mode and keep the handle in ``arch_desc``."""
        # Imported here so rarfile is only required once an archive is opened.
        import rarfile
        self.arch_desc = rarfile.RarFile(self.name, 'r')

    def name_list(self):
        """Yield the file name of every archive member that is not a directory."""
        for entry in self.arch_desc.infolist():
            if entry.isdir():
                continue
            yield entry.filename
def is_rar_media_file(r):
    """
    Check if a rar archive contains media files.

    A member counts as a media file when its extension, as returned by
    ``os.path.splitext``, is in ``MEDIA_EXTS``.

    :param str r: rar file path
    :return: whether the rar archive contains media files
    :rtype: bool
    """
    # The context manager closes the archive once the scan is done; ``any``
    # stops at the first media member instead of scanning the whole listing.
    with RarFile(r) as rf:
        return any(
            os.path.splitext(f.filename)[1] in MEDIA_EXTS
            for f in rf.infolist()
        )
def getRarMimeTypes(blobfile):
    """ Yield the mime-type guessed for each member of a RAR archive.

    The blob is first copied into a named temporary file so that it can be
    opened as an archive; members for which no mime-type is guessed are
    skipped.
    """
    suffix = getattr(blobfile, 'filename', '')
    if hasattr(blobfile, 'getIterator'):
        chunks = blobfile.getIterator()
    else:
        chunks = blobfile

    with NamedTemporaryFile(suffix=suffix) as tmpfile:
        for chunk in chunks:
            tmpfile.write(chunk)
        # make sure everything is on disk before the archive is opened by name
        tmpfile.flush()

        archive = RarFile(tmpfile.name)
        for member in archive.infolist():
            header = StringIO(member.header_data)
            member_name = getattr(member, 'filename', None)
            mimetype = guessMimetype(header, member_name)
            if mimetype:
                yield mimetype
chunk_size = 4096 while True: gevent.sleep(0) chunk = src.read(chunk_size) if not chunk: break dst.write(chunk) total_copied += len(chunk) progress_cb(total_copied) if __name__ == "__main__": rf = RarFile('/media/zuhair/Shuttle/Legion.S02E01.1080p.WEB.H264-DEFLATE/legion.s02e01.1080p.web.h264-deflate.rar') info = rf.infolist()[0] src = rf.open(info, 'r') dst = open('/tmp/legion.mkv', mode='wb') progress_queue = Queue(maxsize=1) def on_progress(copied): try: progress_queue.put_nowait(copied) except QueueFullException: # Swallow the exception and go on pass def printer(): start_timestamp = time.time() loop_timestamp = start_timestamp
def unrar(self, path, rar_files, force=False):
    """
    Extract RAR files.

    Nothing is unpacked unless ``app.UNPACK`` is set and ``rar_files`` is
    non-empty. A failure on one archive is logged as a warning, recorded in
    ``self.missed_files`` and sets ``self.result`` to ``False``; processing
    then continues with the next archive.

    :param path: Path to look for files in; archives are also extracted here
    :param rar_files: Names of RAR files
    :param force: process currently processing items; when true, the
        ``already_postprocessed`` check does not cause extraction to be skipped
    :return: List of unpacked file names (base names of the non-directory
        archive members)
    """
    unpacked_files = []

    if app.UNPACK and rar_files:
        self.log_and_output('Packed files detected: {rar_files}', level=logging.DEBUG, **{'rar_files': rar_files})

        for archive in rar_files:
            self.log_and_output('Unpacking archive: {archive}', level=logging.DEBUG, **{'archive': archive})

            # Set to an (error text, summary) tuple by the except clauses below.
            failure = None
            try:
                rar_handle = RarFile(os.path.join(path, archive))

                # check that the rar doesnt need a password
                if rar_handle.needs_password():
                    raise ValueError('Rar requires a password')

                # Skip extraction if any file in archive has previously been extracted
                skip_extraction = False
                for file_in_archive in [
                    os.path.basename(each.filename)
                    for each in rar_handle.infolist()
                    if not each.isdir()
                ]:
                    if not force and self.already_postprocessed(
                            file_in_archive):
                        self.log_and_output(
                            'Archive file already post-processed, extraction skipped: {file_in_archive}',
                            level=logging.DEBUG,
                            **{'file_in_archive': file_in_archive})
                        skip_extraction = True
                        break

                    # With POSTPONE_IF_NO_SUBS set, a file of the same name
                    # already present in ``path`` also skips extraction.
                    if app.POSTPONE_IF_NO_SUBS and os.path.isfile(
                            os.path.join(path, file_in_archive)):
                        self.log_and_output(
                            'Archive file already extracted, extraction skipped: {file_in_archive}',
                            level=logging.DEBUG,
                            **{'file_in_archive': file_in_archive})
                        skip_extraction = True
                        break

                if not skip_extraction:
                    # raise an exception if the rar file is broken
                    rar_handle.testrar()
                    rar_handle.extractall(path=path)

                # Record the base name of every non-directory archive member.
                for each in rar_handle.infolist():
                    if not each.isdir():
                        basename = os.path.basename(each.filename)
                        unpacked_files.append(basename)

                del rar_handle

            except (BadRarFile, Error, NotRarFile, RarCannotExec, ValueError) as error:
                failure = (ex(error), 'Unpacking failed with a Rar error')
            except Exception as error:
                failure = (ex(error), 'Unpacking failed for an unknown reason')

            if failure is not None:
                # Log the detailed error, keep the short summary in missed_files.
                self.log_and_output(
                    'Failed unpacking archive {archive}: {failure}',
                    level=logging.WARNING,
                    **{
                        'archive': archive,
                        'failure': failure[0]
                    })
                self.missed_files.append(
                    '{0}: Unpacking failed: {1}'.format(
                        archive, failure[1]))
                self.result = False
                continue

        self.log_and_output('Extracted content: {unpacked_files}', level=logging.DEBUG, **{'unpacked_files': unpacked_files})

    return unpacked_files