def decode_par2(parfile):
    """Parse a par2 file and rename files listed in the par2 to their real name.

    Every regular file in the folder that holds `parfile` is identified by
    the MD5 digest of its first 16384 bytes. When that digest is known to the
    par2 set and the file does not already carry the listed name, the file is
    renamed to it (made unique if that name is already taken).

    Returns True when at least one file was renamed, False otherwise
    (including when `parfile` is not a par2 file at all).
    """
    # Check if really a par2 file
    if not is_parfile(parfile):
        logging.info("Par2 file %s was not really a par2 file", parfile)
        return False

    # Parse the par2 file; the parser is expected to fill the mapping
    # "MD5 digest of the first 16k" -> "real filename"
    md5of16k = {}
    parse_par2_file(parfile, md5of16k)

    # Parse all files in the folder
    dirname = os.path.dirname(parfile)
    result = False
    for fn in os.listdir(dirname):
        filepath = os.path.join(dirname, fn)
        # Only check files
        if not os.path.isfile(filepath):
            continue
        with open(filepath, "rb") as file_to_match:
            first16k_data = file_to_match.read(16384)

        # Check if we have this hash
        file_md5of16k = hashlib.md5(first16k_data).digest()
        if file_md5of16k not in md5of16k:
            continue

        real_name = md5of16k[file_md5of16k]
        if fn == real_name:
            # Already correctly named: asking for a unique name here would
            # needlessly rename the file to a numbered variant of itself
            continue

        new_path = os.path.join(dirname, real_name)
        # Make sure it's a unique name
        renamer(filepath, get_unique_filename(new_path))
        result = True
    return result
def test_existing_file_without_extension(self):
    """An occupied path without extension must get a ".1" suffix appended."""
    occupied_path = r"c:\some\filename"

    # Occupy the path so that a unique alternative has to be generated
    self.fs.create_file(occupied_path)
    assert os.path.exists(occupied_path)

    unique_path = filesystem.get_unique_filename(occupied_path)
    assert unique_path.lower() == r"c:\some\filename.1"
def rename_similar(folder, skip_ext, name, skipped_files):
    """Rename all other files in the 'folder' hierarchy after 'name'
    and move them to the root of 'folder'.

    Files having extension 'skip_ext' will be moved, but not renamed.
    Files whose full path is in `skipped_files` are not touched.
    In 'name', the placeholder "%fn" is replaced by the original base name
    of each file. A failed rename is logged and the remaining files are
    still processed. Empty directories are cleaned up afterwards.
    """
    logging.debug('Give files in set "%s" matching names.', name)
    folder = os.path.normpath(folder)
    skip_ext = skip_ext.lower()

    for root, _dirs, files in os.walk(folder):
        for f in files:
            path = os.path.join(root, f)
            if path in skipped_files:
                continue
            org, ext = os.path.splitext(f)
            if ext.lower() == skip_ext:
                # Move file, but do not rename
                newpath = os.path.join(folder, f)
            else:
                # Move file and rename
                newname = "%s%s" % (name, ext)
                newname = newname.replace("%fn", org)
                newpath = os.path.join(folder, newname)
            if path == newpath:
                # Already in place under the wanted name
                continue
            newpath = get_unique_filename(newpath)
            try:
                logging.debug("Rename: %s to %s", path, newpath)
                renamer(path, newpath)
            except Exception:
                # Best effort: report and carry on with the other files, but
                # let KeyboardInterrupt/SystemExit propagate
                logging.error(T("Failed to rename similar file: %s to %s"), clip_path(path), clip_path(newpath))
                logging.info("Traceback: ", exc_info=True)
    cleanup_empty_directories(folder)
def test_existing_file(self):
    """With file.name and file.1.name .. file.10.name taken, file.11.name is next."""
    first_free_index = 11  # High enough for double digits
    occupied_path = r"C:\dir\file.name"

    self.fs.create_file(occupied_path)
    assert os.path.exists(occupied_path)

    # Occupy every numbered variant below the first free index
    for index in range(1, first_free_index):
        numbered_path = r"C:\dir\file." + str(index) + ".name"
        self.fs.create_file(numbered_path)
        assert os.path.exists(numbered_path)

    unique_path = filesystem.get_unique_filename(occupied_path)
    assert unique_path.lower() == r"c:\dir\file." + str(first_free_index) + ".name"
def deobfuscate_list(filelist, usefulname):
    """Deobfuscate the files in filelist where that is wanted.

    Par2 files found in the list are handed to decode_par2() first. Only when
    none of them reports a rename are the files inspected one by one and, if
    they qualify (big, extension not excluded, probably obfuscated), renamed
    after `usefulname` while keeping their extension.
    """
    # To be sure, only keep entries that really exist
    existing_files = [entry for entry in filelist if os.path.exists(entry)]

    # Let the par2 files (if any) have the first go
    par2_candidates = [entry for entry in existing_files if entry.endswith(".par2")]
    need_renamer = True
    if par2_candidates:
        for par2_candidate in par2_candidates:
            logging.debug("Deobfuscate par2: handling %s", par2_candidate)
            if decode_par2(par2_candidate):
                logging.debug("Deobfuscate par2 repair/verify finished.")
                need_renamer = False
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename.")
    else:
        logging.debug("No par2 files found to process, running renamer.")

    if not need_renamer:
        logging.info("No qualifying files found to deobfuscate")
        return

    # Par2 did not rename anything: try renaming qualifying files to the job-name
    logging.debug("Trying to see if there are qualifying files to be deobfuscated")
    for candidate in existing_files:
        logging.debug("Deobfuscate inspecting %s", candidate)
        size_in_bytes = os.path.getsize(candidate)

        # Criteria, cheapest first: big, not-excluded extension, obfuscated
        if size_in_bytes <= MIN_FILE_SIZE:
            continue
        if get_ext(candidate) in EXCLUDED_FILE_EXTS:
            continue
        if not is_probably_obfuscated(candidate):
            continue

        # Same folder, new base name, same extension; made unique before use
        parent_dir = os.path.split(candidate)[0]
        wanted_base = os.path.join(parent_dir, usefulname)
        target = get_unique_filename("%s%s" % (wanted_base, get_ext(candidate)))
        logging.info("Deobfuscate renaming %s to %s", candidate, target)
        renamer(candidate, target)
def test_nonexistent_file(self):
    """A path that is not taken comes back unchanged (compared case-insensitively)."""
    # Absolute path
    absolute_result = filesystem.get_unique_filename(r"C:\DIR\file.name")
    assert absolute_result.lower() == r"c:\dir\file.name"

    # Relative path
    relative_result = filesystem.get_unique_filename(r"DIR\file.name")
    assert relative_result.lower() == r"dir\file.name"
def test_nonexistent_file(self):
    """A path that is not taken comes back exactly as it was passed in."""
    # Absolute path
    absolute_result = filesystem.get_unique_filename("/dir/file.name")
    assert absolute_result == "/dir/file.name"

    # Relative path
    relative_result = filesystem.get_unique_filename("dir/file.name")
    assert relative_result == "dir/file.name"
def rar_renamer(nzo, workdir):
    """Deobfuscate rar file names: Use header and content information to give RAR-files decent names.

    Only files directly inside `workdir` are inspected (no subdirectories).
    Returns the number of files that were renamed.
    """
    nzo.status = Status.VERIFYING
    nzo.set_unpack_info("Repair", T("Trying RAR-based verification"))
    nzo.set_action_line(T("Trying RAR-based verification"), "...")

    renamed_files = 0

    # This is the most important datastructure (in case of mixed obfuscated rarsets).
    # rarvolnr holds, per rar volume number, the rar filenames and their respective contents.
    # For example: rarvolnr[6]['somerandomfilename.rar'] = ['readme.txt', 'linux.iso'],
    # which means 'somerandomfilename.rar' has volume number 6, and contents 'readme.txt' and 'linux.iso'.
    # If we find a rarfile with volume number 7, and 'linux.iso' in it, we have a match!
    rarvolnr = {}

    # The volume number and real extension of a (obfuscated) rar file,
    # so volnrext['dfakjldfalkjdfl.blabla'] = (14, 'part014.rar') or (2, 'r000').
    # Not really needed, but handy to avoid a second lookup at the renaming.
    volnrext = {}

    # Scan rar files in workdir, but not subdirs
    for file_to_check in os.listdir(workdir):
        file_to_check = os.path.join(workdir, file_to_check)
        # We only want files
        if not os.path.isfile(file_to_check):
            continue

        # The function will check if it's a RAR-file.
        # We do a sanity-check for the returned number.
        rar_vol, new_extension = rarvolinfo.get_rar_extension(file_to_check)
        if 0 < rar_vol < 1000:
            logging.debug("Detected volume-number %s from RAR-header: %s ", rar_vol, file_to_check)
            volnrext[file_to_check] = (rar_vol, new_extension)
            # The files inside rar file. file_to_check already includes
            # workdir; joining it again would double a relative workdir.
            rar_contents = rarfile.RarFile(file_to_check, single_file_check=True).filelist()
            # Store them for matching (if needed)
            rarvolnr.setdefault(rar_vol, {})[file_to_check] = rar_contents
        else:
            logging.debug("No RAR-volume-number found in %s", file_to_check)

    logging.debug("Deobfuscate: rarvolnr is: %s", rarvolnr)
    logging.debug("Deobfuscate: volnrext is: %s", volnrext)

    # Could be that there are no rar-files, we stop
    if not rarvolnr:
        return renamed_files

    # Check number of different obfuscated rar sets
    if 1 in rarvolnr:
        numberofrarsets = len(rarvolnr[1])
    else:
        # No first volume detected: judge by the busiest volume number instead
        numberofrarsets = max(len(volumes) for volumes in rarvolnr.values())
        if numberofrarsets > 1:
            # Set names are handed out via the first volumes, so mixed sets
            # cannot be told apart without them
            logging.warning("rarset %s is missing completely, so I can't deobfuscate.", 1)
            return renamed_files

    if numberofrarsets == 1:
        # Just one obfuscated rarset
        logging.debug("Deobfuscate: Just one obfuscated rarset")
        for filename in volnrext:
            new_rar_name = "%s.%s" % (nzo.final_name, volnrext[filename][1])
            new_rar_name = os.path.join(workdir, new_rar_name)
            new_rar_name = get_unique_filename(new_rar_name)
            logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
            renamer(filename, new_rar_name)
            renamed_files += 1
        return renamed_files

    # More than one obfuscated rarset, so we must do matching based of files inside the rar files
    logging.debug("Number of obfuscated rarsets: %s", numberofrarsets)

    # Assign (random) rar set names: in which rar set a file should be, so rar set 'A', or 'B', or ...
    rarsetname = {}
    mychar = "A"
    # First things first: Assigning a rarsetname to the rar files which have volume number 1
    for base_obfuscated_filename in rarvolnr[1]:
        rarsetname[base_obfuscated_filename] = mychar + "--" + nzo.final_name
        mychar = chr(ord(mychar) + 1)
    logging.debug("Deobfuscate: rarsetname %s", rarsetname)

    # Do the matching, layer by layer (read: rarvolnumber).
    # So, all rar files with rarvolnr 1, find the contents (files inside the rar),
    # and match with rarfiles with rarvolnr 2, and put them in the correct rarset.
    # And so on, until the highest rarvolnr minus 1 matched against highest rarvolnr.
    for n in range(1, max(rarvolnr)):
        if n not in rarvolnr or n + 1 not in rarvolnr:
            # A gap in the volume numbers breaks the chain; nothing beyond
            # it can be assigned to a set anymore
            break
        logging.debug("Deobfuscate: Finding matches between rar sets %s and %s", n, n + 1)
        for base_obfuscated_filename, base_contents in rarvolnr[n].items():
            if not base_contents:
                # Empty listing: there is no last entry to compare with
                continue
            matchcounter = 0
            for next_obfuscated_filename, next_contents in rarvolnr[n + 1].items():
                if not next_contents:
                    continue
                # Check if the last filename inside the existing rar matches
                # with the first filename in the following rar
                if base_contents[-1] == next_contents[0]:
                    try:
                        rarsetname[next_obfuscated_filename] = rarsetname[base_obfuscated_filename]
                        matchcounter += 1
                    except KeyError:
                        logging.warning(T("No matching earlier rar file for %s"), next_obfuscated_filename)
            if matchcounter > 1:
                logging.info("Deobfuscate: more than one match, so risk on false positive matching.")

    # Do the renaming
    for filename in rarsetname:
        new_rar_name = "%s.%s" % (rarsetname[filename], volnrext[filename][1])
        new_rar_name = os.path.join(workdir, new_rar_name)
        new_rar_name = get_unique_filename(new_rar_name)
        logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
        renamer(filename, new_rar_name)
        renamed_files += 1

    # Done: The obfuscated rar files have now been renamed to regular formatted filenames
    return renamed_files
def rar_renamer(nzo: NzbObject, workdir):
    """Deobfuscate rar file names: Use header and content information to give RAR-files decent names.

    Only files directly inside `workdir` are inspected (no subdirectories).
    With a single rar set every detected volume is renamed after
    `nzo.final_name`. With several mixed sets the volumes are first checked
    for completeness and then chained together by comparing archive contents.

    Returns the number of files that were renamed (0 when the mixed sets
    fail the sanity check).
    """
    nzo.status = Status.VERIFYING
    nzo.set_unpack_info("Repair", T("Trying RAR renamer"))
    nzo.set_action_line(T("Trying RAR renamer"), "...")

    renamed_files = 0

    # This is the most important datastructure (in case of mixed obfuscated rarsets).
    # rarvolnr holds, per rar volume number, the rar filenames and their respective contents.
    # For example: rarvolnr[6]['somerandomfilename.rar'] = ['readme.txt', 'linux.iso'],
    # which means 'somerandomfilename.rar' has volume number 6, and contents 'readme.txt' and 'linux.iso'.
    # If we find a rarfile with volume number 7, and 'linux.iso' in it, we have a match!
    rarvolnr = {}

    # The volume number and real extension of a (obfuscated) rar file,
    # so volnrext['dfakjldfalkjdfl.blabla'] = (14, 'part014.rar') or (2, 'r000').
    # Not really needed, but handy to avoid a second lookup at the renaming.
    volnrext = {}

    # Scan rar files in workdir, but not subdirs
    for file_to_check in os.listdir(workdir):
        file_to_check = os.path.join(workdir, file_to_check)
        # We only want files
        if not os.path.isfile(file_to_check):
            continue

        # The function will check if it's a RAR-file.
        # We do a sanity-check for the returned number.
        rar_vol, new_extension = rarvolinfo.get_rar_extension(file_to_check)
        if 0 < rar_vol < 1000:
            logging.debug("Detected volume-number %s from RAR-header: %s ", rar_vol, file_to_check)
            volnrext[file_to_check] = (rar_vol, new_extension)
            # The files inside rar file. file_to_check already includes
            # workdir; joining it again would double a relative workdir.
            rar_contents = rarfile.RarFile(file_to_check, single_file_check=True).filelist()
            # Store them for matching (if needed)
            rarvolnr.setdefault(rar_vol, {})[file_to_check] = rar_contents
        else:
            logging.debug("No RAR-volume-number found in %s", file_to_check)

    logging.debug("Deobfuscate: rarvolnr is: %s", rarvolnr)
    logging.debug("Deobfuscate: volnrext is: %s", volnrext)

    # Could be that there are no rar-files, we stop
    if not rarvolnr:
        return renamed_files

    # The number of rar sets is the largest number of files sharing one volume number
    numberofrarsets = max(len(volumes) for volumes in rarvolnr.values())
    logging.debug("Number of rarset is %s", numberofrarsets)

    if numberofrarsets == 1:
        # Just one obfuscated rarset ... that's easy
        logging.debug("Deobfuscate: Just one obfuscated rarset")
        for filename in volnrext:
            new_rar_name = "%s.%s" % (nzo.final_name, volnrext[filename][1])
            new_rar_name = os.path.join(workdir, new_rar_name)
            new_rar_name = get_unique_filename(new_rar_name)
            logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
            renamer(filename, new_rar_name)
            renamed_files += 1
        return renamed_files

    # numberofrarsets bigger than 1, so a mixed rar set, so we need pre-checking

    # Sanity check of the rar set.
    # Get the highest rar part number (that's the upper limit):
    highest_rar = max(rarvolnr)
    # A staircase check: number of rarsets should not go up, but stay the same or go down
    how_many_previous = 1000  # 1000 rarset mixed ... should be enough ... typical is 1, 2 or maybe 3
    # Start at part001.rar and go to the highest
    for rar_set_number in range(1, highest_rar + 1):
        if rar_set_number not in rarvolnr:
            # rarset does not exist at all
            logging.warning("rarset %s is missing completely, so I can't deobfuscate.", rar_set_number)
            return 0
        how_many_here = len(rarvolnr[rar_set_number])
        # OK, it exists, now let's check it's not higher
        if how_many_here > how_many_previous:
            # This should not happen: higher number of rarset than previous number of rarset
            logging.warning("no staircase! rarset %s is higher than previous, so I can't deobfuscate.", rar_set_number)
            return 0
        how_many_previous = how_many_here

    # OK, that looked OK (a declining staircase), so we can safely proceed.
    # More than one obfuscated rarset, so we must do matching based of files inside the rar files.

    # Assign (random) rar set names, first come first serve basis:
    # in which rar set a file should be, so rar set 'A', or 'B', or ...
    rarsetname = {}
    mychar = "A"
    # First things first: Assigning a rarsetname to the rar files which have volume number 1
    for base_obfuscated_filename in rarvolnr[1]:
        rarsetname[base_obfuscated_filename] = mychar + "--" + nzo.final_name
        mychar = chr(ord(mychar) + 1)
    logging.debug("Deobfuscate: rarsetname %s", rarsetname)

    # Do the matching, layer by layer (read: rarvolnumber).
    # So, all rar files with rarvolnr 1, find the contents (files inside the rar),
    # and match with rarfiles with rarvolnr 2, and put them in the correct rarset.
    # And so on, until the highest rarvolnr minus 1 matched against highest rarvolnr.
    # The staircase check above established that every volume 1..highest_rar exists.
    for n in range(1, highest_rar):
        logging.debug("Deobfuscate: Finding matches between rar sets %s and %s", n, n + 1)
        for base_obfuscated_filename, base_contents in rarvolnr[n].items():
            if not base_contents:
                # Empty listing: there is no last entry to compare with
                continue
            matchcounter = 0
            for next_obfuscated_filename, next_contents in rarvolnr[n + 1].items():
                if not next_contents:
                    continue
                # Check if the last filename inside the existing rar matches
                # with the first filename in the following rar
                if base_contents[-1] == next_contents[0]:
                    try:
                        rarsetname[next_obfuscated_filename] = rarsetname[base_obfuscated_filename]
                        matchcounter += 1
                    except KeyError:
                        logging.warning(T("No matching earlier rar file for %s"), next_obfuscated_filename)
            if matchcounter > 1:
                logging.info("Deobfuscate: more than one match, so risk on false positive matching.")

    # Do the renaming
    for filename in rarsetname:
        new_rar_name = "%s.%s" % (rarsetname[filename], volnrext[filename][1])
        new_rar_name = os.path.join(workdir, new_rar_name)
        new_rar_name = get_unique_filename(new_rar_name)
        logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
        renamer(filename, new_rar_name)
        renamed_files += 1

    # Done: The obfuscated rar files have now been renamed to regular formatted filenames
    return renamed_files
def deobfuscate_list(filelist, usefulname):
    """Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname.

    Par2 files found in the list are handed to decode_par2() first. Only when
    none of them reports a rename are the files inspected, biggest first, and
    renamed when they qualify (big, extension not excluded, probably
    obfuscated). Extensions shared by three or more big files are treated as
    a collection and left alone. Files that share the base name of a renamed
    file are renamed along with it, keeping their own (possibly long)
    extension.
    """
    # To be sure, only keep really existing files
    filelist = [f for f in filelist if os.path.exists(f)]

    # Search for par2 files in the filelist
    par2_files = [f for f in filelist if f.endswith(".par2")]
    # Found any par2 files we can use?
    run_renamer = True
    if not par2_files:
        logging.debug("No par2 files found to process, running renamer.")
    else:
        # Run par2 from SABnzbd on them
        for par2_file in par2_files:
            # Analyse data and analyse result
            logging.debug("Deobfuscate par2: handling %s", par2_file)
            if decode_par2(par2_file):
                logging.debug("Deobfuscate par2 repair/verify finished.")
                run_renamer = False
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename.")

    # No par2 files? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name
    if run_renamer:
        excluded_file_exts = EXCLUDED_FILE_EXTS
        # If there is a collection with bigger files with the same extension, we don't want to rename it
        extcounter = {}
        for candidate in filelist:
            if os.path.getsize(candidate) < MIN_FILE_SIZE:
                # Too small to care
                continue
            # Use get_ext() here as well, so the collected extensions have the
            # same form as the ones tested against excluded_file_exts below
            ext = get_ext(candidate)
            extcounter[ext] = extcounter.get(ext, 0) + 1
            if extcounter[ext] >= 3 and ext not in excluded_file_exts:
                # Collection, and extension not yet in excluded_file_exts, so add it
                excluded_file_exts = (*excluded_file_exts, ext)
                logging.debug(
                    "Found a collection of at least %s files with extension %s, so not renaming those files",
                    extcounter[ext],
                    ext,
                )

        logging.debug("Trying to see if there are qualifying files to be deobfuscated")
        # We start with the biggest file ... probably the most important file
        filelist = sorted(filelist, key=os.path.getsize, reverse=True)
        for filename in filelist:
            # Check that file is still there (and not renamed by the secondary renaming process below)
            if not os.path.isfile(filename):
                continue
            logging.debug("Deobfuscate inspecting %s", filename)
            # Do we need to rename this file?
            # Criteria: big, not-excluded extension, obfuscated (in that order,
            # the obfuscation test last to avoid unnecessary analysis)
            if (
                os.path.getsize(filename) > MIN_FILE_SIZE
                and get_ext(filename) not in excluded_file_exts
                and is_probably_obfuscated(filename)
            ):
                # Rename and make sure the new filename is unique.
                # Construct new_name: <path><usefulname><extension>
                path = os.path.split(filename)[0]
                new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename)))
                logging.info("Deobfuscate renaming %s to %s", filename, new_name)
                renamer(filename, new_name)

                # Find other files with the same basename in filelist, and rename them in the same way
                basedirfile, _ = os.path.splitext(filename)  # something like "/home/this/myiso"
                for otherfile in filelist:
                    if otherfile.startswith(basedirfile + ".") and os.path.isfile(otherfile):
                        # Yes, same basedirfile, only different extension.
                        # Cut off just the leading basedirfile: str.replace()
                        # would also remove later occurrences of that text.
                        remainingextension = otherfile[len(basedirfile):]  # might be long ext, like ".dut.srt"
                        new_name = get_unique_filename(
                            "%s%s" % (os.path.join(path, usefulname), remainingextension)
                        )
                        logging.info("Deobfuscate renaming %s to %s", otherfile, new_name)
                        # Rename and make sure the new filename is unique
                        renamer(otherfile, new_name)
    else:
        # Reached when a par2 file already took care of the renaming
        logging.info("No qualifying files found to deobfuscate")