Esempio n. 1
0
def decode_par2(parfile):
    """Parse a par2 file and rename files listed in the par2 to their real name.

    Every regular file in the directory of ``parfile`` has the MD5 digest of its
    first 16 KiB looked up in the mapping handed to ``parse_par2_file``. On a
    hit, the file is renamed to the name stored for that digest (made unique
    through ``get_unique_filename``).

    Returns True when at least one file was renamed, otherwise False (also when
    ``is_parfile`` rejects ``parfile``).
    """
    # Check if really a par2 file
    if not is_parfile(parfile):
        # The placeholder needs its argument, otherwise a literal "%s" is logged
        logging.info("Par2 file %s was not really a par2 file", parfile)
        return False

    # Parse the par2 file; the dict is passed in and read back below
    # (presumably filled by parse_par2_file as digest -> real filename)
    md5of16k = {}
    parse_par2_file(parfile, md5of16k)

    # Parse all files in the folder
    dirname = os.path.dirname(parfile)
    result = False
    for fn in os.listdir(dirname):
        filepath = os.path.join(dirname, fn)
        # Only check files
        if not os.path.isfile(filepath):
            continue

        with open(filepath, "rb") as file_to_match:
            first16k_data = file_to_match.read(16384)

        # Check if we have this hash
        file_md5of16k = hashlib.md5(first16k_data).digest()
        if file_md5of16k in md5of16k:
            new_path = os.path.join(dirname, md5of16k[file_md5of16k])
            # Make sure it's a unique name
            renamer(filepath, get_unique_filename(new_path))
            result = True
    return result
Esempio n. 2
0
 def test_existing_file_without_extension(self):
     # A taken name without extension is expected to come back with ".1" appended
     occupied = r"c:\some\filename"
     # Occupy the name, so a fresh one has to be generated
     self.fs.create_file(occupied)
     assert os.path.exists(occupied)
     unique_name = filesystem.get_unique_filename(occupied)
     assert unique_name.lower() == r"c:\some\filename.1"
Esempio n. 3
0
def rename_similar(folder, skip_ext, name, skipped_files):
    """Rename all other files in the 'folder' hierarchy after 'name'
    and move them to the root of 'folder'.

    Files having extension 'skip_ext' will be moved, but not renamed.
    Files whose path is in `skipped_files` are not touched.
    An occurrence of "%fn" in the new name is replaced by the original
    base name of the file (without extension).
    A failing rename is logged and does not stop the remaining files.
    Afterwards `cleanup_empty_directories` is called on 'folder'.
    """
    logging.debug('Give files in set "%s" matching names.', name)
    folder = os.path.normpath(folder)
    skip_ext = skip_ext.lower()

    for root, _dirs, files in os.walk(folder):
        for f in files:
            path = os.path.join(root, f)
            if path in skipped_files:
                continue
            org, ext = os.path.splitext(f)
            if ext.lower() == skip_ext:
                # Move file, but do not rename
                newpath = os.path.join(folder, f)
            else:
                # Move file and rename
                newname = "%s%s" % (name, ext)
                newname = newname.replace("%fn", org)
                newpath = os.path.join(folder, newname)
            if path != newpath:
                newpath = get_unique_filename(newpath)
                try:
                    logging.debug("Rename: %s to %s", path, newpath)
                    renamer(path, newpath)
                except Exception:
                    # Best effort: report and carry on, but let KeyboardInterrupt
                    # and SystemExit propagate instead of swallowing them
                    logging.error(T("Failed to rename similar file: %s to %s"), clip_path(path), clip_path(newpath))
                    logging.info("Traceback: ", exc_info=True)
    cleanup_empty_directories(folder)
Esempio n. 4
0
 def test_existing_file(self):
     # With file.name and file.1.name .. file.10.name taken, file.11.name is expected
     taken = r"C:\dir\file.name"
     first_free = 11  # High enough for double digits
     self.fs.create_file(taken)
     assert os.path.exists(taken)
     # Create obstructions
     for counter in range(1, first_free):
         obstruction = r"C:\dir\file." + str(counter) + ".name"
         self.fs.create_file(obstruction)
         assert os.path.exists(obstruction)
     unique_name = filesystem.get_unique_filename(taken)
     assert unique_name.lower() == r"c:\dir\file." + str(first_free) + ".name"
Esempio n. 5
0
def deobfuscate_list(filelist, usefulname):
    """Deobfuscate the files in filelist where that is wanted.

    First every existing "*.par2" file is handed to decode_par2. Only when none
    of them led to a rename (or there are none), each existing file that is
    bigger than MIN_FILE_SIZE, has no extension from EXCLUDED_FILE_EXTS and is
    judged obfuscated by is_probably_obfuscated gets renamed to
    <its folder>/<usefulname><its extension>.
    """
    # Drop entries that do not exist on disk
    present = [entry for entry in filelist if os.path.exists(entry)]

    # The par2 files among them
    par2_candidates = [entry for entry in present if entry.endswith(".par2")]

    handled_by_par2 = False
    if par2_candidates:
        # Let every par2 file try to restore the real names
        for candidate in par2_candidates:
            logging.debug("Deobfuscate par2: handling %s", candidate)
            if decode_par2(candidate):
                logging.debug("Deobfuscate par2 repair/verify finished.")
                handled_by_par2 = True
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename.")
    else:
        logging.debug("No par2 files found to process, running renamer.")

    if handled_by_par2:
        logging.info("No qualifying files found to deobfuscate")
        return

    # Nothing renamed through par2: rename qualifying files to the job name
    logging.debug("Trying to see if there are qualifying files to be deobfuscated")
    for candidate in present:
        logging.debug("Deobfuscate inspecting %s", candidate)
        size = os.path.getsize(candidate)
        # Criteria, cheapest first: big, not-excluded extension, obfuscated
        if size <= MIN_FILE_SIZE:
            continue
        if get_ext(candidate) in EXCLUDED_FILE_EXTS:
            continue
        if not is_probably_obfuscated(candidate):
            continue

        # Build the target next to the original and make sure it is unique
        folder = os.path.dirname(candidate)
        wanted = "%s%s" % (os.path.join(folder, usefulname), get_ext(candidate))
        target = get_unique_filename(wanted)
        logging.info("Deobfuscate renaming %s to %s", candidate, target)
        renamer(candidate, target)
Esempio n. 6
0
 def test_nonexistent_file(self):
     # A name that is not taken is expected to come back as given (compared case-insensitively)
     absolute_result = filesystem.get_unique_filename(r"C:\DIR\file.name")
     assert absolute_result.lower() == r"c:\dir\file.name"
     # Relative path
     relative_result = filesystem.get_unique_filename(r"DIR\file.name")
     assert relative_result.lower() == r"dir\file.name"
Esempio n. 7
0
 def test_nonexistent_file(self):
     # A name that is not taken is expected to come back unchanged
     absolute_result = filesystem.get_unique_filename("/dir/file.name")
     assert absolute_result == "/dir/file.name"
     # Relative path
     relative_result = filesystem.get_unique_filename("dir/file.name")
     assert relative_result == "dir/file.name"
Esempio n. 8
0
def rar_renamer(nzo, workdir):
    """Deobfuscate rar file names: Use header and content information to give RAR-files decent names.

    Only regular files directly inside `workdir` are inspected (no subdirs).
    For each of them `rarvolinfo.get_rar_extension` supplies a volume number and
    a proper extension; files with a volume number between 1 and 999 are treated
    as RAR volumes.

    - One file with volume number 1: all detected volumes are renamed to
      "<nzo.final_name>.<extension>".
    - Several files with volume number 1 (mixed sets): volumes are chained by
      comparing the last entry inside a volume n file with the first entry
      inside a volume n + 1 file; each chain is renamed to
      "<letter>--<nzo.final_name>.<extension>".
    - No file with volume number 1: nothing is renamed.

    Returns the number of renamed files.
    """
    nzo.status = Status.VERIFYING
    nzo.set_unpack_info("Repair", T("Trying RAR-based verification"))
    nzo.set_action_line(T("Trying RAR-based verification"), "...")

    renamed_files = 0

    # This is the most important datastructure (in case of mixed obfuscated rarsets)
    # rarvolnr contains, per rar volume number, the rar filenames and their respective contents.
    # for example: rarvolnr[6]['somerandomfilename.rar'] = ['readme.txt', 'linux.iso'],
    # which means 'somerandomfilename.rar' has rarvolnumber 6, and contents 'readme.txt' and 'linux.iso'
    # if we find a rarfile with rarvolnumber 7, and 'linux.iso' in it, we have a match!
    rarvolnr = {}

    # The volume number and real extension of a (obfuscated) rar file
    # so volnrext['dfakjldfalkjdfl.blabla'] = (14, 'part014.rar') or (2, 'r000')
    # Not really needed, but handy to avoid a second lookup at the renaming
    volnrext = {}

    # Scan rar files in workdir, but not subdirs
    for entry in os.listdir(workdir):
        file_to_check = os.path.join(workdir, entry)
        # We only want files:
        if not os.path.isfile(file_to_check):
            continue
        # The function will check if it's a RAR-file
        # We do a sanity-check for the returned number
        rar_vol, new_extension = rarvolinfo.get_rar_extension(file_to_check)
        if 0 < rar_vol < 1000:
            logging.debug("Detected volume-number %s from RAR-header: %s ", rar_vol, file_to_check)
            volnrext[file_to_check] = (rar_vol, new_extension)
            # The files inside rar file. file_to_check already contains workdir;
            # joining it a second time would double a relative workdir.
            rar_contents = rarfile.RarFile(file_to_check, single_file_check=True).filelist()
            # store them for matching (if needed)
            rarvolnr.setdefault(rar_vol, {})[file_to_check] = rar_contents
        else:
            logging.debug("No RAR-volume-number found in %s", file_to_check)

    logging.debug("Deobfuscate: rarvolnr is: %s", rarvolnr)
    logging.debug("Deobfuscate: volnrext is: %s", volnrext)

    # Could be that there are no rar-files, we stop
    if not rarvolnr:
        return renamed_files

    # The number of sets is derived from the number of first volumes; without
    # any first volume there is nothing to base the sets on, so stop instead of
    # failing with a KeyError
    first_volumes = rarvolnr.get(1)
    if not first_volumes:
        logging.debug("Deobfuscate: no RAR file with volume number 1 found, so nothing is renamed")
        return renamed_files

    # Check number of different obfuscated rar sets:
    numberofrarsets = len(first_volumes)
    if numberofrarsets == 1:
        # Just one obfuscated rarset
        logging.debug("Deobfuscate: Just one obfuscated rarset")
        for filename in volnrext:
            new_rar_name = "%s.%s" % (nzo.final_name, volnrext[filename][1])
            new_rar_name = os.path.join(workdir, new_rar_name)
            new_rar_name = get_unique_filename(new_rar_name)
            logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
            renamer(filename, new_rar_name)
            renamed_files += 1
    else:
        # More than one obfuscated rarset, so we must do matching based of files inside the rar files
        logging.debug("Number of obfuscated rarsets: %s", numberofrarsets)

        # Assign (random) rar set names:
        # in which rar set it should be, so rar set 'A', or 'B', or ...
        rarsetname = {}
        mychar = "A"
        # First things first: Assigning a rarsetname to the rar file which have volume number 1
        for base_obfuscated_filename in first_volumes:
            rarsetname[base_obfuscated_filename] = mychar + "--" + nzo.final_name
            mychar = chr(ord(mychar) + 1)
        logging.debug("Deobfuscate: rarsetname %s", rarsetname)

        # Do the matching, layer by layer (read: rarvolnumber)
        # So, all rar files with rarvolnr 1, find the contents (files inside the rar),
        # and match with rarfiles with rarvolnr 2, and put them in the correct rarset.
        # And so on, until the highest rarvolnr minus 1 matched against highest rarvolnr
        for n in sorted(rarvolnr):
            next_volumes = rarvolnr.get(n + 1)
            if next_volumes is None:
                # Highest volume, or a gap in the numbering: nothing to match against
                continue
            logging.debug("Deobfuscate: Finding matches between rar sets %s and %s", n, n + 1)
            for base_obfuscated_filename, base_contents in rarvolnr[n].items():
                matchcounter = 0
                for next_obfuscated_filename, next_contents in next_volumes.items():
                    # An empty content list cannot be matched (and cannot be indexed)
                    if not base_contents or not next_contents:
                        continue
                    # set() method with intersection would be less strict; here we
                    # check if the last filename inside the existing rar matches
                    # with the first filename in the following rar
                    if base_contents[-1] == next_contents[0]:
                        try:
                            rarsetname[next_obfuscated_filename] = rarsetname[base_obfuscated_filename]
                            matchcounter += 1
                        except KeyError:
                            # The base file itself was never assigned to a set
                            logging.warning(T("No matching earlier rar file for %s"), next_obfuscated_filename)
                if matchcounter > 1:
                    logging.info("Deobfuscate: more than one match, so risk on false positive matching.")

        # Do the renaming:
        for filename in rarsetname:
            new_rar_name = "%s.%s" % (rarsetname[filename], volnrext[filename][1])
            new_rar_name = os.path.join(workdir, new_rar_name)
            new_rar_name = get_unique_filename(new_rar_name)
            logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
            renamer(filename, new_rar_name)
            renamed_files += 1

    # Done: The obfuscated rar files have now been renamed to regular formatted filenames
    return renamed_files
Esempio n. 9
0
def rar_renamer(nzo: NzbObject, workdir):
    """Deobfuscate rar file names: Use header and content information to give RAR-files decent names.

    Only regular files directly inside `workdir` are inspected (no subdirs).
    For each of them `rarvolinfo.get_rar_extension` supplies a volume number and
    a proper extension; files with a volume number between 1 and 999 are treated
    as RAR volumes.

    - At most one file per volume number: all detected volumes are renamed to
      "<nzo.final_name>.<extension>".
    - Otherwise (mixed sets): every volume number from 1 up to the highest must
      be present and the number of files per volume number may not increase
      ("declining staircase"); if not, a warning is logged and 0 is returned.
      Volumes are then chained by comparing the last entry inside a volume n
      file with the first entry inside a volume n + 1 file, and each chain is
      renamed to "<letter>--<nzo.final_name>.<extension>".

    Returns the number of renamed files.
    """
    nzo.status = Status.VERIFYING
    nzo.set_unpack_info("Repair", T("Trying RAR renamer"))
    nzo.set_action_line(T("Trying RAR renamer"), "...")

    renamed_files = 0

    # This is the most important datastructure (in case of mixed obfuscated rarsets)
    # rarvolnr contains, per rar volume number, the rar filenames and their respective contents.
    # for example: rarvolnr[6]['somerandomfilename.rar'] = ['readme.txt', 'linux.iso'],
    # which means 'somerandomfilename.rar' has rarvolnumber 6, and contents 'readme.txt' and 'linux.iso'
    # if we find a rarfile with rarvolnumber 7, and 'linux.iso' in it, we have a match!
    rarvolnr = {}

    # The volume number and real extension of a (obfuscated) rar file
    # so volnrext['dfakjldfalkjdfl.blabla'] = (14, 'part014.rar') or (2, 'r000')
    # Not really needed, but handy to avoid a second lookup at the renaming
    volnrext = {}

    # Scan rar files in workdir, but not subdirs
    for entry in os.listdir(workdir):
        file_to_check = os.path.join(workdir, entry)
        # We only want files:
        if not os.path.isfile(file_to_check):
            continue
        # The function will check if it's a RAR-file
        # We do a sanity-check for the returned number
        rar_vol, new_extension = rarvolinfo.get_rar_extension(file_to_check)
        if 0 < rar_vol < 1000:
            logging.debug("Detected volume-number %s from RAR-header: %s ", rar_vol, file_to_check)
            volnrext[file_to_check] = (rar_vol, new_extension)
            # The files inside rar file. file_to_check already contains workdir;
            # joining it a second time would double a relative workdir.
            rar_contents = rarfile.RarFile(file_to_check, single_file_check=True).filelist()
            # store them for matching (if needed)
            rarvolnr.setdefault(rar_vol, {})[file_to_check] = rar_contents
        else:
            logging.debug("No RAR-volume-number found in %s", file_to_check)

    logging.debug("Deobfuscate: rarvolnr is: %s", rarvolnr)
    logging.debug("Deobfuscate: volnrext is: %s", volnrext)

    # Could be that there are no rar-files, we stop
    if not rarvolnr:
        return renamed_files

    # The number of sets is the largest number of files sharing one volume number
    numberofrarsets = max(len(volumes) for volumes in rarvolnr.values())
    logging.debug("Number of rarset is %s", numberofrarsets)

    if numberofrarsets == 1:
        # Just one obfuscated rarset ... that's easy
        logging.debug("Deobfuscate: Just one obfuscated rarset")
        for filename in volnrext:
            new_rar_name = "%s.%s" % (nzo.final_name, volnrext[filename][1])
            new_rar_name = os.path.join(workdir, new_rar_name)
            new_rar_name = get_unique_filename(new_rar_name)
            logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
            renamer(filename, new_rar_name)
            renamed_files += 1
        return renamed_files

    # numberofrarsets bigger than 1, so a mixed rar set, so we need pre-checking

    # Sanity check of the rar set
    # Get the highest rar part number (that's the upper limit):
    highest_rar = max(rarvolnr)
    # A staircase check: number of rarsets should not go up, but stay the same or go down
    how_many_previous = 1000  # 1000 rarset mixed ... should be enough ... typical is 1, 2 or maybe 3
    # Start at part001.rar and go the highest
    for rar_set_number in range(1, highest_rar + 1):
        if rar_set_number not in rarvolnr:
            # rarset does not exist at all
            logging.warning("rarset %s is missing completely, so I can't deobfuscate.", rar_set_number)
            return 0
        how_many_here = len(rarvolnr[rar_set_number])
        # OK, it exists, now let's check it's not higher
        if how_many_here > how_many_previous:
            # this should not happen: higher number of rarset than previous number of rarset
            logging.warning("no staircase! rarset %s is higher than previous, so I can't deobfuscate.", rar_set_number)
            return 0
        how_many_previous = how_many_here

    # OK, that looked OK (a declining staircase), so we can safely proceed
    # More than one obfuscated rarset, so we must do matching based of files inside the rar files

    # Assign (random) rar set names, first come first serve basis:
    # in which rar set it should be, so rar set 'A', or 'B', or ...
    rarsetname = {}
    mychar = "A"
    # First things first: Assigning a rarsetname to the rar file which have volume number 1
    for base_obfuscated_filename in rarvolnr[1]:
        rarsetname[base_obfuscated_filename] = mychar + "--" + nzo.final_name
        mychar = chr(ord(mychar) + 1)
    logging.debug("Deobfuscate: rarsetname %s", rarsetname)

    # Do the matching, layer by layer (read: rarvolnumber)
    # So, all rar files with rarvolnr 1, find the contents (files inside the rar),
    # and match with rarfiles with rarvolnr 2, and put them in the correct rarset.
    # And so on, until the highest rarvolnr minus 1 matched against highest rarvolnr.
    # The staircase check above guarantees that volumes 1..highest_rar all exist.
    for n in range(1, len(rarvolnr)):
        logging.debug("Deobfuscate: Finding matches between rar sets %s and %s", n, n + 1)
        for base_obfuscated_filename, base_contents in rarvolnr[n].items():
            matchcounter = 0
            for next_obfuscated_filename, next_contents in rarvolnr[n + 1].items():
                # An empty content list cannot be matched (and cannot be indexed)
                if not base_contents or not next_contents:
                    continue
                # set() method with intersection would be less strict; here we
                # check if the last filename inside the existing rar matches
                # with the first filename in the following rar
                if base_contents[-1] == next_contents[0]:
                    try:
                        rarsetname[next_obfuscated_filename] = rarsetname[base_obfuscated_filename]
                        matchcounter += 1
                    except KeyError:
                        # The base file itself was never assigned to a set
                        logging.warning(T("No matching earlier rar file for %s"), next_obfuscated_filename)
            if matchcounter > 1:
                logging.info("Deobfuscate: more than one match, so risk on false positive matching.")

    # Do the renaming:
    for filename in rarsetname:
        new_rar_name = "%s.%s" % (rarsetname[filename], volnrext[filename][1])
        new_rar_name = os.path.join(workdir, new_rar_name)
        new_rar_name = get_unique_filename(new_rar_name)
        logging.debug("Deobfuscate: Renaming %s to %s", filename, new_rar_name)
        renamer(filename, new_rar_name)
        renamed_files += 1

    # Done: The obfuscated rar files have now been renamed to regular formatted filenames
    return renamed_files
Esempio n. 10
0
def deobfuscate_list(filelist, usefulname):
    """Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname.

    First every existing "*.par2" file is handed to `decode_par2`. Only when
    none of them led to a rename (or there are none), the generic renamer runs:
    starting with the biggest file, each file that is bigger than MIN_FILE_SIZE,
    has a non-excluded extension and is judged obfuscated by
    `is_probably_obfuscated` is renamed to <its folder>/<usefulname><extension>.
    Files in `filelist` sharing the renamed file's path-without-extension
    (e.g. subtitles) are renamed the same way, keeping their own extension part.

    An extension carried by three or more files of at least MIN_FILE_SIZE is
    treated as a collection and added to the excluded extensions for this call.
    """
    # To be sure, only keep really existing files:
    filelist = [f for f in filelist if os.path.exists(f)]

    # Search for par2 files in the filelist
    par2_files = [f for f in filelist if f.endswith(".par2")]
    # Found any par2 files we can use?
    run_renamer = True
    if not par2_files:
        logging.debug("No par2 files found to process, running renamer.")
    else:
        # Run par2 from SABnzbd on them
        for par2_file in par2_files:
            # Analyse data and analyse result
            logging.debug("Deobfuscate par2: handling %s", par2_file)
            if decode_par2(par2_file):
                logging.debug("Deobfuscate par2 repair/verify finished.")
                run_renamer = False
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename.")

    # Nothing renamed via par2? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name
    if run_renamer:
        excluded_file_exts = EXCLUDED_FILE_EXTS
        # If there is a collection with bigger files with the same extension, we don't want to rename it
        # NOTE(review): ext comes from os.path.splitext here, while the check further
        # down uses get_ext(); confirm both yield the same form (case, leading dot)
        extcounter = {}
        for candidate in filelist:
            if os.path.getsize(candidate) < MIN_FILE_SIZE:
                # too small to care
                continue
            _, ext = os.path.splitext(candidate)
            extcounter[ext] = extcounter.get(ext, 0) + 1
            if extcounter[ext] >= 3 and ext not in excluded_file_exts:
                # collection, and extension not yet in excluded_file_exts, so add it
                excluded_file_exts = (*excluded_file_exts, ext)
                logging.debug(
                    "Found a collection of at least %s files with extension %s, so not renaming those files",
                    extcounter[ext],
                    ext,
                )

        logging.debug("Trying to see if there are qualifying files to be deobfuscated")
        # We start with the biggest file ... probably the most important file
        filelist = sorted(filelist, key=os.path.getsize, reverse=True)
        for filename in filelist:
            # check that file is still there (and not renamed by the secondary renaming process below)
            if not os.path.isfile(filename):
                continue
            logging.debug("Deobfuscate inspecting %s", filename)
            # Do we need to rename this file?
            # Criteria: big, not-excluded extension, obfuscated (in that order)
            if (
                os.path.getsize(filename) > MIN_FILE_SIZE
                and get_ext(filename) not in excluded_file_exts
                and is_probably_obfuscated(filename)  # this as last test to avoid unnecessary analysis
            ):
                # Rename and make sure the new filename is unique
                path = os.path.dirname(filename)
                # construct new_name: <path><usefulname><extension>
                new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename)))
                logging.info("Deobfuscate renaming %s to %s", filename, new_name)
                renamer(filename, new_name)
                # find other files with the same basename in filelist, and rename them in the same way:
                basedirfile, _ = os.path.splitext(filename)  # something like "/home/this/myiso"
                sibling_prefix = basedirfile + "."
                for otherfile in filelist:
                    if otherfile.startswith(sibling_prefix) and os.path.isfile(otherfile):
                        # yes, same basedirfile, only different extension.
                        # Strip the prefix only: str.replace() would also remove any
                        # later occurrence of basedirfile inside the remaining part.
                        remainingextension = otherfile[len(basedirfile):]  # might be long ext, like ".dut.srt"
                        new_name = get_unique_filename(
                            "%s%s" % (os.path.join(path, usefulname), remainingextension)
                        )
                        logging.info("Deobfuscate renaming %s to %s", otherfile, new_name)
                        # Rename and make sure the new filename is unique
                        renamer(otherfile, new_name)
    else:
        logging.info("No qualifying files found to deobfuscate")