def search(file_list):
    """Look for duplicate files in the provided list of files.

    Basic search strategy:
    - take the first file of the remaining list as the reference;
    - split the rest, in a single pass, into files that match the reference
      and files that do not;
    - if the reference has at least one match, save the group;
    - repeat with the files that did not match.

    The reference is always removed from the working list, so the loop
    terminates even if ``compare`` were to report a file as different from
    itself, and ``compare`` is called once per pair instead of twice.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each inner list holds the reference file
        followed by every file ``compare`` reported as matching it.
    """
    lol = []
    remaining = list(file_list)
    while remaining:
        head = remaining[0]
        dups = [head]
        leftovers = []
        for candidate in remaining[1:]:
            if compare(head, candidate):
                dups.append(candidate)
            else:
                leftovers.append(candidate)

        if len(dups) > 1:
            lol.append(dups)

        # Only the files that did not match are examined in the next round.
        remaining = leftovers

    return lol
Exemple #2
0
def search(file_list):
    '''Look for duplicate files in the provided list of files.

    Each round takes the first pending file, sorts every other pending file
    into "matches it" or "still pending" with a single compare() call per
    file, and keeps the match group when it has more than one member.
    The first file always leaves the pending list, so every round makes
    progress regardless of what compare() returns.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each list contains files for which
        compare() reported a match with the first file of that list.
    '''
    lol = []  # duplicate groups found so far
    pending = list(file_list)
    while pending:
        first, *others = pending
        dups, pending = [first], []
        for x in others:
            # one compare() call decides which bucket x belongs to
            (dups if compare(first, x) else pending).append(x)
        if 1 < len(dups):
            lol.append(dups)
    return lol
Exemple #3
0
def search(file_list):
    """Build a list of lists of duplicate files from ``file_list``.

    The original note says ``file_list`` is the list created by the
    'all_files' function. Each pass removes the first remaining file, compares
    it once against every other remaining file, and records the group when at
    least one other file matched. Files that matched are not examined again.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists; each inner list starts with the reference file
        and is followed by the files ``compare`` reported as matching it.
    """
    list_of_lists = []
    remaining = list(file_list)
    while remaining:
        # Removing the reference up front guarantees the loop shrinks the
        # working list on every pass.
        reference = remaining.pop(0)
        duplicate_files = [reference]
        unmatched = []
        for file in remaining:
            if compare(reference, file):
                duplicate_files.append(file)
            else:
                unmatched.append(file)
        if 1 < len(duplicate_files):
            list_of_lists.append(duplicate_files)
        remaining = unmatched
    return list_of_lists
Exemple #4
0
def search(file_list):
    """Look for duplicate files in the provided list of files.

    Basic search strategy goes like this:
    - until the working list is empty,
    - remove the 1st item from the working list,
    - compare it against every remaining file (not only the files directly
      following it) and collect the item and all its duplicates in a new list,
    - drop the duplicates from the working list so they do not start groups
      of their own later,
    - if that new list has more than one item (i.e. we did find duplicates)
      save the list in the list of lists.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each inner list contains files for which
        ``compare`` reported a match with the first file of that list.
    """
    lol = []
    remaining = list(file_list)

    while remaining:
        h = remaining.pop(0)
        group = [h]
        rest = []
        for i in remaining:
            if compare(h, i):
                group.append(i)
            else:
                # A non-match must not end the scan: later files may still
                # be duplicates of h.
                rest.append(i)
        if len(group) > 1:
            lol.append(group)
        remaining = rest
    return lol
Exemple #5
0
def search(file_list):
    """Collect groups of duplicate files from ``file_list``.

    Repeatedly pops the first file off ``file_list`` and scans the remaining
    entries from the end towards the front; every entry that ``compare``
    reports as matching is popped out of ``file_list`` and added to the
    current group. Groups with more than one member are kept.

    Note that ``file_list`` itself is consumed: it is empty when the function
    returns.

    :param file_list: list of files to examine (emptied in place).
    :returns: a list of lists; each inner list holds the popped first file
        followed by its matches in back-to-front order.
    """
    groups = []
    while len(file_list) > 0:
        reference = file_list.pop(0)
        duplicates = [reference]
        # Walk backwards so popping an entry never shifts the entries that
        # are still to be visited.
        index = len(file_list)
        while index > 0:
            index -= 1
            if compare(reference, file_list[index]):
                duplicates.append(file_list.pop(index))
        if len(duplicates) > 1:
            groups.append(duplicates)
    return groups
def faster_search(file_list):
    """Look for duplicate files in the provided list of files.

    ``compare`` is the expensive step, so it is only called for files that
    have the same size: files are first bucketed by ``getsize`` and each
    bucket is then searched on its own.

    Within a bucket the first remaining file is the reference; every other
    file in the bucket is compared against it exactly once, and files that
    match are removed from further consideration. No list is modified while
    it is being iterated, so no candidate is skipped.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a dictionary mapping a file to the list of other files that
        ``compare`` reported as matching it. Only files with at least one
        match appear as keys. Keys are inserted in ascending size order.
    """
    by_size = {}
    for path in file_list:
        by_size.setdefault(getsize(path), []).append(path)

    lol = {}
    for size in sorted(by_size):
        candidates = by_size[size]
        # A bucket with a single file cannot contain a duplicate pair.
        while len(candidates) > 1:
            head = candidates[0]
            unmatched = []
            for other in candidates[1:]:
                if compare(head, other):
                    lol.setdefault(head, []).append(other)
                else:
                    unmatched.append(other)
            candidates = unmatched

    return lol
def faster_search(file_list):
    """Collect groups of duplicate files from ``file_list``.

    The first file is popped off ``file_list`` and compared against every
    remaining entry, visiting them from the last index down to index 0.
    Matching entries are popped out of ``file_list`` and join the group.
    A group is saved only when it has more than one member.

    This variant does no size-based pre-filtering: ``compare`` is called for
    every (reference, remaining file) pair. ``file_list`` is emptied in place.

    :param file_list: list of files to examine (consumed by the search).
    :returns: a list of lists, where each list contains files for which
        ``compare`` reported a match with the first file of that list.
    """
    found = []
    while file_list:
        name = file_list.pop(0)
        group = [name]
        # The index range is fixed before any pop; going high-to-low keeps
        # the not-yet-visited indexes valid.
        for position in reversed(range(len(file_list))):
            if compare(name, file_list[position]):
                group.append(file_list.pop(position))
        if 1 < len(group):
            found.append(group)
    return found
Exemple #8
0
def faster_search(file_list):
    """Look for duplicate files, using file size as a cheap pre-check.

    Each file's size is read once with ``getsize``. Files whose size is
    unique are discarded up front, and ``compare`` is only called for two
    files that have the same size.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists; each inner list holds a reference file followed
        by the files ``compare`` reported as matching it (in back-to-front
        order of the filtered list).
    """
    sizes = [getsize(path) for path in file_list]

    # Tally sizes in one pass instead of calling list.count() per file.
    size_counts = {}
    for size in sizes:
        size_counts[size] = size_counts.get(size, 0) + 1

    candidates = [(size, path)
                  for size, path in zip(sizes, file_list)
                  if 1 < size_counts[size]]

    lol = []
    while candidates:
        head_size, head = candidates.pop(0)
        dups = [head]
        # Backwards so that deleting an entry does not disturb the indexes
        # still to be visited.
        for i in range(len(candidates) - 1, -1, -1):
            size, path = candidates[i]
            if size == head_size and compare(head, path):
                dups.append(path)
                del candidates[i]
        if 1 < len(dups):
            lol.append(dups)
    return lol
Exemple #9
0
def search(file_list):
    """Collect groups of duplicate files from ``file_list``.

    Each round uses the first file of the current list as the reference and
    compares it against every file in that list, the reference included.
    Files that ``compare`` reports as matching form the round's group; the
    others become the list for the next round. Groups with more than one
    member are kept.

    The reference only leaves the working list because ``compare`` is
    expected to report it as matching itself.

    :param file_list: files to examine; the name is rebound, the caller's
        list object is not modified.
    :returns: a list of lists of matching files.
    """
    groups = []
    while len(file_list) > 0:
        reference = file_list[0]
        same, different = [], []
        for candidate in file_list:
            bucket = same if compare(reference, candidate) else different
            bucket.append(candidate)
        if len(same) > 1:
            groups.append(same)
        file_list = different
    return groups
Exemple #10
0
def faster_search(file_list):
    """Serve the same purpose as the 'search' function, with fewer compare() calls.

    Before comparing anything, files whose size is unique are removed, since
    such a file cannot have a copy. Sizes are read once per file and tallied
    in a dictionary, and compare() is only called for two files of equal size.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists; each inner list holds a reference file followed
        by the files compare() reported as matching it (in back-to-front
        order of the filtered list).
    """
    file_sizes = [getsize(file) for file in file_list]
    occurrences = {}
    for size in file_sizes:
        occurrences[size] = occurrences.get(size, 0) + 1

    candidates = [(size, file)
                  for size, file in zip(file_sizes, file_list)
                  if 1 < occurrences[size]]

    list_of_lists = []
    while candidates:
        (ref_size, ref_file), rest = candidates[0], candidates[1:]
        duplicate_files = [ref_file]
        candidates = []
        # Scan back-to-front so the order of matches inside a group is the
        # same as with the pop-from-the-end approach.
        for size, file in reversed(rest):
            if size == ref_size and compare(ref_file, file):
                duplicate_files.append(file)
            else:
                candidates.append((size, file))
        # Non-matches were gathered in reverse; restore their original order.
        candidates.reverse()
        if 1 < len(duplicate_files):
            list_of_lists.append(duplicate_files)
    return list_of_lists
def fasterSearch(file_list):
    """Group files by size and, within each size, by matching content.

    For every file, its size is looked up with ``os.path.getsize`` and the
    file is compared (``p1utils.compare``) against the first file of each
    group already recorded for that size. On the first match the file joins
    that group and the scan stops; if no group matches, the file starts a new
    group under the next free index.

    :param file_list: files to examine; it is not modified.
    :returns: a dictionary ``{size: {index: [names]}}`` where every inner list
        holds files that matched the list's first file. Groups with a single
        file are included.
    """
    cmpDict = {}  # {size of file: {index: names}}
    for name in file_list:
        groups = cmpDict.setdefault(os.path.getsize(name), {})
        for k in range(len(groups)):
            if p1utils.compare(name, groups[k][0]):
                groups[k].append(name)
                # Stop at the first match: no further comparisons are needed
                # and no other group may be touched.
                break
        else:
            # No existing group matched: open a new one at the next index.
            groups[len(groups)] = [name]
    return cmpDict
def search(file_list):
    """Group files by size and, within each size, by matching content.

    Each file is compared (``p1utils.compare``) against the first file of
    every group already recorded for its size. It joins the first group that
    matches; only when no group matches does it start a new group. A new
    group is therefore added at most once per file, and never while the
    group list is still being scanned.

    :param file_list: files to examine; it is not modified.
    :returns: a dictionary ``{size: [[names], ...]}`` where every inner list
        holds files that matched the list's first file. Groups with a single
        file are included.
    """
    doS = {}  # {size of file: list of groups}
    for name in file_list:
        groups = doS.setdefault(os.path.getsize(name), [])
        for item in groups:
            if p1utils.compare(name, item[0]):
                item.append(name)  # add to its original file's list
                break
        else:
            # The loop finished without a match (or there were no groups
            # yet): this file is a new original.
            groups.append([name])
    return doS
Exemple #13
0
def faster_search(file_list):
    """Look for duplicate files in the provided list of files.

    Executing ``compare`` is the expensive part, so it is called as rarely
    as possible:
    - every file's size is read once with ``getsize``;
    - files whose size occurs only once are dropped before any comparison;
    - two files are only passed to ``compare`` when their sizes are equal.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each list contains files for which
        ``compare`` reported a match with the first file of that list
        (matches appear in back-to-front order of the filtered list).
    """
    lol = []
    file_sizes = [getsize(file) for file in file_list]

    # Tally how often each size occurs (one pass, no list.count()).
    tally = {}
    for size in file_sizes:
        tally[size] = tally.get(size, 0) + 1

    sized_files = [(size, file)
                   for size, file in zip(file_sizes, file_list)
                   if 1 < tally[size]]

    while sized_files:
        first_size, first = sized_files.pop(0)
        duplicate = [first]
        for i in range(len(sized_files) - 1, -1, -1):
            size, file = sized_files[i]
            # The size test is nearly free and short-circuits the costly
            # content comparison.
            if size == first_size and compare(first, file):
                duplicate.append(file)
                del sized_files[i]
        if 1 < len(duplicate):
            lol.append(duplicate)
    return lol
Exemple #14
0
def faster_search(file_list):
    """Look for duplicate files in the provided list of files.

    Executing ``compare`` is the expensive part, so it is only called for
    files whose ``getsize`` value equals that of the reference file. The
    input does not need to be sorted by size: a file of a different size is
    simply skipped, it does not end the scan.

    Files that matched a reference are removed from further consideration, so
    every duplicate group is reported once, and only groups with more than
    one file are reported.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each list contains files for which
        ``compare`` reported a match with the first file of that list.
    """
    lol = []
    # Read each size once and carry it along with the file.
    sized = [(getsize(path), path) for path in file_list]

    while sized:
        h_size, h = sized.pop(0)
        group = [h]
        rest = []
        for size, i in sized:
            if size == h_size and compare(h, i):
                group.append(i)
            else:
                rest.append((size, i))
        if len(group) > 1:
            lol.append(group)
        sized = rest
    return lol
Exemple #15
0
def faster_search(file_list):
    """Look for duplicate files in the provided list of files.

    Executing ``compare`` is the expensive part, so the search first narrows
    the input down to files whose size (``getsize``) is shared with at least
    one other file, and only compares files of equal size.

    Every duplicate group is reported exactly once: files that matched a
    reference are removed from the candidates and never become a reference
    themselves.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each list contains files for which
        ``compare`` reported a match with the first file of that list.
    """
    lol = []
    file_sizes = [getsize(x) for x in file_list]

    # One-pass tally of sizes instead of list.count() per file.
    size_tally = {}
    for size in file_sizes:
        size_tally[size] = size_tally.get(size, 0) + 1

    candidates = [(size, x)
                  for size, x in zip(file_sizes, file_list)
                  if 1 < size_tally[size]]

    while candidates:
        ref_size, ref = candidates[0]
        copies = [ref]
        others = []
        for size, x in candidates[1:]:
            if size == ref_size and compare(ref, x):
                copies.append(x)
            else:
                others.append((size, x))
        if 1 < len(copies):
            lol.append(copies)
        candidates = others

    return lol
def search(file_list):
    """Look for duplicate files in the provided list of files.

    Basic search strategy goes like this:
    - until the working list is empty,
    - remove the 1st item from the working list,
    - compare it against every remaining file; files that match are recorded
      under that item and removed from the working list, so they do not
      produce partial groups of their own later.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a dictionary mapping a file to the list of other files that
        ``compare`` reported as matching it. Only files with at least one
        match appear as keys.
    """
    lol = {}
    remaining = list(file_list)
    while remaining:
        i = remaining.pop(0)  # takes the first remaining item
        unmatched = []
        for k in remaining:
            if compare(i, k):
                lol.setdefault(i, []).append(k)
            else:
                unmatched.append(k)
        remaining = unmatched

    return lol
Exemple #17
0
def faster_search(file_list):
    """Look for duplicate files in the provided list of files.

    Executing ``compare`` is the expensive part, so it is called as rarely
    as possible: sizes are read once per file with ``getsize``, files with a
    unique size are dropped up front, and ``compare`` only runs for two files
    of equal size.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each list contains files for which
        ``compare`` reported a match with the first file of that list
        (matches appear in back-to-front order of the filtered list).
    """
    lol = []
    flSize = [getsize(x) for x in file_list]

    # Tally sizes once; list.count() per file would rescan the whole list.
    sizeCount = {}
    for size in flSize:
        sizeCount[size] = sizeCount.get(size, 0) + 1

    pending = [(size, x)
               for size, x in zip(flSize, file_list)
               if 1 < sizeCount[size]]

    while pending:
        headSize, head = pending.pop(0)
        tempLst = [head]
        # A plain loop: the pops are side effects, not values to collect.
        for i in range(len(pending) - 1, -1, -1):
            size, x = pending[i]
            if size == headSize and compare(head, x):
                tempLst.append(x)
                pending.pop(i)
        if len(tempLst) > 1:
            lol.append(tempLst)
    return lol
Exemple #18
0
def search(file_list):
    """Look for duplicate files in the provided list of files.

    Search strategy:
    - read every file's size once with ``getsize`` and drop files whose size
      is unique, since they cannot have a duplicate;
    - until no candidates are left, take the first candidate as the reference;
    - compare it with the remaining candidates of the same size and put the
      reference and all its matches into a new list;
    - if that new list has more than one item (i.e. we did find duplicates)
      save the list in the list of lists.

    :param file_list: files to examine; the caller's list is not modified.
    :returns: a list of lists, where each list contains files for which
        ``compare`` reported a match with the first file of that list
        (matches appear in back-to-front order of the filtered list).
    """
    lol = []
    flSize = [getsize(x) for x in file_list]

    seen = {}
    for size in flSize:
        seen[size] = seen.get(size, 0) + 1

    candidates = [(size, x)
                  for size, x in zip(flSize, file_list)
                  if 1 < seen[size]]

    while candidates:
        (refSize, ref), others = candidates[0], candidates[1:]
        tempLst = [ref]
        candidates = []
        # Back-to-front scan keeps the order of matches inside a group.
        for size, x in reversed(others):
            if size == refSize and compare(ref, x):
                tempLst.append(x)
            else:
                candidates.append((size, x))
        # Non-matches were collected in reverse; put them back in order.
        candidates.reverse()
        if len(tempLst) > 1:
            lol.append(tempLst)
    return lol
Exemple #19
0
def search(file_list):
    """Collect groups of duplicate files from ``file_list``.

    Each round pops the last file off ``file_list`` and uses it as the probe.
    The remaining entries are visited from the highest index down to 0; every
    entry that ``compare`` reports as matching the probe is popped out of
    ``file_list``. When at least one match was found, the matches followed by
    the probe itself are saved as one group.

    ``file_list`` is consumed: it is empty when the function returns.

    :param file_list: list of files to examine (emptied in place).
    :returns: a list of lists, where each list contains files for which
        ``compare`` reported a match with the list's last element.
    """
    groups = []
    while file_list:
        probe = file_list.pop()
        matches = []
        index = len(file_list) - 1
        while index >= 0:
            if compare(probe, file_list[index]):
                matches.append(file_list.pop(index))
            index -= 1
        if matches:
            groups.append(matches + [probe])
    return groups