def bucket_sort(numbers, num_buckets=10):
    """Sort numbers in place by distributing them into range-based buckets.

    Each value is placed into one of ``num_buckets`` buckets that cover
    equal-width subranges of [min(numbers), max(numbers)]. Every bucket is
    then sorted and the buckets are written back into ``numbers`` in order.

    Args:
        numbers: mutable sequence of ints and/or floats; reordered in place.
        num_buckets: how many buckets to distribute into; must be >= 1.

    Returns:
        None. ``numbers`` itself is mutated.

    Raises:
        ValueError: if ``num_buckets`` is less than 1 and there are at
            least two numbers to sort.

    Running time: O(n log n) in the worst case (every value lands in the
    same bucket and that bucket is sorted with the built-in sort).
    Memory usage: O(n + num_buckets) for the list of buckets.
    """
    # Zero or one item is already sorted; nothing to do.
    if len(numbers) < 2:
        return
    if num_buckets < 1:
        raise ValueError(
            'num_buckets must be at least 1, got {!r}'.format(num_buckets))

    # Find range of given numbers (minimum and maximum values)
    low = min(numbers)
    high = max(numbers)
    # Width of each subrange; the "+ 1" keeps the maximum inside the last
    # bucket for integer input.
    bucket_range = (high - low) // num_buckets + 1
    last_index = num_buckets - 1

    # Create list of buckets to store numbers in subranges of input range
    buckets = [[] for _ in range(num_buckets)]

    # Loop over given numbers and place each item in appropriate bucket
    for item in numbers:
        # Floor division on floats returns a float, which cannot index a
        # list, so convert explicitly. Clamp as well: for very large floats
        # the "+ 1" above can be lost to rounding and push the maximum one
        # slot past the end.
        index = min(int((item - low) // bucket_range), last_index)
        buckets[index].append(item)

    # Sort each bucket; any correct sort works here, so use the built-in one
    for bucket in buckets:
        bucket.sort()

    # Write the buckets back in order so the caller's sequence is mutated
    ordered = (item for bucket in buckets for item in bucket)
    for position, item in enumerate(ordered):
        numbers[position] = item
예제 #2
0
 def test_merge_sort(self):
     """merge_sort orders a list of names in place, keeping duplicates."""
     names = ['Absol', 'Gyrados', 'Milotic', 'Pikachu']
     names += ['Charizard', 'Mew', 'Misdreavous', 'Absol']
     expected = ['Absol', 'Absol', 'Charizard', 'Gyrados']
     expected += ['Mew', 'Milotic', 'Misdreavous', 'Pikachu']

     merge_sort(names)

     # The same list object must now hold the names in ascending order.
     assert names == expected
예제 #3
0
def bucket_sort(numbers):
    """Sort numbers in place using the buckets built by ``make_buckets``.

    Every non-empty bucket is read out as a list via ``bucket.items()``,
    sorted with ``merge_sort``, and copied back into ``numbers`` in bucket
    order, so the input ends up holding the sorted sequence.

    Running time: one pass over the buckets plus one merge sort per
    non-empty bucket. Assuming ``merge_sort`` is O(k log k) on k items, the
    best case (values spread thinly across buckets) tends towards O(n) and
    the worst case (all values in one bucket) is O(n log n). The cost of
    ``make_buckets`` itself is not visible here.

    Memory usage: presumably O(n) -- the buckets hold the n input values
    and one temporary list exists per bucket while it is sorted; confirm
    against ``make_buckets``.

    Returns:
        None. ``numbers`` itself is mutated.
    """
    buckets = make_buckets(numbers)
    # Next position in ``numbers`` to overwrite with a sorted value
    write_at = 0
    for bucket in buckets:
        if bucket.size == 0:
            continue
        # Relies on items() returning a list that merge_sort sorts in place
        values = bucket.items()
        merge_sort(values)
        # Copy straight from the sorted list. Rebuilding a linked list and
        # reading it back by index would presumably cost a traversal per
        # element.
        for value in values:
            numbers[write_at] = value
            write_at += 1
 def test_sort_on_large_list(self):
     """merge_sort orders 9- and 10-element integer lists in place."""
     cases = (
         ([5, 1, 6, 78, 5, 1, 2, 5, 1],
          [1, 1, 1, 2, 5, 5, 5, 6, 78]),
         ([5, 1, 2, 56, 2, 6, 78, 8, 90, 124],
          [1, 2, 2, 5, 6, 8, 56, 78, 90, 124]),
         ([56, 1, 3, 634, 7, 8, 2, 123, 1, 2],
          [1, 1, 2, 2, 3, 7, 8, 56, 123, 634]),
     )
     for unsorted, expected in cases:
         merge_sort(unsorted)
         assert unsorted == expected
 def test_sort_on_small_list(self):
     """merge_sort orders 4- and 5-element integer lists in place."""
     cases = (
         ([39, 6, 8, 12, 10], [6, 8, 10, 12, 39]),
         ([73, 61, 5, 12], [5, 12, 61, 73]),
         ([20, 20, 91, 24], [20, 20, 24, 91]),
     )
     for unsorted, expected in cases:
         merge_sort(unsorted)
         assert unsorted == expected
 def test_sort_on_decimal_with_negative_list(self):
     """merge_sort orders lists mixing negative and positive floats."""
     cases = (
         ([9.2, -5.3, 2.3, 7.5], [-5.3, 2.3, 7.5, 9.2]),
         ([6.2, -12.12, 75.3, 12.12], [-12.12, 6.2, 12.12, 75.3]),
         ([-9.5, 5.6, -89.1, 12.2], [-89.1, -9.5, 5.6, 12.2]),
     )
     for unsorted, expected in cases:
         merge_sort(unsorted)
         assert unsorted == expected
예제 #7
0
    # Get sort function by name
    if len(args) >= 1:
        sort_name = args[0]
        # Terrible hack abusing globals
        if sort_name in globals():
            sort_function = globals()[sort_name]
            return sort_function
        else:
            # Don't explode, just warn user and show list of sorting functions
            print('Sorting function {!r} does not exist'.format(sort_name))
            print('Available sorting functions:')
            for name in globals():
                if 'sort' in name:
                    print('    {}'.format(name))
            return


# If using PyTest, change this variable to the sort function you want to test
sort = selection_sort

# NOTE(review): when merge_sort is the selected function, this calls it on a
# module-level `items` (not defined in the visible part of the file -- confirm
# it exists) and then rebinds `sort` to a copy of the returned list, so `sort`
# is no longer callable afterwards. Presumably a workaround for merge_sort
# returning a new list rather than sorting in place -- TODO confirm intent.
if sort == merge_sort:
    sorted_items = merge_sort(items)
    sort = sorted_items[:]

# Run the unittest runner when this file is executed as a script
if __name__ == '__main__':
    # Get sort function from command-line argument
    # FIXME: This is causing unittest to throw an error
    # sort = get_sort_function()
    unittest.main()
예제 #8
0
 def test_merge_sort(self):
     """Check SortRecur.merge_sort against 100 generated test cases."""
     rounds_left = 100
     while rounds_left > 0:
         rounds_left -= 1
         # Each case is indexable: [0] is the input, [1] the expected order.
         case = _generate_testcase()
         shuffled = case[0]
         SortRecur.merge_sort(shuffled)
         expected = case[1]
         assert shuffled == expected
예제 #9
0
 def test_merge_sort_strings(self):
     """merge_sort puts a list of strings into ascending order in place."""
     fruits = ["banana", "apple", "orange"]
     expected = ["apple", "banana", "orange"]

     merge_sort(fruits)

     assert fruits == expected
예제 #10
0
 def test_merge_sort_negs(self):
     """merge_sort orders a list holding negative and positive integers."""
     values = [-3, 5, -7, -4]
     expected = [-7, -4, -3, 5]

     merge_sort(values)

     assert values == expected
예제 #11
0
 def test_merge_sort(self):
     """merge_sort orders a list of integers containing a duplicate."""
     values = [1, 3, 7, 4, 6, 10, 5, 4]
     expected = [1, 3, 4, 4, 5, 6, 7, 10]

     merge_sort(values)

     assert values == expected