Beispiel #1
0
def KS_test_2(data1, data2):
    """Two-sample Kolmogorov-Smirnov test.

    Works on copies of both inputs, so the caller's data is not reordered.
    The copies are handed to ``mergesort`` (its return value is ignored, so
    it is presumably an in-place sort -- confirm) and then walked in
    lockstep to find the largest gap between the two empirical CDFs.

    Args:
        data1: first sample (anything ``np.copy`` accepts).
        data2: second sample.

    Returns:
        Tuple ``(D, p_val)``: the maximum absolute difference between the
        empirical CDFs, and ``1 - KS_cdf(...)`` evaluated at ``D`` scaled by
        the effective sample size.
    """
    sample_a = np.copy(data1)
    sample_b = np.copy(data2)
    mergesort(sample_a)
    mergesort(sample_b)
    size_a, size_b = len(sample_a), len(sample_b)

    D = 0
    pos_a = pos_b = 0
    ecdf_a = ecdf_b = 0
    while pos_a < size_a and pos_b < size_b:
        value_a, value_b = sample_a[pos_a], sample_b[pos_b]
        # Both branches use <=, so on a tie both cursors advance and both
        # empirical CDFs step before the gap is measured.
        if value_a <= value_b:
            pos_a += 1
            ecdf_a = pos_a / size_a
        if value_b <= value_a:
            pos_b += 1
            ecdf_b = pos_b / size_b
        gap = np.abs(ecdf_b - ecdf_a)
        if gap > D:
            D = gap

    N_eff_sqrt = np.sqrt((size_a * size_b) / (size_a + size_b))
    p_val = 1 - KS_cdf(D * (N_eff_sqrt + 0.12 + 0.11 / N_eff_sqrt))
    return D, p_val
Beispiel #2
0
def median_of_medians(a):
    """Pick a pivot candidate from the medians of groups of five.

    ``a`` is cut into consecutive slices of at most five elements. Each
    slice is sorted with ``mergesort`` (its return value is indexed, so it
    must return the sorted sequence) and the element at index
    ``len(slice) // 2`` is taken as that group's median.

    The list of group medians is neither sorted nor recursed on (see the
    TODO below); the element sitting at its middle position is returned.

    Args:
        a: a sliceable sequence supporting ``len``.

    Returns:
        The group median found at the middle index of the medians list.

    Raises:
        IndexError: if ``a`` is empty (there are no groups to choose from).
    """
    n = len(a)
    # Slice ends are clamped to the sequence length, so the last group
    # simply comes out shorter when n is not a multiple of five.
    groups = [a[start:start + 5] for start in range(0, n, 5)]
    # Floor division keeps the index an int on both Python 2 and Python 3.
    medians = [mergesort(group)[len(group) // 2] for group in groups]
    # TODO: make this call recursive
    return medians[len(medians) // 2]
Beispiel #3
0
def search(arr, item):
    """Report whether ``item`` occurs in ``arr`` using binary search.

    The input does not have to be ordered: it is passed through
    ``mergesort`` first and the search runs on the sequence that call
    returns.

    >>> search([5, 4, 1, 6, 2, 3, 9, 7], 2)
    True

    >>> search([5, 4, 1, 6, 2, 3, 9, 7], 8)
    False
    """
    ordered = mergesort(arr)

    low, high = 0, len(ordered) - 1
    while low <= high:
        mid = (low + high) // 2
        candidate = ordered[mid]
        if candidate == item:
            return True
        # Discard the half that cannot contain the item.
        if item < candidate:
            high = mid - 1
        else:
            low = mid + 1

    return False
Beispiel #4
0
def median_of_medians(a):
    """Pick a pivot candidate from the medians of groups of five.

    ``a`` is cut into consecutive slices of at most five elements. Each
    slice is sorted with ``mergesort`` (its return value is indexed, so it
    must return the sorted sequence) and the element at index
    ``len(slice) // 2`` is taken as that group's median.

    The list of group medians is neither sorted nor recursed on (see the
    TODO below); the element sitting at its middle position is returned.

    Args:
        a: a sliceable sequence supporting ``len``.

    Returns:
        The group median found at the middle index of the medians list.

    Raises:
        IndexError: if ``a`` is empty (there are no groups to choose from).
    """
    n = len(a)
    # Slice ends are clamped to the sequence length, so the last group
    # simply comes out shorter when n is not a multiple of five.
    groups = [a[start:start + 5] for start in range(0, n, 5)]
    # Floor division keeps the index an int on both Python 2 and Python 3.
    medians = [mergesort(group)[len(group) // 2] for group in groups]
    # TODO: make this call recursive
    return medians[len(medians) // 2]
Beispiel #5
0
def KS_test(data, cdf):
    """One-sample Kolmogorov-Smirnov test of ``data`` against ``cdf``.

    A copy of ``data`` is handed to ``mergesort`` (return value ignored, so
    presumably an in-place sort -- confirm). For every sample the model CDF
    is compared with the empirical CDF both just below and just above the
    step at that sample, and the largest gap is kept.

    Args:
        data: the sample (anything ``np.copy`` accepts).
        cdf: callable mapping one sample value to the model CDF there.

    Returns:
        Tuple ``(D, p_val)``: the largest gap found, and ``1 - KS_cdf(...)``
        evaluated at ``D`` scaled by the sample-size factor.
    """
    sample = np.copy(data)
    mergesort(sample)
    count = len(sample)

    D = 0
    ecdf_below = 0
    for rank, value in enumerate(sample, start=1):
        ecdf_above = rank / count
        model = cdf(value)
        # The empirical CDF jumps at each sample; check both sides of it.
        gap = max(np.abs(ecdf_above - model), np.abs(ecdf_below - model))
        if gap > D:
            D = gap
        ecdf_below = ecdf_above

    scale = count**0.5 + 0.12 + 0.11 * count**-0.5
    p_val = 1 - KS_cdf(D * scale)
    return D, p_val
Beispiel #6
0
def Kuipers_test(data, cdf):
    """One-sample Kuiper test of ``data`` against ``cdf``.

    The returned p-values are only accurate when they are small (within two
    decimal places for p < 0.74). A rejection of H0 is therefore real, but
    the p-values are not distributed as one would otherwise expect.

    A copy of ``data`` is handed to ``mergesort`` (return value ignored, so
    presumably an in-place sort -- confirm). The statistic is the sum of the
    largest amount by which the empirical CDF exceeds the model CDF and the
    largest amount by which the model CDF exceeds the empirical CDF.

    Args:
        data: the sample (anything ``np.copy`` accepts).
        cdf: callable mapping one sample value to the model CDF there.

    Returns:
        Tuple ``(D, p_val)``: the Kuiper statistic and the value of
        ``Kuipers_cdf`` at ``D`` scaled by the sample-size factor.
    """
    sample = np.copy(data)
    mergesort(sample)
    count = len(sample)

    above_max = 0  # largest (empirical - model), measured after each step
    below_max = 0  # largest (model - empirical), measured before each step
    ecdf_below = 0
    for rank, value in enumerate(sample, start=1):
        ecdf_above = rank / count
        model = cdf(value)
        excess_above = ecdf_above - model
        excess_below = model - ecdf_below
        if excess_above > above_max:
            above_max = excess_above
        if excess_below > below_max:
            below_max = excess_below
        ecdf_below = ecdf_above

    D = below_max + above_max
    scale = count**0.5 + 0.155 + 0.24 * count**-0.5
    # NOTE(review): unlike KS_test there is no "1 -" here; presumably
    # Kuipers_cdf already returns the tail probability -- confirm.
    p_val = Kuipers_cdf(D * scale)
    return D, p_val
Beispiel #7
0
def test_min_mergesort():
    """mergesort with default arguments restores ascending key order."""
    expected = [KeyedItem(key=k) for k in range(100)]
    shuffled = list(expected)
    random.shuffle(shuffled)
    # The list is checked after the call, so mergesort must sort in place.
    mergesort(shuffled)
    assert shuffled == expected
Beispiel #8
0
def test_max_mergesort():
    """mergesort with order='max' restores descending key order."""
    expected = [KeyedItem(key=k) for k in range(99, -1, -1)]
    shuffled = list(expected)
    random.shuffle(shuffled)
    # The list is checked after the call, so mergesort must sort in place.
    mergesort(shuffled, order='max')
    assert shuffled == expected
Beispiel #9
0
# Control strip: one button per sorting algorithm plus a size slider, all
# placed on grid row 0 in columns 0-5.
# NOTE(review): Button, Scale and HORIZONTAL look like tkinter names, and
# root, sorting, value_arry and rect_arry are defined outside this snippet --
# confirm against the rest of the file.
# Each command is wrapped in a lambda, so the sorting.* attribute and the
# argument values are looked up when the callback runs, not when the widget
# is created.

# Column 0: selection sort (called with no arguments).
selection_sort = Button(root,
                        text="Selection sort",
                        command=lambda: sorting.selectionsort())
selection_sort.grid(row=0, column=0)

# Column 1: quick sort, called with index bounds 0 and len(value_arry) - 1.
quick_sort = Button(
    root,
    text="Quick sort",
    command=lambda: sorting.quickSort_high_pivot(0,
                                                 len(value_arry) - 1))
quick_sort.grid(row=0, column=1)

# Column 2: merge sort, called with value_arry, rect_arry and a literal 0.
# NOTE(review): the meaning of the trailing 0 is not visible here -- confirm.
merge_sort = Button(
    root,
    text="Merge sort",
    command=lambda: sorting.mergesort(value_arry, rect_arry, 0))
merge_sort.grid(row=0, column=2)

# Column 3: insertion sort (called with no arguments).
insertion_sort = Button(root,
                        text="Insertion sort",
                        command=lambda: sorting.insertionsort())
insertion_sort.grid(row=0, column=3)

# Column 4: bubble sort (called with no arguments).
bubble_sort = Button(root,
                     text="Bubble sort",
                     command=lambda: sorting.bubblesort())
bubble_sort.grid(row=0, column=4)

# Column 5: horizontal slider ranging from 10 to 200, drawn 400 units long.
# Presumably it selects how many values are sorted -- confirm where
# array_size is read.
array_size = Scale(root, from_=10, to=200, orient=HORIZONTAL, length=400)
array_size.grid(row=0, column=5)
Beispiel #10
0
def test_mergesort():
    """sorting.mergesort leaves a random list in sorted order."""
    values = generateRandomList()
    # The same list object is checked afterwards, so the sort is in place.
    sorting.mergesort(values)
    assert is_sorted(values)
Beispiel #11
0
 def test_mergesort(self):
     """sorting.mergesort must leave self.array ordered like .sort() would."""
     # Take the reference ordering from a copy before the fixture is touched.
     expected = self.array[:]
     expected.sort()
     sorting.mergesort(self.array)
     self.assertEqual(self.array, expected)
Beispiel #12
0
 def test_mergesort(self):
     """sorting.mergesort must leave self.array ordered like .sort() would."""
     # Take the reference ordering from a copy before the fixture is touched.
     expected = self.array[:]
     expected.sort()
     sorting.mergesort(self.array)
     self.assertEqual(self.array, expected)
def _gini_impurity(counts, labels):
    """Return the Gini impurity of one split side, rounded to 4 places.

    ``counts`` maps label -> number of samples on this side. Only the labels
    listed in ``labels`` take part. A side with none of those labels scores
    0.0.
    """
    total = 0.0
    for label in labels:
        total += counts.get(label, 0)
    if total == 0:
        return 0.0
    squared = 0.0
    for label in labels:
        squared += pow(float(counts.get(label, 0)) / float(total), 2)
    return round(1.0 - squared, 4)


def continousGini(column, itsLabel):
    """Find the threshold on a numeric column with the lowest weighted Gini.

    Works on copies of ``column`` and ``itsLabel``. The copies are passed to
    ``mergesort(a, l)``; presumably this sorts the values ascending and
    applies the same permutation to the labels -- confirm.

    Candidate thresholds are the smallest value, the integer-truncated
    midpoint of every adjacent pair, and the largest value. For a threshold
    ``x`` the "yes" side holds samples with ``value <= x`` and the "no" side
    the rest. Each side's impurity is computed over the module-level
    ``types_lables`` collection (defined outside this block) and weighted by
    the fraction of all samples on that side.

    Args:
        column: sliceable sequence of numeric feature values.
        itsLabel: sliceable sequence of class labels, parallel to ``column``.

    Returns:
        Tuple ``(min_gini, split_value)``: the lowest weighted impurity
        (rounded to 4 places) and the threshold that achieves it. On a tie
        the first such threshold in the score dict's iteration order wins.

    Raises:
        IndexError: if ``column`` is empty (for list-like input).
    """
    a = column[:]
    l = itsLabel[:]
    mergesort(a, l)
    sample_count = float(len(a))

    candidates = [a[0]]
    candidates.extend(int((low + high) / 2) for low, high in zip(a, a[1:]))
    candidates.append(a[-1])

    avg_gini_val = {}
    for x in candidates:
        if x in avg_gini_val:
            # Duplicate threshold: it would score the same, so skip it.
            continue

        yes_counts = {}
        no_counts = {}
        for i, value in enumerate(a):
            side = yes_counts if value <= x else no_counts
            side[l[i]] = side.get(l[i], 0) + 1

        # Side weights count every sample, including labels that are not
        # listed in types_lables; the impurity itself only looks at those
        # listed labels.
        yes_weight = float(sum(yes_counts.values())) / sample_count
        no_weight = float(sum(no_counts.values())) / sample_count
        weighted = (_gini_impurity(yes_counts, types_lables) * yes_weight +
                    _gini_impurity(no_counts, types_lables) * no_weight)
        avg_gini_val[x] = round(weighted, 4)

    # min() over the dict returns the first key with the lowest score; this
    # replaces indexing into keys()/values(), which only works on Python 2.
    split_value = min(avg_gini_val, key=avg_gini_val.get)
    min_gini = avg_gini_val[split_value]
    return min_gini, split_value
def test_mergesort():
    """sorting.mergesort returns the same ordering as the built-in sorted()."""
    sample = fill_random_list()
    result = sorting.mergesort(sample)
    assert result == sorted(sample)