Exemple #1
0
def determineSizeOfLastRock(weights):
    """Return the weight of the last rock left after repeated smashing.

    Each round removes the two rocks at the end of the sorted collection and,
    when their weights differ, puts a rock weighing the difference back in.
    The loop stops when at most one rock remains.

    Entries rejected by ``validateInput`` are ignored. The caller's list is
    not modified because filtering builds a new list.

    Returns 0 when ``weights`` is None or empty, or when every rock has been
    destroyed; otherwise the weight of the single remaining rock.
    """
    if weights is None or weights == []:
        return 0

    rocks = list(filter(validateInput, weights))

    # assumes Sort.quicksort sorts in place, ascending, so the two heaviest
    # rocks are always the last two entries -- confirm against Sort
    Sort.quicksort(rocks)

    # len(rocks) is consulted directly instead of keeping a separate counter
    # that could drift from the real list length
    while len(rocks) > 1:
        last = rocks.pop()
        second_last = rocks.pop()
        remainder = abs(last - second_last)

        if remainder > 0:
            # re-insert at the sorted position so the ordering is preserved
            Insert.insertInOrderBinary(rocks, remainder)

    # an empty list means the rocks cancelled each other out completely
    return rocks[0] if rocks else 0
Exemple #2
0
def __buildArticleList(pages,articleInds,pageLimit,numArticles):
    """Greedily select articles, in the given order, that fit in pageLimit.

    Walks the first ``numArticles`` entries. An article is kept when adding
    its page count keeps the running total at or below ``pageLimit``;
    otherwise it is skipped and later (smaller) articles may still fit.

    Returns the selected entries of ``articleInds`` after passing them
    through ``Sort.quicksort``.
    """
    chosen = []
    pagesUsed = 0
    for idx in range(numArticles):
        pagesUsed += pages[idx]
        if pagesUsed > pageLimit:
            # does not fit: undo the tentative addition and try the next one
            pagesUsed -= pages[idx]
        else:
            chosen.append(articleInds[idx])

    Sort.quicksort(chosen)
    return chosen
Exemple #3
0
def kNN_g0(pt, pts, num=1):
    """Return up to ``num`` nearest neighbours of ``pt`` at non-zero distance.

    Candidates whose distance to ``pt`` is exactly 0 (for instance ``pt``
    itself when it is a member of ``pts``) are skipped.

    Arguments:  pt -- the query point
    pts -- the candidate points
    num -- maximum number of neighbours to return

    Returns a tuple (neighbours as an array, list of their indices into
    ``pts``, list of their distances), in the order produced by quicksort.
    Fewer than ``num`` entries are returned when not enough candidates exist.
    """
    pts = array(pts)

    #get the distances to each of the points
    dists = [distance(oPt, pt) for oPt in pts]

    # quicksort returns the sorted values together with the indices of those
    # values in the input list
    sort_dists, I = quicksort(dists)

    knn_pts = []
    knn_ind = []
    knn_dist = []

    #walk the candidates in sorted order until we have enough points
    for dist, ind in zip(sort_dists, I):
        if len(knn_pts) >= num:
            break
        if dist != 0:
            knn_pts.append(pts[ind])
            knn_ind.append(ind)
            knn_dist.append(dist)

    return array(knn_pts), knn_ind, knn_dist
Exemple #4
0
def flickr_stats(filename, filter_filename=None):
    prior = cPickle.load(open(filename, 'r'))

    print "number of tags:", len(prior.keys())

    myfilter = None
    if(filter_filename != None):
        myfilter = filter_load(filter_filename)
    
    mytags_hash = {}
    
    for obj in prior.keys():
        location = prior[obj]
        
        #for elt in myfilter:
        try:
            if(myfilter != None and myfilter[obj]):
                mytags_hash[obj] = sum(location.values())
            elif(myfilter == None):
                mytags_hash[obj] = sum(location.values())
        except:
            continue

                    
    num_locations = len(prior.keys())
    num_objects = len(mytags_hash.keys())

    print "number of locations:", num_locations
    print "number of objects:", num_objects
    
    K = array(mytags_hash.keys())
    Vs = array(mytags_hash.values())
    V, I = quicksort(mytags_hash.values())

    print "number of flowers:", mytags_hash["flower"]

    #for i in range(len(mytags_hash.keys())):
    #    print mytags_hash.keys()[i], mytags_hash.values()[i]
    #print "final key", K[I[len(V)-1]]
    #print "final value", Vs[I[len(V)-1]]

    print len(I), len(V)
    
    mf_keys = K.take(I[len(I)-100:len(I)]).tolist()
    mf_vals = array(Vs).take(I[len(I)-100:len(I)]).tolist()


    mf_keys.reverse()
    mf_vals.reverse()

    p2 = bar(arange(len(mf_vals)), mf_vals, color='b', width=0.8)

    setp(gca(), 'xticks', arange(len(mf_vals)))
    labels = setp(gca(), 'xticklabels', mf_keys)
    setp(labels, 'rotation', 'vertical')

    print mf_keys
    #labels = xticks(arange(len(mf_vals)), mf_keys)
    #xticks(arange(len(mf_vals)), mf_keys)
    show()
Exemple #5
0
def flickr_stats(filename, object, filter_filename):

    prior = cPickle.load(open(filename, 'r'))

    #plot the curve
    mu = mean(prior[object].values())
    v = var(prior[object].values())

    print mu
    print v

    X = array(range(0, max(prior[object].values())))
    Y = 1.0 / (1.0 + exp(-1.0 * (X - mu - min(prior[object].values())) / v))
    print "x", X
    print "x-mu", (X - mu) / v
    print "y:", Y

    plot(X, Y)
    xlabel("object count")
    ylabel("probability of " + object)

    figure()

    filter = filter_load(filter_filename)

    mytags_hash = prior[object]

    for key in mytags_hash.keys():
        if (not key in filter):
            mytags_hash.pop(key)

    K = array(mytags_hash.keys())
    V, I = quicksort(mytags_hash.values())

    mf_keys = K.take(I[len(I) - 20:len(I)]).tolist()
    mf_vals = array(mytags_hash.values()).take(I[len(I) - 20:len(I)]).tolist()

    #mf_keys.reverse()
    #mf_vals.reverse()

    p2 = barh(arange(len(mf_vals)), mf_vals, color='b', height=0.8)

    setp(gca(), 'yticks', arange(len(mf_vals)))
    labels = setp(gca(), 'yticklabels', mf_keys)
    #setp(labels, 'rotation', 'vertical')

    #labels = xticks(arange(len(mf_vals)), mf_keys)
    #xticks(arange(len(mf_vals)), mf_keys)

    title("Location counts for " + object)
    show()
Exemple #6
0
def plot_roc_curve(model, mykeyword, learner):
    training_docs, test_docs, train_label, test_label = model.get_training_test_sets(
        mykeyword, 0.8)
    print test_label

    Scores = []
    Thresholds = set([])
    for i, doc_i in enumerate(test_docs):
        res = model.predict(mykeyword, model.documents[doc_i], learner=learner)
        if (res == None):
            continue

        Scores.append(res[1].values()[-1])
        Thresholds.add(res[1].values()[-1])

    Thresholds = list(Thresholds)
    Thresholds.sort()
    print "scores:", Scores

    TPR = []
    FPR = []
    for i, s in enumerate(Thresholds):
        #print "thresh:", s
        #print "diff", abs(Thresholds[i]-Thresholds[i-1])
        #print "diff", abs(Thresholds[i]-Thresholds[i-1]) < 10^-2
        #print 10**-2
        #if(i > 0 and abs(Thresholds[i]-Thresholds[i-1]) < 10**-3):
        #    print "cont"
        #    continue

        tp, fp, tn, fn = get_statistics(Scores, s, test_label)

        if (tp + fn == 0):
            TPR.append(0)
        else:
            TPR.append((1.0 * tp) / (tp + fn))

        if (fp + tn == 0):
            FPR.append(0)
        else:
            FPR.append((1.0 * fp) / (fp + tn))

    FPR_srt, I = quicksort(FPR)
    TPR = array(TPR)
    TPR_srt = TPR.take(I)

    plot(FPR_srt, TPR_srt, 'kx-')
    title(mykeyword)
    xlabel("False positive rate")
    ylabel("True positive rate")
Exemple #7
0
def get_roc_all(myhash_gtruth, myhash_pclass, 
                obj_type, type_detector):
    thresholds = arange(0, 1, 0.005)

    if(type_detector=='prob_mrf'):
        thresholds = []
        for elt in myhash_pclass.keys():
            thresholds.append(myhash_pclass[elt])
        thresholds.sort()

    tp_rate = []
    fp_rate = []
    print "type_detector", type_detector

    for thresh in thresholds:
        if(fmod(thresh, 0.01) == 0):
            print thresh

        tp, tn, fp, fn  = get_tp_fp(myhash_gtruth, myhash_pclass, 
                                    obj_type, type_detector, thresh)

        if(not tp == None and not fp == None and tp+fp > 0 and fp+tn > 0):
            tp_rate.append((tp*1.0)/(tp+fp)*1.0)
            fp_rate.append((fp*1.0)/(fp+tn)*1.0)
    

            print "threshold:", thresh, " tp:", tp_rate[-1], " fp:", fp_rate[-1]
            #raw_input()

    #sort the tp rates
    Vfp, I = quicksort(fp_rate)
    
    tp_rate = array(tp_rate)
    Vtp = tp_rate.take(I)
    
    area = 0
    for i in range(1, len(Vfp)):
        #print "recalls", recalls[i] - recalls[i-1]
        #print "precision", precisions[i]

        area += abs(Vtp[i-1] - Vtp[i])*Vfp[i-1]
        #raw_input()

    #if(area == 0):
    #    for i in range(1, len(recalls)):
    #        area += abs(recalls[i-1] - recalls[i])*precisions[i]


    return Vtp, Vfp, area
Exemple #8
0
def kNN(pt, pts, num=1):
    """Return the ``num`` nearest neighbours of ``pt`` among ``pts``.

    Arguments:  pt -- the query point
    pts -- the candidate points
    num -- number of neighbours requested

    Returns a tuple (neighbours as an array, their indices into ``pts``,
    their distances), in the order produced by quicksort. When ``num``
    exceeds the number of candidates, all candidates are returned instead of
    raising IndexError, so the three results always have the same length.
    """
    pts = array(pts)

    #get the distances to each of the points
    dists = [distance(oPt, pt) for oPt in pts]

    # quicksort returns the sorted values together with the indices of those
    # values in the input list
    sort_dists, I = quicksort(dists)

    # slicing clamps to the available entries
    nearest = I[0:num]
    knn_pts = [pts[ind] for ind in nearest]

    return array(knn_pts), nearest, sort_dists[0:num]
def spectral_clustering_auto(X, r, k=None, kMax=None, W=None, seed_number=987):
    ''' return labels for each data point
    
    runs spectral clustering on X and displays the resulting
    clustering, overlayed onto the neighbourhood graph used.
    
    Arguments:  X -- the data
    r -- number of neighbors
    k -- number of resulting clusters
    t -- the threshold for the selection of eigenvalues near 1
    '''

    #create the weight matrix and normalize it
    if (W is None):
        W = weights_perona(X, r)

    Dinv = diag(1 / (1.0 * sum(W, 1)))
    M = dot(dot(sqrt(Dinv), W), sqrt(Dinv))

    #get the eigenvectors, which are now in the columnsx
    U, E, V = svd(M)
    V = transpose(U)
    sortedE, I = quicksort(E)

    #get only the relevant eigenvectors based on the number of classes
    #that we have selected
    V_sorted = []
    for j in range(len(I) - kMax, len(I)):
        V_sorted.append(V[I[j], :])

    #normalize the rows of the eigenvectors
    V = transpose(V_sorted)
    V_pr, k = get_number_of_clusters(V, kMax)

    divisor = 1.0 / (sum(sqrt(V_pr**2), 1) + (10.0**(-30)))
    V_pr = dot(diag(divisor), V_pr)

    #use optimized methods here
    print "random seed:", seed_number
    tklib_init_rng(seed_number)
    means = kmeans_autoinit(transpose(V_pr), 100, k)
    labels = kmeans_get_labels(transpose(V_pr), means)

    return labels, k
Exemple #10
0
def labelme_stats(filename):
    myfile = open(filename, 'r')

    mytags = {}
    num_lines = 0

    for line in myfile:
        num_lines += 1
        tags = line.split(',')[3:]

        for tag in tags:
            if (tag == " " or tag == ''):
                continue
            try:
                mytags[tag] += 1
            except:
                mytags[tag] = 1

    print "number of images", num_lines
    print "number of unique tags", len(mytags.keys())

    K = array(mytags.keys())
    V, I = quicksort(mytags.values())

    mf_keys = K.take(I[-100:-2]).tolist()
    mf_vals = array(mytags.values()).take(I[-100:-2]).tolist()

    mf_keys.reverse()
    mf_vals.reverse()

    p2 = bar(arange(len(mf_vals)), mf_vals, color='b', width=0.8)

    setp(gca(), 'xticks', arange(len(mf_vals)))
    labels = setp(gca(), 'xticklabels', mf_keys)
    setp(labels, 'rotation', 'vertical')

    print mf_keys
    #labels = xticks(arange(len(mf_vals)), mf_keys)
    #xticks(arange(len(mf_vals)), mf_keys)
    show()
def spectral_clustering(X,
                        r,
                        t=None,
                        k=None,
                        W=None,
                        numkmeans=1,
                        seed_number=987,
                        max_dist=10e10):
    ''' return a list of label vectors (one per k-means run) and k

    runs spectral clustering on X: builds a symmetrically normalized
    weight matrix, takes its SVD, keeps k of the resulting vectors and
    clusters the embedding with k-means.

    Arguments:  X -- the data
    r -- number of neighbors (used by weights_perona when W is None)
    t -- the threshold for the selection of eigenvalues near 1
         (0.185 is used when None)
    k -- number of resulting clusters; estimated from the eigenvalues
         when None
    W -- optional precomputed weight matrix
    numkmeans -- number of k-means runs to perform
    seed_number -- seed handed to tklib_init_rng
    max_dist -- a label is replaced by nan when the matching entry of
                the k-means distance matrix exceeds this value

    Returns (mylabels, k). Returns None (bare return) when k has to be
    estimated and no eigenvalue lies within t of 1.
    '''
    print "running with", "t=", t, "k=", k, "max_dist=", max_dist

    #create the weight matrix and normalize it
    if (W is None):
        W = weights_perona(X, r)

    # M = sqrt(Dinv) * W * sqrt(Dinv), with Dinv built from the sums over axis 1
    Dinv = diag(1 / (1.0 * sum(W, 1)))
    M = dot(dot(sqrt(Dinv), W), sqrt(Dinv))

    #get the eigenvectors, which are now in the columnsx
    U, E, V = svd(M)
    # the V returned by svd is discarded; from here on V holds the transpose of U
    V = transpose(U)
    sortedE, I = quicksort(E)

    #estimate the number of clusters by looking at the eigenvalues
    if (t == None):
        #this has worked really well: t=0.195
        #t=0.01
        #t=0.17
        t = 0.185
        #t=0.195
        #t=0.18
    if (k == None):
        # walk sortedE from its last entry backwards and count the values
        # that are within t of 1, stopping at the first one that is not
        k = 0
        for i in range(len(sortedE)):
            if ((1.0 - sortedE[len(sortedE) - i - 1]) > t):
                break
            k = k + 1

        if (k == 0):
            print "There are no eigenvalues of 1"
            print "The closest is:", sortedE[len(sortedE) - 1]
            # NOTE(review): bare return yields None here, while the normal
            # path returns a 2-tuple; callers that unpack will fail.
            return

    #print "E:", E
    print "number of clusters", k

    #get only the relevant eigenvectors based on the number of classes
    #that we have selected
    V_sorted = []
    E_svals = []
    # the last k positions of the sorted ordering
    for j in range(len(I) - k, len(I)):
        V_sorted.append(V[I[j], :])
        E_svals.append(E[I[j]])

    #print len(I)
    #print range(len(I)-k,len(I))
    #print "number of ev:", len(range(len(I)-k,len(I)))
    #raw_input()
    #normalize the rows of the eigenvectors

    V = transpose(V_sorted)
    #print "V",
    #for elt in V:
    #    print elt

    # each row is divided by the sum of its absolute values; the tiny
    # constant avoids a division by zero
    divisor = 1.0 / (sum(sqrt(V**2), 1) + (10.0**(-30)))
    V = dot(diag(divisor), V)

    #try to weight the vector values
    # each column is multiplied by the value of E selected with it
    V = V * array([E_svals])

    #print "V_norm", V
    #for elt in V:
    #    print elt

    #use optimized methods here
    print "random seed:", seed_number
    tklib_init_rng(seed_number)

    mylabels = []
    for i in range(numkmeans):
        print "performing k-means on ", k, "clusters"
        means = kmeans_autoinit(transpose(V), 500, k)
        mylabels.append(kmeans_get_labels(transpose(V), means))

        # D is indexed below as [label, point index]
        D = array(kmeans_get_distances(transpose(V), means))
        print "d", D.shape
        for j, label in enumerate(mylabels[-1]):
            print label, j
            # points farther than max_dist from their assigned mean lose their label
            if (D[int(label), j] > max_dist):
                mylabels[-1][j] = nan

    return mylabels, k
Exemple #12
0
def test_quicksort():
    """A random list must satisfy is_sorted after sorting.quicksort runs on it."""
    data = generateRandomList()
    sorting.quicksort(data)
    assert is_sorted(data)
Exemple #13
0
    1
    """
    global cpt
    cpt = cpt + 1
    if a == b:
        return 0
    elif a < b:
        return -1
    else:
        return 1


if __name__ == "__main__":
    # cpt is a module-level counter; presumably it is the one the cmp
    # callback increments on every comparison
    cpt = 0
    t = generate.random_list(1000)

    # merge sort: the returned list is the one that gets checked
    tt = sorting.merge_sort(t, cmp)
    print(tt)
    if not generate.is_sorted(tt):
        raise Exception("List has not been correctly sorted by merge sort")
    print("Yes !!")
    print(cpt)

    # quicksort: t itself is checked afterwards; cpt is not reset in between,
    # so the number printed here includes the merge sort count
    print(t)
    sorting.quicksort(t, cmp)
    print(cpt)
    if not generate.is_sorted(t):
        raise Exception("List has not been correctly sorted by quicksort")
    print("Yes !!")
Exemple #14
0
import random
from sorting import selection_sort, bubble_sort, insertion_sort
from sorting import mergesort, quicksort


# 42 distinct random integers drawn from 1..999
any_numbers = random.sample(range(1, 1000), 42)

# input that is already in ascending order
already_sorted = [
    1, 2, 3, 4, 5, 6, 9, 20, 22, 23,
    28, 32, 34, 39, 40, 42, 76, 87, 99, 112,
]

# input in descending order
inversed = [
    117, 90, 88, 83, 81, 77, 74, 69, 64, 63, 51, 50, 49,
    42, 41, 34, 32, 29, 28, 22, 16, 8, 6, 5, 3, 1,
]

# input with many duplicate values
repeated = [
    7, 7, 7, 7, 7, 1, 1, 9, 9,
    0, 4, 4, 4, 5, 4, 5, 7, 1,
]

if __name__ == "__main__":
    # each case is printed, handed to quicksort, then printed again
    test_cases = {
        'Números aleatórios': any_numbers,
        'Já ordenados': already_sorted,
        'Ordem inversa': inversed,
        'Elementos repetidos': repeated,
    }
    print("*******************************")
    for name, lista in test_cases.items():
        header = "\nCaso de teste: {}".format(name)
        print(header)
        print(lista)
        quicksort(lista)
        print("\n Ordenado:")
        print(lista)
    print("*******************************")
Exemple #15
0
def plot_distance_curve_iros(ofile,
                             corpus,
                             tag_file,
                             marker,
                             color,
                             thelabel='',
                             use_strict_correctness=False,
                             followedState=None,
                             sentence_i_to_run=None,
                             linestyle="-"):
    """Plot the proportion of sentences whose final error is within a distance.

    For every sentence i the error is the smallest tklib_euclidean_distance
    between the first location returned by tag_file.get_tag_locations for
    the destination region and the last node of any non-None path in
    ofile['path'][i]; it starts at 70.0, so it never exceeds that value.
    With use_strict_correctness or sentence_i_to_run the error is instead
    taken from one selected path.

    Arguments:  ofile -- dict read with the keys 'path', 'sentences',
                'regions', 'tmap_locs', 'probability' and 'correct'
    corpus -- provides directions[i] with was_followed, start and end
    tag_file -- provides get_tag_locations(region)
    marker, color, thelabel, linestyle -- forwarded to plot_markers_evenly
    use_strict_correctness -- use the path with the highest probability
    followedState -- when not None, only directions whose was_followed
                equals this value are used
    sentence_i_to_run -- alternative selection of the path to score;
                mutually exclusive with use_strict_correctness

    Returns whatever plot_markers_evenly returns.
    """

    Dists = []
    threshold = 10
    num_correct = 0
    total = 0.0
    for i in range(len(ofile['path'])):
        if (ofile['sentences'][i] == None):
            print "sentence", i, "was", ofile['sentences'][i]
            continue

        # region strings have the form "<start> to <end>"
        # NOTE(review): split("to") also splits inside names containing "to",
        # which would break the two-value unpacking -- confirm the name format.
        rst, rend = ofile['regions'][i].split("to")
        rst = rst.strip()
        rend = rend.strip()

        direction = corpus.directions[i]

        # skip directions whose followed state differs from the requested one,
        # after checking that corpus and ofile describe the same route
        if followedState != None and direction.was_followed != followedState:
            assert direction.start == rst, (direction.start, rst)
            assert direction.end == rend, (direction.end, rend)
            continue

        #t2 = ofile['region_to_topology'][rend]

        #iterate over all the topologies in the final region
        # 70.0 is the starting (and therefore maximum) error for a sentence
        curr_d = 70.0
        t2_loc = transpose(tag_file.get_tag_locations(rend))[0]

        #for myelt in t2:
        #    t2_loc = ofile['tmap_locs'][myelt]

        #iterate over all of the paths that end in the location
        for k in range(len(ofile['path'][i])):
            if ofile['path'][i][k] == None:
                continue

            # the last path element looks like "<topology>_<...>"; the part
            # before the underscore, as a float, is the key into tmap_locs
            t1 = ofile['path'][i][k][-1]
            t1 = float(t1.split("_")[0])
            t1_loc = ofile["tmap_locs"][t1]

            if (tklib_euclidean_distance(t2_loc, t1_loc) < curr_d):
                curr_d = tklib_euclidean_distance(t2_loc, t1_loc)

        # this argument check runs inside the loop, so it only fires once a
        # sentence gets this far
        if use_strict_correctness and sentence_i_to_run != None:
            raise ValueError("Must pass one or the other and not both." +
                             ` use_strict_correctness ` + " and " +
                             ` sentence_i_to_run `)

        # in either selection mode the minimum found above is replaced by the
        # distance of one specific path
        if use_strict_correctness or sentence_i_to_run != None:
            if use_strict_correctness:
                best_scoring_run_k = argmax(ofile['probability'][i])
            elif sentence_i_to_run != None:
                # NOTE(review): myelt is never assigned in this function (the
                # loop that defined it is commented out above), so this line
                # raises NameError as written; presumably i was intended.
                best_scoring_run_k = sentence_i_to_run[myelt]
                if ofile['path'][i][best_scoring_run_k] == None:
                    best_scoring_run_k = argmax(ofile['probability'][i])

            t1 = float(ofile['path'][i][best_scoring_run_k][-1].split("_")[0])
            t1_loc = ofile["tmap_locs"][t1]
            curr_d = tklib_euclidean_distance(t2_loc, t1_loc)

        # a sentence counts as correct when it ends within threshold or when
        # the first entry of ofile['correct'][i] is truthy
        if curr_d < threshold or ofile['correct'][i][0]:
            num_correct += 1
        total += 1
        Dists.append(curr_d)

    # NOTE(review): total is still 0.0 when every sentence was skipped, which
    # makes the division below fail.
    print thelabel, "num_correct less than %.2f meters: %d  (%.3f%%)" % (
        threshold, num_correct, 100.0 * num_correct / total)
    Y = []
    X = []
    # every observed distance is used as a threshold (the loop variable
    # reuses the name threshold); Y is the fraction of distances at or below it
    for threshold in Dists:

        #get the ones above the threshold
        #print nonzero(array(Dists) > threshold)
        #print array(Dists) > threshold
        Itrue, = nonzero(array(Dists) <= threshold)

        Y.append(len(Itrue) / (1.0 * len(Dists)))
        X.append(threshold)

    # sort the thresholds and reorder the proportions to match
    X, I = quicksort(X)
    Y = array(Y).take(I)

    p = plot_markers_evenly(X,
                            Y,
                            thelabel,
                            marker,
                            color,
                            linewidth=2.5,
                            linestyle=linestyle)
    mpl.xlabel('distance from destination (m)')
    mpl.ylabel('proportion correct')
    #draw()
    #show()
    #raw_input()
    return p
Exemple #16
0
def plot_distance_curve_subject(ofile,
                                create_figure=True,
                                mystyle=None,
                                best_sub_only=False,
                                best_question_only=False,
                                included_subjects=None):
    """Plot one distance curve per subject (or for one region) and return the plots.

    For every sentence an error distance is computed from the topology nodes
    of the destination region and the last node of each path; the distances
    are grouped by ofile["subjects"][i] and by ofile["regions"][i].

    Arguments:  ofile -- dict read with the keys 'path', 'sentences',
                'regions', 'region_to_topology', 'tmap_locs' and 'subjects'
    create_figure -- open a new figure before plotting
    mystyle -- plot style string; when None a style is picked from an
                internal list by position
    best_sub_only -- keep only the subject with the smallest summed distance
    best_question_only -- keep only the region with the smallest summed
                distance (this replaces the per-subject grouping)
    included_subjects -- when not None, only keys contained in it are plotted

    Returns the list of objects returned by the plot calls.
    """

    styles = [
        "ro-", "b^-", "k>-", "g<-", "ro--", "b^--", "k>--", "g<--", "ro-.",
        "b^-.", "k>-.", "g<-.", "ro:", "b^:", "k>:", "g<:"
    ]
    if (create_figure):
        figure()
    Dists = {}
    Dists_question = {}
    for i in range(len(ofile['path'])):
        #Dists.append([])

        if (ofile['sentences'][i] is None):
            print "sentence", i, "was", ofile['sentences'][i]
            continue

        # region strings have the form "<start> to <end>"; only the end is used
        rst, rend = ofile['regions'][i].split("to")
        rend = rend.strip()

        t2 = ofile['region_to_topology'][rend]

        #iterate over all the topologies in the final region
        curr_d = 100000000000000000000000000000.0
        for myelt in t2:
            t2_loc = ofile['tmap_locs'][myelt]

            #iterate over all of the paths that end in the location
            for k in range(len(ofile['path'][i])):

                path = ofile['path'][i][k]
                if path is None:
                    # NOTE(review): this overwrites curr_d with a large value
                    # even when a smaller distance was already found for an
                    # earlier path or topology -- confirm this is intended.
                    curr_d = 100000000000000000
                else:
                    # the last path element looks like "<topology>_<...>"; the
                    # part before the underscore, as a float, keys tmap_locs
                    t1 = path[-1]
                    t1 = float(t1.split("_")[0])
                    t1_loc = ofile["tmap_locs"][t1]
                    if (math2d_dist(t2_loc, t1_loc) < curr_d):
                        curr_d = math2d_dist(t2_loc, t1_loc)

        #subjects
        if (not Dists.has_key(ofile["subjects"][i])):
            Dists[ofile["subjects"][i]] = []

        Dists[ofile["subjects"][i]].append(curr_d)

        #regions
        if (not Dists_question.has_key(ofile["regions"][i])):
            Dists_question[ofile["regions"][i]] = []

        Dists_question[ofile["regions"][i]].append(curr_d)

    xlabel('distance from destination (m)')
    ylabel('percentage correct')

    # mylabel is assigned but not used below
    mylabel = None
    if (best_sub_only):
        # sum(..., axis=1) adds up the distances of each subject
        # NOTE(review): presumably requires every subject to have the same
        # number of distances -- confirm.
        dvals = sum(Dists.values(), axis=1)
        i = argmin(dvals)
        # indexing values()/keys() relies on them being lists (Python 2)
        new_vals = Dists.values()[i]
        new_key = Dists.keys()[i]

        Dists = {}
        Dists[new_key] = new_vals

        #mylabel=thelabel+" (Best Subject)"

    if (best_question_only):
        dvals = sum(Dists_question.values(), axis=1)
        i = argmin(dvals)
        new_vals = Dists_question.values()[i]
        new_key = Dists_question.keys()[i]

        # Dists now holds a single region key instead of subjects
        Dists = {}
        Dists[new_key] = new_vals

        #mylabel=thelabel+" (Best Question)"

    plots = []
    for k, subject in enumerate(Dists.keys()):
        if included_subjects != None and not subject in included_subjects:
            continue
        Y = []
        X = []
        # every observed distance is used as a threshold; Y is the fraction
        # of this key's distances at or below it
        for threshold in Dists[subject]:
            #get the ones above the threshold

            Itrue, = nonzero(array(Dists[subject]) <= threshold)

            Y.append(len(Itrue) / (1.0 * len(Dists[subject])))
            X.append(threshold)

        # sort the thresholds and reorder the proportions to match
        X, I = quicksort(X)
        Y = array(Y).take(I)
        sub_plt = subject.replace("Subject", "Sub.")
        if mystyle is None:
            style = styles[k % len(styles)]
        else:
            style = mystyle

        # prepend the point (X[0], 0) so the curve starts at zero
        if (X[0] > 0.0):
            Xf = [X[0]]
            Xf.extend(X)
            Yf = [0]
            Yf.extend(Y)
            X = Xf
            Y = Yf

        plots.extend(plot(X, Y, style, label=sub_plt, linewidth=2.5))

        num_correct_at_threshold = len(nonzero(array(Dists[subject]) <= 10)[0])
        # the trailing comma keeps the percentage on the same output line
        print k, subject, "less than 10 meters", num_correct_at_threshold,
        print "%.3f%%" % ((100.0 * num_correct_at_threshold) /
                          (1.0 * len(Dists[subject])))
    return plots
Exemple #17
0
def plot_roc_curve(ofile):
    """Plot a curve of two rates over score thresholds and title it with its area.

    Reads ofile['probability'], ofile['correct_neigh'] and ofile['keywords'].
    The score of an entry is the maximum of its probabilities, rescaled by a
    root that depends on the length of its keyword list. Every score is then
    used as a threshold to compute the two rates that are plotted.
    Opens a new figure; returns nothing.
    """
    figure()
    probs_orig = ofile['probability']
    correctness_orig = ofile['correct_neigh']

    #get the probs
    probs = []
    for ps_i, ps in enumerate(probs_orig):
        if (not array(ps).max() is None):
            probs.append(array(ps).max())
        else:
            probs.append(0)

        #if(len(probs) == 0):
        #    print "is empty"
        #    raw_input()

    # get whether the question was correct
    # an entry counts as correct when any of its values is True
    correctness = []
    for crr in correctness_orig:
        if (True in crr):
            correctness.append(True)
        else:
            correctness.append(False)

    #root by the length of the path
    i = 0
    for elt in ofile['keywords']:
        print "test", elt
        print "after test"
        print "probs[i]=", probs[i]
        if (len(elt) > 0):
            # NOTE(review): the exponent is 1 / (len(elt) - 1), which divides
            # by zero when len(elt) == 1 -- confirm the intended root.
            probs[i] = pow(probs[i], 1 / (1.0 * len(elt) - 1))
        else:
            # expression statement with no effect: the score is left unchanged
            probs[i]
        i += 1

    print "number correct:", sum(correctness)
    print "total directions:", len(correctness)
    TPR = []
    FPR = []
    for threshold in probs:

        #get the ones above the threshold
        #print nonzero(array(probs) > threshold)
        #print array(probs) > threshold
        Itrue, = nonzero(array(probs) >= threshold)
        iscorrect = array(correctness).take(Itrue)

        TP = sum(iscorrect) * 1.0
        TP_FP = len(iscorrect) * 1.0
        FP = TP_FP - TP

        #get the ones below the threshold
        # both comparisons include equality, so entries whose score equals
        # the threshold are counted in both groups
        Ifalse, = nonzero(array(probs) <= threshold)
        is_not_correct = array(correctness).take(Ifalse)

        FN = sum(is_not_correct) * 1.0
        TN_FN = len(is_not_correct) * 1.0
        TN = TN_FN - FN

        # the small constant avoids a division by zero
        TPR.append(1.0 * TP / ((TP + FN) + 0.00000000001))
        # NOTE(review): the value stored in FPR is TN / (TN + FP), not
        # FP / (FP + TN) -- confirm which rate is intended for the x axis.
        FPR.append(1.0 * TN / ((TN + FP) + 0.00000000001))

    # order the points by the x-axis rate
    V, I = quicksort(FPR)

    X = array(FPR).take(I)
    Y = array(TPR).take(I)

    plot(X, Y, 'r-', linewidth=2.5)
    #font = FontProperties(size='x-small')
    xlabel('false positive rate')
    ylabel('true positive rate')

    # area as a sum of rectangles: interval width times the left Y value
    AUC = 0.0
    for i in range(len(X) - 1):
        AUC += (X[i + 1] - X[i]) * Y[i]

    title("AUC=" + str(AUC))
    draw()
Exemple #18
0
 def test_quicksort(self):
     """sorting.quicksort must order self.array the way the built-in sort does."""
     expected = sorted(self.array)
     sorting.quicksort(self.array)
     self.assertEqual(self.array, expected)
Exemple #19
0
 def test_quicksort(self):
     """After sorting.quicksort, self.array equals a copy sorted by list.sort."""
     # build the reference result before the array is sorted in place
     expected = list(self.array)
     expected.sort()
     sorting.quicksort(self.array)
     self.assertEqual(self.array, expected)
Exemple #20
0
def plot_distance_curve(ofile,
                        corpus,
                        marker,
                        color,
                        thelabel='',
                        use_strict_correctness=False,
                        followedState=None,
                        sentence_i_to_run=None,
                        linestyle="-"):
    """Plot the proportion of sentences whose final error is within a distance.

    For every sentence i the error is the smallest math2d_dist between any
    topology node of the destination region and the last node of any
    non-None path in ofile['path'][i]; it starts at 70.0, so it never
    exceeds that value. With use_strict_correctness or sentence_i_to_run
    the error is instead taken from one selected path.

    Also loads the model pickled at ofile["options"]["model_fn"] and prints
    the average fraction of topology nodes visited.

    Arguments:  ofile -- dict read with the keys 'path', 'sentences',
                'regions', 'region_to_topology', 'tmap_locs', 'probability',
                'correct', 'options' and 'visited_viewpoints'
    corpus -- provides directions[i] with was_followed, start and end
    marker, color, thelabel, linestyle -- forwarded to plot_markers_evenly
    use_strict_correctness -- use the path with the highest probability
    followedState -- when not None, only directions whose was_followed
                equals this value are used
    sentence_i_to_run -- alternative selection of the path to score;
                mutually exclusive with use_strict_correctness

    Returns whatever plot_markers_evenly returns.
    """

    Dists = []
    threshold = 10
    num_correct = 0
    total = 0.0
    for i in range(len(ofile['path'])):
        if (ofile['sentences'][i] is None):
            print "sentence", i, "was", ofile['sentences'][i]
            continue

        # region strings have the form "<start> to <end>"
        rst, rend = ofile['regions'][i].split("to")
        rst = rst.strip()
        rend = rend.strip()

        direction = corpus.directions[i]

        # skip directions whose followed state differs from the requested one,
        # after checking that corpus and ofile describe the same route
        if followedState != None and direction.was_followed != followedState:
            assert direction.start == rst, (direction.start, rst)
            assert direction.end == rend, (direction.end, rend)
            continue

        #print "r", ofile['region_to_topology']
        t2 = ofile['region_to_topology'][rend]

        #iterate over all the topologies in the final region
        # 70.0 is the starting (and therefore maximum) error for a sentence
        curr_d = 70.0
        for myelt in t2:
            t2_loc = ofile['tmap_locs'][myelt]

            #iterate over all of the paths that end in the location
            for k in range(len(ofile['path'][i])):
                if ofile['path'][i][k] is None:
                    continue

                # the last path element looks like "<topology>_<...>"; the
                # part before the underscore, as a float, keys tmap_locs
                t1 = ofile['path'][i][k][-1]
                t1 = float(t1.split("_")[0])
                t1_loc = ofile["tmap_locs"][t1]

                if (math2d_dist(t2_loc, t1_loc) < curr_d):
                    curr_d = math2d_dist(t2_loc, t1_loc)

            # this argument check runs inside both loops, so it only fires
            # once a sentence with at least one topology gets this far
            if use_strict_correctness and sentence_i_to_run != None:
                raise ValueError("Must pass one or the other and not both." +
                                 ` use_strict_correctness ` + " and " +
                                 ` sentence_i_to_run `)

            # NOTE(review): this block sits inside the loop over t2, so
            # curr_d is overwritten for every topology and ends up holding
            # the distance to the last one -- confirm this is intended.
            if use_strict_correctness or sentence_i_to_run != None:
                if use_strict_correctness:
                    best_scoring_run_k = argmax(ofile['probability'][i])
                elif sentence_i_to_run != None:
                    # NOTE(review): indexed by the topology element myelt,
                    # although the name suggests a per-sentence lookup.
                    best_scoring_run_k = sentence_i_to_run[myelt]
                    if ofile['path'][i][best_scoring_run_k] is None:
                        best_scoring_run_k = argmax(ofile['probability'][i])

                t1 = float(
                    ofile['path'][i][best_scoring_run_k][-1].split("_")[0])
                t1_loc = ofile["tmap_locs"][t1]
                curr_d = math2d_dist(t2_loc, t1_loc)

        # a sentence counts as correct when it ends within threshold or when
        # the first entry of ofile['correct'][i] is truthy
        if curr_d < threshold or ofile['correct'][i][0]:
            num_correct += 1
        total += 1
        Dists.append(curr_d)

    all_visited_topos = []
    import cPickle
    # NOTE(review): the file object is never closed explicitly, and
    # unpickling can execute arbitrary code -- only load trusted files.
    model = cPickle.load(open(ofile["options"]["model_fn"], 'r'))
    print "len", len(ofile["visited_viewpoints"])
    for visited_vps in ofile["visited_viewpoints"]:
        assert len(visited_vps) == 1, len(visited_vps)

        # count distinct topology ids among the visited viewpoints, which are
        # strings of the form "<topology>_<orientation>"
        visited_topos = set()
        for vp in visited_vps[0]:
            #print "vp", vp
            topo_i, orient = vp.split("_")
            visited_topos.add(topo_i)
        # fraction of the model's topology nodes that were visited
        all_visited_topos.append(
            float(len(visited_topos)) / len(model.tmap_locs))
    #print "visited", all_visited_topos
    #print "ofile", ofile["path"][0]
    print "average # of nodes visited", mean(all_visited_topos)
    # NOTE(review): total is still 0.0 when every sentence was skipped, which
    # makes the division below fail.
    print thelabel, "num_correct less than %.2f meters: %d  (%.3f%%), visited %.3f%%" % (
        threshold, num_correct, 100.0 * num_correct / total,
        100 * mean(all_visited_topos))
    Y = []
    X = []
    # every observed distance is used as a threshold (the loop variable
    # reuses the name threshold); Y is the fraction of distances at or below it
    for threshold in Dists:

        #get the ones above the threshold
        #print nonzero(array(Dists) > threshold)
        #print array(Dists) > threshold
        Itrue, = nonzero(array(Dists) <= threshold)

        Y.append(len(Itrue) / (1.0 * len(Dists)))
        X.append(threshold)

    # sort the thresholds and reorder the proportions to match
    X, I = quicksort(X)
    Y = array(Y).take(I)

    p = plot_markers_evenly(X,
                            Y,
                            thelabel,
                            marker,
                            color,
                            linewidth=2.5,
                            linestyle=linestyle)
    xlabel('distance from destination (m)')
    ylabel('proportion correct')
    #draw()
    #show()
    #raw_input()
    return p
Exemple #21
0
def test_max_quicksort():
    """quicksort with order='max' must restore keys 99 down to 0 after a shuffle."""
    expected = [KeyedItem(key=k) for k in range(99, -1, -1)]
    shuffled = list(expected)
    random.shuffle(shuffled)
    quicksort(shuffled, order='max')
    assert shuffled == expected
Exemple #22
0
def plot_distance_curve_random(model,
                               corpus_fn,
                               gtruth_tag_fn,
                               map_fn,
                               color,
                               marker,
                               label='',
                               linestyle="-",
                               region_to_topology=None):
    """
    Needs the viewpoints and stuff from the model. 

    Plots the distance curve of a baseline: for every route instruction the
    recorded distance is the average math2d_dist from all of the model's
    viewpoints to the first topology node of the destination region.

    Arguments:  model -- provides tmap_locs and viewpoints
    corpus_fn -- file handed to readSession
    gtruth_tag_fn, map_fn -- when gtruth_tag_fn is not None, the region to
                topology mapping is built from tag_file(gtruth_tag_fn, map_fn)
    color, marker, label, linestyle -- forwarded to plot_markers_evenly
    region_to_topology -- mapping used when gtruth_tag_fn is None

    Returns whatever plot_markers_evenly returns.
    """
    print "starting random"
    dsession = readSession(corpus_fn, "none")
    if gtruth_tag_fn != None:
        tf = tag_file(gtruth_tag_fn, map_fn)
        topohash = get_region_to_topo_hash_containment(tf, model)
    else:
        topohash = region_to_topology
    Dists = []
    for elt in dsession:
        for i in range(len(elt.routeInstructions)):

            if (elt.columnLabels[i] is None):
                print "sentence", i, "was", elt.columnLabels[i]
                continue

            # column labels have the form "<start> to <end>"
            start_true, end_true = elt.columnLabels[i].split("to")
            start_true = str(start_true.strip())
            end_true = str(end_true.strip())
            # the first topology key of each region; iSlocTopo is not used below
            iSlocTopo = topohash[start_true][0]
            iElocTopo = topohash[end_true][0]
            eloc = model.tmap_locs[iElocTopo]

            # add up the distance from every viewpoint to the destination;
            # viewpoints are strings of the form "<topology>_<orientation>"
            total_dist = 0.0
            for vp in model.viewpoints:
                topo, orient = vp.split("_")
                vp_loc = model.tmap_locs[float(topo)]
                total_dist += math2d_dist(vp_loc, eloc)

            expected_dist = total_dist / len(model.viewpoints)
            Dists.append(expected_dist)
    Y = []
    X = []
    # every observed distance is used as a threshold; Y is the fraction of
    # distances at or below it
    for threshold in Dists:

        #get the ones above the threshold
        #print nonzero(array(Dists) > threshold)
        #print array(Dists) > threshold
        Itrue, = nonzero(array(Dists) <= threshold)

        Y.append(len(Itrue) / (1.0 * len(Dists)))
        X.append(threshold)

    num_correct_at_threshold = len(nonzero(array(Dists) <= 10)[0])
    # the trailing comma keeps the next print on the same output line
    print "random less than 10 meters", num_correct_at_threshold,
    # NOTE(review): the value printed with a percent sign is a fraction
    # between 0 and 1 (it is not multiplied by 100) -- confirm.
    print "%.3f%%" % (num_correct_at_threshold / (1.0 * len(Dists)))
    print "sorting"
    # sort the thresholds and reorder the proportions to match
    X, I = quicksort(X)
    print "taking"
    Y = array(Y).take(I)
    print "plotting"

    # prepend the point (X[0], 0) so the curve starts at zero
    if (X[0] > 0.0):
        Xf = [X[0]]
        Xf.extend(X)
        Yf = [0]
        Yf.extend(Y)
        X = Xf
        Y = Yf

    p = plot_markers_evenly(X,
                            Y,
                            label,
                            marker,
                            color,
                            linewidth=2.5,
                            linestyle=linestyle)
    xlabel('distance from destination (m)')
    ylabel('proportion correct')
    return p
Exemple #23
0
def test_min_quicksort():
    """quicksort with default arguments must restore keys 0 to 99 after a shuffle."""
    expected = [KeyedItem(key=k) for k in range(100)]
    shuffled = list(expected)
    random.shuffle(shuffled)
    quicksort(shuffled)
    assert shuffled == expected
Exemple #24
0
def get_region_to_topo_hash_containment(tf_region, dg_model):
    """Map every region tag to a list of topology keys.

    For each polygon in ``tf_region.polygons`` the topology nodes whose map
    index lies inside the polygon's axis-aligned bounding box, and that have
    at least one entry in ``dg_model.tmap``, are collected under the
    polygon's tag. When more than one node is collected, the keys are
    reordered with quicksort on the values tklib_get_distance returns for
    the polygon centre (mean of X, mean of Y).

    When a tag has no node at all, two fallback keys are stored: the node
    with the smallest ``pp.min_dist`` followed by the node with the smallest
    math2d_dist to the polygon centre.

    Arguments:  tf_region -- the tag file of the regions; provides
                ``polygons`` and ``get_map()``
    dg_model -- provides ``tmap_keys``, ``tmap_locs`` and ``tmap``

    Returns a dict: tag -> list of topology keys.
    """
    ret_hash = {}
    mymap = tf_region.get_map()

    for pp in tf_region.polygons:
        #add all of the topologies based on containment
        pts_I = []
        # the bounding box does not change per node: compute it once, on
        # first use, instead of once per topology key
        bbox = None

        for tm_key in dg_model.tmap_keys:
            tm_loc = mymap.to_index(dg_model.tmap_locs[tm_key])

            if bbox is None:
                bbox = (min(pp.X), min(pp.Y), max(pp.X), max(pp.Y))
            bbx1, bby1, bbx2, bby2 = bbox

            inside = (bbx1 <= tm_loc[0] <= bbx2 and
                      bby1 <= tm_loc[1] <= bby2)
            if not inside:
                continue

            # contained nodes without any entry in tmap are ignored
            if len(dg_model.tmap[tm_key]) == 0:
                continue

            ret_hash.setdefault(pp.tag, []).append(tm_key)
            pts_I.append(tm_loc)

        #resort them by distance from the center
        #         of the original region
        # NOTE(review): ret_hash[pp.tag] accumulates across polygons that
        # share a tag while pts_I is per polygon; if tags can repeat, the
        # reordering below uses fewer indices than keys -- confirm.
        if (pp.tag in ret_hash and len(ret_hash[pp.tag]) > 1
                and len(pts_I) != 0):
            center = [mean(pp.X), mean(pp.Y)]
            D = tklib_get_distance(transpose(pts_I), center)
            D_srt, I_srt = quicksort(D)
            ret_hash[pp.tag] = list(array(ret_hash[pp.tag]).take(I_srt))

        #in case nothing was added for a particular tag
        if pp.tag not in ret_hash:
            print("region not found via containment")
            center = [mean(pp.X), mean(pp.Y)]

            best_tmkey = None
            best_tmdist = 10000000000000000.0

            best_tmkey_dist = None
            best_tmdist_dist = 10000000000000000.0

            for tm_key in dg_model.tmap_keys:
                #tmap_locs is in xy and we need to convert to
                #  an index
                tm_index = mymap.to_index(dg_model.tmap_locs[tm_key])

                tm_d = math2d_dist(center, tm_index)
                tm_d_dist = pp.min_dist(tm_index)

                if tm_d < best_tmdist:
                    best_tmdist = tm_d
                    best_tmkey = tm_key

                if tm_d_dist < best_tmdist_dist:
                    best_tmdist_dist = tm_d_dist
                    best_tmkey_dist = tm_key

            # both entries stay None when the model has no topology keys
            ret_hash[pp.tag] = [best_tmkey_dist, best_tmkey]

    return ret_hash
def test_quicksort():
    """The value returned by sorting.quicksort must equal sorted() of the list."""
    data = fill_random_list()
    result = sorting.quicksort(data)
    assert result == sorted(data)