def determineSizeOfLastRock(weights):
    """Smash the two heaviest rocks together until at most one remains.

    Entries rejected by ``validateInput`` are discarded first.  Each round
    removes the two largest weights and, if they differ, puts the absolute
    difference back into the sorted collection.

    Returns the weight of the surviving rock, or 0 when the input is
    ``None``/empty or every rock cancels out.
    """
    # Nothing to smash.
    if weights == None or weights == []:
        return 0

    rocks = list(filter(validateInput, weights))

    # Ascending order: the two heaviest rocks sit at the end of the list.
    Sort.quicksort(rocks)

    while len(rocks) > 1:
        heaviest = rocks.pop()
        runner_up = rocks.pop()
        remainder = abs(heaviest - runner_up)
        if remainder > 0:
            # Keep the list sorted so the next round can pop from the end.
            Insert.insertInOrderBinary(rocks, remainder)

    # An empty list means the last two rocks cancelled each other exactly.
    return rocks[0] if rocks else 0
def __buildArticleList(pages, articleInds, pageLimit, numArticles):
    """Greedily pick articles, in the given order, that fit in ``pageLimit``.

    The first ``numArticles`` entries are visited in order; an article is
    taken when adding its page count keeps the running total within
    ``pageLimit`` and is skipped otherwise.

    Returns the chosen entries of ``articleInds`` after ``Sort.quicksort``
    has been applied to them.
    """
    chosen = []
    used_pages = 0
    for idx in range(numArticles):
        used_pages += pages[idx]
        if used_pages <= pageLimit:
            chosen.append(articleInds[idx])
        else:
            # Does not fit: undo the tentative addition and move on.
            used_pages -= pages[idx]
    Sort.quicksort(chosen)
    return chosen
def kNN_g0(pt, pts, num=1):
    """Return up to ``num`` nearest neighbours of ``pt`` at non-zero distance.

    Points whose distance to ``pt`` is exactly 0 (e.g. ``pt`` itself) are
    skipped.  Fewer than ``num`` results are returned when ``pts`` does not
    contain enough such points.

    Arguments:
    pt  -- the query point
    pts -- sequence of candidate points
    num -- maximum number of neighbours to return

    Returns ``(points, indices, distances)``: an array of the selected
    points, their indices into ``pts`` and their distances, nearest first.
    """
    pts = array(pts)
    # No candidates: return the same shapes the normal path yields when
    # nothing qualifies, instead of failing on an index into an empty array.
    if len(pts) == 0:
        return array([]), [], []

    # Distance from every candidate to the query point.
    dists = [distance(oPt, pt) for oPt in pts]
    # quicksort is used here as returning (sorted values, original indices).
    sort_dists, I = quicksort(dists)

    knn_pts = []
    knn_ind = []
    knn_dist = []
    i = 0
    # Walk candidates from nearest to farthest until enough are collected.
    while len(knn_pts) < num and i < len(pts):
        if sort_dists[i] != 0:
            knn_pts.append(pts[I[i]])
            knn_ind.append(I[i])
            knn_dist.append(sort_dists[i])
        i += 1

    return array(knn_pts), knn_ind, knn_dist
def flickr_stats(filename, filter_filename=None): prior = cPickle.load(open(filename, 'r')) print "number of tags:", len(prior.keys()) myfilter = None if(filter_filename != None): myfilter = filter_load(filter_filename) mytags_hash = {} for obj in prior.keys(): location = prior[obj] #for elt in myfilter: try: if(myfilter != None and myfilter[obj]): mytags_hash[obj] = sum(location.values()) elif(myfilter == None): mytags_hash[obj] = sum(location.values()) except: continue num_locations = len(prior.keys()) num_objects = len(mytags_hash.keys()) print "number of locations:", num_locations print "number of objects:", num_objects K = array(mytags_hash.keys()) Vs = array(mytags_hash.values()) V, I = quicksort(mytags_hash.values()) print "number of flowers:", mytags_hash["flower"] #for i in range(len(mytags_hash.keys())): # print mytags_hash.keys()[i], mytags_hash.values()[i] #print "final key", K[I[len(V)-1]] #print "final value", Vs[I[len(V)-1]] print len(I), len(V) mf_keys = K.take(I[len(I)-100:len(I)]).tolist() mf_vals = array(Vs).take(I[len(I)-100:len(I)]).tolist() mf_keys.reverse() mf_vals.reverse() p2 = bar(arange(len(mf_vals)), mf_vals, color='b', width=0.8) setp(gca(), 'xticks', arange(len(mf_vals))) labels = setp(gca(), 'xticklabels', mf_keys) setp(labels, 'rotation', 'vertical') print mf_keys #labels = xticks(arange(len(mf_vals)), mf_keys) #xticks(arange(len(mf_vals)), mf_keys) show()
def flickr_stats(filename, object, filter_filename): prior = cPickle.load(open(filename, 'r')) #plot the curve mu = mean(prior[object].values()) v = var(prior[object].values()) print mu print v X = array(range(0, max(prior[object].values()))) Y = 1.0 / (1.0 + exp(-1.0 * (X - mu - min(prior[object].values())) / v)) print "x", X print "x-mu", (X - mu) / v print "y:", Y plot(X, Y) xlabel("object count") ylabel("probability of " + object) figure() filter = filter_load(filter_filename) mytags_hash = prior[object] for key in mytags_hash.keys(): if (not key in filter): mytags_hash.pop(key) K = array(mytags_hash.keys()) V, I = quicksort(mytags_hash.values()) mf_keys = K.take(I[len(I) - 20:len(I)]).tolist() mf_vals = array(mytags_hash.values()).take(I[len(I) - 20:len(I)]).tolist() #mf_keys.reverse() #mf_vals.reverse() p2 = barh(arange(len(mf_vals)), mf_vals, color='b', height=0.8) setp(gca(), 'yticks', arange(len(mf_vals))) labels = setp(gca(), 'yticklabels', mf_keys) #setp(labels, 'rotation', 'vertical') #labels = xticks(arange(len(mf_vals)), mf_keys) #xticks(arange(len(mf_vals)), mf_keys) title("Location counts for " + object) show()
def plot_roc_curve(model, mykeyword, learner): training_docs, test_docs, train_label, test_label = model.get_training_test_sets( mykeyword, 0.8) print test_label Scores = [] Thresholds = set([]) for i, doc_i in enumerate(test_docs): res = model.predict(mykeyword, model.documents[doc_i], learner=learner) if (res == None): continue Scores.append(res[1].values()[-1]) Thresholds.add(res[1].values()[-1]) Thresholds = list(Thresholds) Thresholds.sort() print "scores:", Scores TPR = [] FPR = [] for i, s in enumerate(Thresholds): #print "thresh:", s #print "diff", abs(Thresholds[i]-Thresholds[i-1]) #print "diff", abs(Thresholds[i]-Thresholds[i-1]) < 10^-2 #print 10**-2 #if(i > 0 and abs(Thresholds[i]-Thresholds[i-1]) < 10**-3): # print "cont" # continue tp, fp, tn, fn = get_statistics(Scores, s, test_label) if (tp + fn == 0): TPR.append(0) else: TPR.append((1.0 * tp) / (tp + fn)) if (fp + tn == 0): FPR.append(0) else: FPR.append((1.0 * fp) / (fp + tn)) FPR_srt, I = quicksort(FPR) TPR = array(TPR) TPR_srt = TPR.take(I) plot(FPR_srt, TPR_srt, 'kx-') title(mykeyword) xlabel("False positive rate") ylabel("True positive rate")
def get_roc_all(myhash_gtruth, myhash_pclass, obj_type, type_detector): thresholds = arange(0, 1, 0.005) if(type_detector=='prob_mrf'): thresholds = [] for elt in myhash_pclass.keys(): thresholds.append(myhash_pclass[elt]) thresholds.sort() tp_rate = [] fp_rate = [] print "type_detector", type_detector for thresh in thresholds: if(fmod(thresh, 0.01) == 0): print thresh tp, tn, fp, fn = get_tp_fp(myhash_gtruth, myhash_pclass, obj_type, type_detector, thresh) if(not tp == None and not fp == None and tp+fp > 0 and fp+tn > 0): tp_rate.append((tp*1.0)/(tp+fp)*1.0) fp_rate.append((fp*1.0)/(fp+tn)*1.0) print "threshold:", thresh, " tp:", tp_rate[-1], " fp:", fp_rate[-1] #raw_input() #sort the tp rates Vfp, I = quicksort(fp_rate) tp_rate = array(tp_rate) Vtp = tp_rate.take(I) area = 0 for i in range(1, len(Vfp)): #print "recalls", recalls[i] - recalls[i-1] #print "precision", precisions[i] area += abs(Vtp[i-1] - Vtp[i])*Vfp[i-1] #raw_input() #if(area == 0): # for i in range(1, len(recalls)): # area += abs(recalls[i-1] - recalls[i])*precisions[i] return Vtp, Vfp, area
def kNN(pt, pts, num=1):
    """Return the ``num`` points of ``pts`` that are closest to ``pt``.

    Arguments:
    pt  -- the query point
    pts -- sequence of candidate points
    num -- number of neighbours requested; when it exceeds ``len(pts)``
           every point is returned

    Returns ``(points, indices, distances)``: an array of the selected
    points, their indices into ``pts`` and their distances, nearest first.
    """
    pts = array(pts)
    # No candidates: nothing to rank.
    if len(pts) == 0:
        return array([]), [], []

    # Distance from every candidate to the query point.
    dists = [distance(oPt, pt) for oPt in pts]
    # quicksort is used here as returning (sorted values, original indices).
    sort_dists, I = quicksort(dists)

    # Clamp so that asking for more neighbours than there are points returns
    # all of them, matching what the slices below already do.
    count = min(num, len(pts))
    knn_pts = [pts[I[i]] for i in range(count)]

    return array(knn_pts), I[0:num], sort_dists[0:num]
def spectral_clustering_auto(X, r, k=None, kMax=None, W=None, seed_number=987): ''' return labels for each data point runs spectral clustering on X and displays the resulting clustering, overlayed onto the neighbourhood graph used. Arguments: X -- the data r -- number of neighbors k -- number of resulting clusters t -- the threshold for the selection of eigenvalues near 1 ''' #create the weight matrix and normalize it if (W is None): W = weights_perona(X, r) Dinv = diag(1 / (1.0 * sum(W, 1))) M = dot(dot(sqrt(Dinv), W), sqrt(Dinv)) #get the eigenvectors, which are now in the columnsx U, E, V = svd(M) V = transpose(U) sortedE, I = quicksort(E) #get only the relevant eigenvectors based on the number of classes #that we have selected V_sorted = [] for j in range(len(I) - kMax, len(I)): V_sorted.append(V[I[j], :]) #normalize the rows of the eigenvectors V = transpose(V_sorted) V_pr, k = get_number_of_clusters(V, kMax) divisor = 1.0 / (sum(sqrt(V_pr**2), 1) + (10.0**(-30))) V_pr = dot(diag(divisor), V_pr) #use optimized methods here print "random seed:", seed_number tklib_init_rng(seed_number) means = kmeans_autoinit(transpose(V_pr), 100, k) labels = kmeans_get_labels(transpose(V_pr), means) return labels, k
def labelme_stats(filename): myfile = open(filename, 'r') mytags = {} num_lines = 0 for line in myfile: num_lines += 1 tags = line.split(',')[3:] for tag in tags: if (tag == " " or tag == ''): continue try: mytags[tag] += 1 except: mytags[tag] = 1 print "number of images", num_lines print "number of unique tags", len(mytags.keys()) K = array(mytags.keys()) V, I = quicksort(mytags.values()) mf_keys = K.take(I[-100:-2]).tolist() mf_vals = array(mytags.values()).take(I[-100:-2]).tolist() mf_keys.reverse() mf_vals.reverse() p2 = bar(arange(len(mf_vals)), mf_vals, color='b', width=0.8) setp(gca(), 'xticks', arange(len(mf_vals))) labels = setp(gca(), 'xticklabels', mf_keys) setp(labels, 'rotation', 'vertical') print mf_keys #labels = xticks(arange(len(mf_vals)), mf_keys) #xticks(arange(len(mf_vals)), mf_keys) show()
def spectral_clustering(X, r, t=None, k=None, W=None, numkmeans=1, seed_number=987, max_dist=10e10):
    '''
    Run spectral clustering on X and return the labels of every k-means run.

    Arguments:
    X -- the data
    r -- number of neighbors (passed to weights_perona when W is not given)
    t -- the threshold for the selection of eigenvalues near 1
         (0.185 when None)
    k -- number of resulting clusters; estimated from the eigenvalues
         when None
    W -- optional precomputed weight matrix; built from X and r when None
    numkmeans -- number of k-means runs to perform
    seed_number -- seed handed to tklib_init_rng before k-means
    max_dist -- points farther than this from their assigned mean get the
                label nan

    Returns (mylabels, k) where mylabels holds one label sequence per
    k-means run.  Returns None (bare return) when k is 0.
    '''
    print "running with", "t=", t, "k=", k, "max_dist=", max_dist

    #create the weight matrix and normalize it
    if (W is None):
        W = weights_perona(X, r)

    Dinv = diag(1 / (1.0 * sum(W, 1)))
    M = dot(dot(sqrt(Dinv), W), sqrt(Dinv))

    #get the eigenvectors; after the transpose they are in the rows of V
    U, E, V = svd(M)
    V = transpose(U)
    # quicksort is used here as returning (sorted values, original indices)
    sortedE, I = quicksort(E)

    #estimate the number of clusters by looking at the eigenvalues
    if (t == None):
        #this has worked really well: t=0.195
        t = 0.185

    if (k == None):
        # count, from the end of the sorted values, how many eigenvalues
        # lie within t of 1
        k = 0
        for i in range(len(sortedE)):
            if ((1.0 - sortedE[len(sortedE) - i - 1]) > t):
                break
            k = k + 1

    # NOTE(review): callers that unpack two values will fail on this path,
    # because nothing is returned
    if (k == 0):
        print "There are no eigenvalues of 1"
        print "The closest is:", sortedE[len(sortedE) - 1]
        return

    print "number of clusters", k

    #get only the relevant eigenvectors based on the number of classes
    #that we have selected
    V_sorted = []
    E_svals = []
    for j in range(len(I) - k, len(I)):
        V_sorted.append(V[I[j], :])
        E_svals.append(E[I[j]])

    #scale each row by the sum of the absolute values of its entries
    #(the tiny constant avoids a division by zero)
    V = transpose(V_sorted)
    divisor = 1.0 / (sum(sqrt(V**2), 1) + (10.0**(-30)))
    V = dot(diag(divisor), V)

    #try to weight the vector values by their eigenvalues
    V = V * array([E_svals])

    #use optimized methods here
    print "random seed:", seed_number
    tklib_init_rng(seed_number)

    mylabels = []
    for i in range(numkmeans):
        print "performing k-means on ", k, "clusters"
        means = kmeans_autoinit(transpose(V), 500, k)
        mylabels.append(kmeans_get_labels(transpose(V), means))
        D = array(kmeans_get_distances(transpose(V), means))
        print "d", D.shape
        # invalidate the label of every point that is too far from the
        # mean it was assigned to
        for j, label in enumerate(mylabels[-1]):
            print label, j
            if (D[int(label), j] > max_dist):
                mylabels[-1][j] = nan

    return mylabels, k
def test_quicksort():
    """quicksort must leave a randomly generated list in sorted order."""
    values = generateRandomList()
    # The list is sorted in place; the return value is not used.
    sorting.quicksort(values)
    assert is_sorted(values)
1 """ global cpt cpt = cpt + 1 if a == b: return 0 elif a < b: return -1 else: return 1 if __name__ == "__main__": cpt = 0 t = generate.random_list(1000) tt = sorting.merge_sort(t, cmp) print(tt) if generate.is_sorted(tt): print("Yes !!") else: raise Exception("List has not been correctly sorted by merge sort") print(cpt) print(t) sorting.quicksort(t, cmp) print(cpt) if generate.is_sorted(t): print("Yes !!") else: raise Exception("List has not been correctly sorted by quicksort")
import random
from sorting import selection_sort, bubble_sort, insertion_sort
from sorting import mergesort, quicksort

# Fixture lists covering the usual sorting scenarios: random values, input
# that is already sorted, input in reverse order and input with duplicates.
any_numbers = random.sample(range(1, 1000), 42)
already_sorted = [
    1, 2, 3, 4, 5, 6, 9, 20, 22, 23,
    28, 32, 34, 39, 40, 42, 76, 87, 99, 112,
]
inversed = [
    117, 90, 88, 83, 81, 77, 74, 69, 64, 63, 51, 50, 49,
    42, 41, 34, 32, 29, 28, 22, 16, 8, 6, 5, 3, 1,
]
repeated = [7, 7, 7, 7, 7, 1, 1, 9, 9, 0, 4, 4, 4, 5, 4, 5, 7, 1]

if __name__ == "__main__":
    # Manual demo: print every fixture before and after quicksort, which
    # sorts the list in place.
    separator = "*******************************"
    test_cases = {
        'Números aleatórios': any_numbers,
        'Já ordenados': already_sorted,
        'Ordem inversa': inversed,
        'Elementos repetidos': repeated,
    }

    print(separator)
    for title, numbers in test_cases.items():
        print("\nCaso de teste: {}".format(title))
        print(numbers)
        quicksort(numbers)
        print("\n Ordenado:")
        print(numbers)
        print(separator)
def plot_distance_curve_iros(ofile, corpus, tag_file, marker, color, thelabel='', use_strict_correctness=False, followedState=None, sentence_i_to_run=None, linestyle="-"):
    """Plot the proportion of runs that end within a distance of the goal.

    For every sentence in ``ofile`` the distance between the destination
    tag location and the end of the inferred path(s) is computed; the
    cumulative distribution of those distances is then plotted.

    ofile                  -- result dict; the keys read here are 'path',
                              'sentences', 'regions', 'tmap_locs',
                              'probability' and 'correct'
    corpus                 -- provides ``directions[i]`` with ``start``,
                              ``end`` and ``was_followed``
    tag_file               -- provides ``get_tag_locations(region)``
    marker, color, thelabel, linestyle
                           -- forwarded to ``plot_markers_evenly``
    use_strict_correctness -- score only the most probable path
    followedState          -- when not None, skip directions whose
                              ``was_followed`` differs from it
    sentence_i_to_run      -- alternative selection of the scored path;
                              mutually exclusive with
                              ``use_strict_correctness``

    Returns whatever ``plot_markers_evenly`` returns.
    Raises ValueError when both selection modes are requested.
    """
    Dists = []
    threshold = 10
    num_correct = 0
    total = 0.0
    for i in range(len(ofile['path'])):
        if (ofile['sentences'][i] == None):
            print "sentence", i, "was", ofile['sentences'][i]
            continue

        # region labels have the form "<start> to <end>"
        rst, rend = ofile['regions'][i].split("to")
        rst = rst.strip()
        rend = rend.strip()

        direction = corpus.directions[i]
        if followedState != None and direction.was_followed != followedState:
            # sanity-check that corpus and result file are aligned before
            # skipping this direction
            assert direction.start == rst, (direction.start, rst)
            assert direction.end == rend, (direction.end, rend)
            continue

        # 70.0 is the distance reported when no path ends closer than that
        curr_d = 70.0
        t2_loc = transpose(tag_file.get_tag_locations(rend))[0]

        #iterate over all of the paths that end in the location and keep
        #the smallest distance to the destination
        for k in range(len(ofile['path'][i])):
            if ofile['path'][i][k] == None:
                continue
            # the last path element is "<topology>_<suffix>"; only the
            # topology id is used
            t1 = ofile['path'][i][k][-1]
            t1 = float(t1.split("_")[0])
            t1_loc = ofile["tmap_locs"][t1]

            if (tklib_euclidean_distance(t2_loc, t1_loc) < curr_d):
                curr_d = tklib_euclidean_distance(t2_loc, t1_loc)

        if use_strict_correctness and sentence_i_to_run != None:
            raise ValueError("Must pass one or the other and not both."
                             + ` use_strict_correctness ` + " and " + ` sentence_i_to_run `)

        # in either selection mode the best-of-all-paths distance computed
        # above is replaced by the distance of one selected path
        if use_strict_correctness or sentence_i_to_run != None:
            if use_strict_correctness:
                best_scoring_run_k = argmax(ofile['probability'][i])
            elif sentence_i_to_run != None:
                # NOTE(review): myelt is never assigned in this function (the
                # loop that used to define it is commented out in the
                # original), so this line raises NameError when reached --
                # presumably the index should be i; confirm with the author
                best_scoring_run_k = sentence_i_to_run[myelt]
                if ofile['path'][i][best_scoring_run_k] == None:
                    best_scoring_run_k = argmax(ofile['probability'][i])
            t1 = float(ofile['path'][i][best_scoring_run_k][-1].split("_")[0])
            t1_loc = ofile["tmap_locs"][t1]
            curr_d = tklib_euclidean_distance(t2_loc, t1_loc)

        if curr_d < threshold or ofile['correct'][i][0]:
            num_correct += 1
        total += 1
        Dists.append(curr_d)

    print thelabel, "num_correct less than %.2f meters: %d (%.3f%%)" % (
        threshold, num_correct, 100.0 * num_correct / total)

    # cumulative distribution: for every observed distance, the fraction of
    # runs that ended at most that far away
    Y = []
    X = []
    for threshold in Dists:
        Itrue, = nonzero(array(Dists) <= threshold)
        Y.append(len(Itrue) / (1.0 * len(Dists)))
        X.append(threshold)

    # quicksort is used here as returning (sorted values, original indices)
    X, I = quicksort(X)
    Y = array(Y).take(I)

    p = plot_markers_evenly(X, Y, thelabel, marker, color, linewidth=2.5, linestyle=linestyle)
    mpl.xlabel('distance from destination (m)')
    mpl.ylabel('proportion correct')
    return p
def plot_distance_curve_subject(ofile, create_figure=True, mystyle=None, best_sub_only=False, best_question_only=False, included_subjects=None):
    """Plot one cumulative distance-to-destination curve per subject.

    ofile              -- result dict; the keys read here are 'path',
                          'sentences', 'regions', 'region_to_topology',
                          'tmap_locs' and 'subjects'
    create_figure      -- open a new figure before plotting
    mystyle            -- plot style for every curve; when None a style is
                          picked per curve from a fixed list
    best_sub_only      -- keep only the subject with the smallest summed
                          distance
    best_question_only -- keep only the region with the smallest summed
                          distance (takes precedence over best_sub_only)
    included_subjects  -- when not None, only these keys are plotted

    Returns the list of line objects collected from ``plot``.
    """
    styles = [
        "ro-", "b^-", "k>-", "g<-", "ro--", "b^--", "k>--", "g<--", "ro-.",
        "b^-.", "k>-.", "g<-.", "ro:", "b^:", "k>:", "g<:"
    ]
    if (create_figure):
        figure()

    # distances grouped by subject and by region label
    Dists = {}
    Dists_question = {}
    for i in range(len(ofile['path'])):
        if (ofile['sentences'][i] is None):
            print "sentence", i, "was", ofile['sentences'][i]
            continue

        # region labels have the form "<start> to <end>"
        rst, rend = ofile['regions'][i].split("to")
        rend = rend.strip()
        t2 = ofile['region_to_topology'][rend]

        #iterate over all the topologies in the final region and keep the
        #smallest distance between any of them and the end of any path
        curr_d = 100000000000000000000000000000.0
        for myelt in t2:
            t2_loc = ofile['tmap_locs'][myelt]

            #iterate over all of the paths that end in the location
            for k in range(len(ofile['path'][i])):
                path = ofile['path'][i][k]
                if path is None:
                    # NOTE(review): this overwrites the running minimum with
                    # a large sentinel instead of skipping the missing path
                    # -- confirm that is intended
                    curr_d = 100000000000000000
                else:
                    # the last path element is "<topology>_<suffix>"
                    t1 = path[-1]
                    t1 = float(t1.split("_")[0])
                    t1_loc = ofile["tmap_locs"][t1]
                    if (math2d_dist(t2_loc, t1_loc) < curr_d):
                        curr_d = math2d_dist(t2_loc, t1_loc)

        #subjects
        if (not Dists.has_key(ofile["subjects"][i])):
            Dists[ofile["subjects"][i]] = []
        Dists[ofile["subjects"][i]].append(curr_d)

        #regions
        if (not Dists_question.has_key(ofile["regions"][i])):
            Dists_question[ofile["regions"][i]] = []
        Dists_question[ofile["regions"][i]].append(curr_d)

    xlabel('distance from destination (m)')
    ylabel('percentage correct')
    mylabel = None

    # optionally reduce Dists to the single best subject / region
    # NOTE(review): sum(..., axis=1) assumes every list has the same length
    if (best_sub_only):
        dvals = sum(Dists.values(), axis=1)
        i = argmin(dvals)
        new_vals = Dists.values()[i]
        new_key = Dists.keys()[i]
        Dists = {}
        Dists[new_key] = new_vals

    if (best_question_only):
        dvals = sum(Dists_question.values(), axis=1)
        i = argmin(dvals)
        new_vals = Dists_question.values()[i]
        new_key = Dists_question.keys()[i]
        Dists = {}
        Dists[new_key] = new_vals

    plots = []
    for k, subject in enumerate(Dists.keys()):
        if included_subjects != None and not subject in included_subjects:
            continue

        # cumulative distribution of this subject's distances
        Y = []
        X = []
        for threshold in Dists[subject]:
            Itrue, = nonzero(array(Dists[subject]) <= threshold)
            Y.append(len(Itrue) / (1.0 * len(Dists[subject])))
            X.append(threshold)

        # quicksort is used here as returning (sorted values, indices)
        X, I = quicksort(X)
        Y = array(Y).take(I)

        sub_plt = subject.replace("Subject", "Sub.")
        if mystyle is None:
            style = styles[k % len(styles)]
        else:
            style = mystyle

        # make the curve start on the x axis at the first observed distance
        if (X[0] > 0.0):
            Xf = [X[0]]
            Xf.extend(X)
            Yf = [0]
            Yf.extend(Y)
            X = Xf
            Y = Yf

        plots.extend(plot(X, Y, style, label=sub_plt, linewidth=2.5))

        num_correct_at_threshold = len(nonzero(array(Dists[subject]) <= 10)[0])
        print k, subject, "less than 10 meters", num_correct_at_threshold,
        print "%.3f%%" % ((100.0 * num_correct_at_threshold) /
                          (1.0 * len(Dists[subject])))

    return plots
def plot_roc_curve(ofile): figure() probs_orig = ofile['probability'] correctness_orig = ofile['correct_neigh'] #get the probs probs = [] for ps_i, ps in enumerate(probs_orig): if (not array(ps).max() is None): probs.append(array(ps).max()) else: probs.append(0) #if(len(probs) == 0): # print "is empty" # raw_input() # get whether the question was correct correctness = [] for crr in correctness_orig: if (True in crr): correctness.append(True) else: correctness.append(False) #root by the length of the path i = 0 for elt in ofile['keywords']: print "test", elt print "after test" print "probs[i]=", probs[i] if (len(elt) > 0): probs[i] = pow(probs[i], 1 / (1.0 * len(elt) - 1)) else: probs[i] i += 1 print "number correct:", sum(correctness) print "total directions:", len(correctness) TPR = [] FPR = [] for threshold in probs: #get the ones above the threshold #print nonzero(array(probs) > threshold) #print array(probs) > threshold Itrue, = nonzero(array(probs) >= threshold) iscorrect = array(correctness).take(Itrue) TP = sum(iscorrect) * 1.0 TP_FP = len(iscorrect) * 1.0 FP = TP_FP - TP #get the ones below the threshold Ifalse, = nonzero(array(probs) <= threshold) is_not_correct = array(correctness).take(Ifalse) FN = sum(is_not_correct) * 1.0 TN_FN = len(is_not_correct) * 1.0 TN = TN_FN - FN TPR.append(1.0 * TP / ((TP + FN) + 0.00000000001)) FPR.append(1.0 * TN / ((TN + FP) + 0.00000000001)) V, I = quicksort(FPR) X = array(FPR).take(I) Y = array(TPR).take(I) plot(X, Y, 'r-', linewidth=2.5) #font = FontProperties(size='x-small') xlabel('false positive rate') ylabel('true positive rate') AUC = 0.0 for i in range(len(X) - 1): AUC += (X[i + 1] - X[i]) * Y[i] title("AUC=" + str(AUC)) draw()
def test_quicksort(self):
    """quicksort must sort self.array in place like the built-in sort."""
    # Reference result from the built-in sort, computed on a copy.
    expected = self.array[:]
    expected.sort()

    sorting.quicksort(self.array)

    self.assertEqual(self.array, expected)
def plot_distance_curve(ofile, corpus, marker, color, thelabel='', use_strict_correctness=False, followedState=None, sentence_i_to_run=None, linestyle="-"):
    """Plot the proportion of runs that end within a distance of the goal.

    For every sentence in ``ofile`` the distance between the destination
    region's topologies and the end of the inferred path(s) is computed;
    the cumulative distribution of those distances is then plotted.  The
    share of topology nodes visited per run is printed as well.

    ofile                  -- result dict; the keys read here are 'path',
                              'sentences', 'regions', 'region_to_topology',
                              'tmap_locs', 'probability', 'correct',
                              'options' and 'visited_viewpoints'
    corpus                 -- provides ``directions[i]`` with ``start``,
                              ``end`` and ``was_followed``
    marker, color, thelabel, linestyle
                           -- forwarded to ``plot_markers_evenly``
    use_strict_correctness -- score only the most probable path
    followedState          -- when not None, skip directions whose
                              ``was_followed`` differs from it
    sentence_i_to_run      -- alternative selection of the scored path;
                              mutually exclusive with
                              ``use_strict_correctness``

    Returns whatever ``plot_markers_evenly`` returns.
    Raises ValueError when both selection modes are requested.
    """
    Dists = []
    threshold = 10
    num_correct = 0
    total = 0.0
    for i in range(len(ofile['path'])):
        if (ofile['sentences'][i] is None):
            print "sentence", i, "was", ofile['sentences'][i]
            continue

        # region labels have the form "<start> to <end>"
        rst, rend = ofile['regions'][i].split("to")
        rst = rst.strip()
        rend = rend.strip()

        direction = corpus.directions[i]
        if followedState != None and direction.was_followed != followedState:
            # sanity-check that corpus and result file are aligned before
            # skipping this direction
            assert direction.start == rst, (direction.start, rst)
            assert direction.end == rend, (direction.end, rend)
            continue

        t2 = ofile['region_to_topology'][rend]

        #iterate over all the topologies in the final region; 70.0 is the
        #distance reported when no path ends closer than that
        curr_d = 70.0
        for myelt in t2:
            t2_loc = ofile['tmap_locs'][myelt]

            #iterate over all of the paths that end in the location
            for k in range(len(ofile['path'][i])):
                if ofile['path'][i][k] is None:
                    continue
                # the last path element is "<topology>_<suffix>"
                t1 = ofile['path'][i][k][-1]
                t1 = float(t1.split("_")[0])
                t1_loc = ofile["tmap_locs"][t1]

                if (math2d_dist(t2_loc, t1_loc) < curr_d):
                    curr_d = math2d_dist(t2_loc, t1_loc)

        if use_strict_correctness and sentence_i_to_run != None:
            raise ValueError("Must pass one or the other and not both."
                             + ` use_strict_correctness ` + " and " + ` sentence_i_to_run `)

        # in either selection mode the best-of-all-paths distance computed
        # above is replaced by the distance of one selected path, measured
        # against the last topology visited by the loop above
        if use_strict_correctness or sentence_i_to_run != None:
            if use_strict_correctness:
                best_scoring_run_k = argmax(ofile['probability'][i])
            elif sentence_i_to_run != None:
                # NOTE(review): myelt is the leftover loop variable (the last
                # topology of the region) -- confirm that indexing
                # sentence_i_to_run by it rather than by i is intended
                best_scoring_run_k = sentence_i_to_run[myelt]
                if ofile['path'][i][best_scoring_run_k] is None:
                    best_scoring_run_k = argmax(ofile['probability'][i])
            t1 = float(
                ofile['path'][i][best_scoring_run_k][-1].split("_")[0])
            t1_loc = ofile["tmap_locs"][t1]
            curr_d = math2d_dist(t2_loc, t1_loc)

        if curr_d < threshold or ofile['correct'][i][0]:
            num_correct += 1
        total += 1
        Dists.append(curr_d)

    # fraction of the model's topology nodes that each run visited
    all_visited_topos = []
    import cPickle
    # NOTE(review): unpickling can execute arbitrary code; the model file
    # must come from a trusted source.  The file object is never closed.
    model = cPickle.load(open(ofile["options"]["model_fn"], 'r'))
    print "len", len(ofile["visited_viewpoints"])
    for visited_vps in ofile["visited_viewpoints"]:
        assert len(visited_vps) == 1, len(visited_vps)
        visited_topos = set()
        for vp in visited_vps[0]:
            # viewpoints are named "<topology>_<orientation>"
            topo_i, orient = vp.split("_")
            visited_topos.add(topo_i)
        all_visited_topos.append(
            float(len(visited_topos)) / len(model.tmap_locs))

    print "average # of nodes visited", mean(all_visited_topos)
    print thelabel, "num_correct less than %.2f meters: %d (%.3f%%), visited %.3f%%" % (
        threshold, num_correct, 100.0 * num_correct / total,
        100 * mean(all_visited_topos))

    # cumulative distribution: for every observed distance, the fraction of
    # runs that ended at most that far away
    Y = []
    X = []
    for threshold in Dists:
        Itrue, = nonzero(array(Dists) <= threshold)
        Y.append(len(Itrue) / (1.0 * len(Dists)))
        X.append(threshold)

    # quicksort is used here as returning (sorted values, original indices)
    X, I = quicksort(X)
    Y = array(Y).take(I)

    p = plot_markers_evenly(X, Y, thelabel, marker, color, linewidth=2.5, linestyle=linestyle)
    xlabel('distance from destination (m)')
    ylabel('proportion correct')
    return p
def test_max_quicksort():
    """quicksort with order='max' must restore descending key order."""
    # Reference: items with keys 99, 98, ..., 0.
    expected = [KeyedItem(key=key) for key in reversed(range(100))]

    # Shuffle a copy, then sort it back in place.
    items = list(expected)
    random.shuffle(items)
    quicksort(items, order='max')

    assert items == expected
def plot_distance_curve_random(model, corpus_fn, gtruth_tag_fn, map_fn, color, marker, label='', linestyle="-", region_to_topology=None): """ Needs the viewpoints and stuff from the model. """ print "starting random" dsession = readSession(corpus_fn, "none") if gtruth_tag_fn != None: tf = tag_file(gtruth_tag_fn, map_fn) topohash = get_region_to_topo_hash_containment(tf, model) else: topohash = region_to_topology Dists = [] for elt in dsession: for i in range(len(elt.routeInstructions)): if (elt.columnLabels[i] is None): print "sentence", i, "was", elt.columnLabels[i] continue start_true, end_true = elt.columnLabels[i].split("to") start_true = str(start_true.strip()) end_true = str(end_true.strip()) iSlocTopo = topohash[start_true][0] iElocTopo = topohash[end_true][0] eloc = model.tmap_locs[iElocTopo] total_dist = 0.0 for vp in model.viewpoints: topo, orient = vp.split("_") vp_loc = model.tmap_locs[float(topo)] total_dist += math2d_dist(vp_loc, eloc) expected_dist = total_dist / len(model.viewpoints) Dists.append(expected_dist) Y = [] X = [] for threshold in Dists: #get the ones above the threshold #print nonzero(array(Dists) > threshold) #print array(Dists) > threshold Itrue, = nonzero(array(Dists) <= threshold) Y.append(len(Itrue) / (1.0 * len(Dists))) X.append(threshold) num_correct_at_threshold = len(nonzero(array(Dists) <= 10)[0]) print "random less than 10 meters", num_correct_at_threshold, print "%.3f%%" % (num_correct_at_threshold / (1.0 * len(Dists))) print "sorting" X, I = quicksort(X) print "taking" Y = array(Y).take(I) print "plotting" if (X[0] > 0.0): Xf = [X[0]] Xf.extend(X) Yf = [0] Yf.extend(Y) X = Xf Y = Yf p = plot_markers_evenly(X, Y, label, marker, color, linewidth=2.5, linestyle=linestyle) xlabel('distance from destination (m)') ylabel('proportion correct') return p
def test_min_quicksort():
    """quicksort with its default order must restore ascending key order."""
    # Reference: items with keys 0, 1, ..., 99.
    expected = [KeyedItem(key=key) for key in range(100)]

    # Shuffle a copy, then sort it back in place.
    items = list(expected)
    random.shuffle(items)
    quicksort(items)

    assert items == expected
def get_region_to_topo_hash_containment(tf_region, dg_model):
    """Map every region tag to the topology keys that lie inside the region.

    tf_region -- tag file of the regions; provides ``polygons`` (each with
                 ``X``, ``Y``, ``tag`` and ``min_dist``) and ``get_map()``
    dg_model  -- provides ``tmap_keys``, ``tmap_locs`` and ``tmap``

    A topology belongs to a region when its map index falls inside the
    axis-aligned bounding box of the region's polygon and its ``tmap`` entry
    is not empty.  The keys found for a polygon are ordered by distance from
    the polygon's mean point.  A region without any contained topology gets
    two fallback keys: the topology closest to the polygon
    (``pp.min_dist``) and the one closest to its mean point.

    Returns a dict from tag to list of topology keys.
    """
    #the tagfile here is of the regions
    ret_hash = {}
    ppoly = tf_region.polygons
    mymap = tf_region.get_map()

    for pp in ppoly:
        #add all of the topologies based on containment
        # pts_I collects the map indices of the topologies added for this
        # polygon, in the same order as they are appended to ret_hash
        pts_I = [];
        for tm_key in dg_model.tmap_keys:
            tm_loc = dg_model.tmap_locs[tm_key]
            tm_loc = mymap.to_index(tm_loc)

            # bounding box of the polygon
            bbx1 = min(pp.X); bby1 = min(pp.Y);
            bbx2 = max(pp.X); bby2 = max(pp.Y);

            if(tm_loc[0] <= bbx2 and tm_loc[0] >= bbx1
               and tm_loc[1] <= bby2 and tm_loc[1] >= bby1):
                #it is contained
                # topologies with an empty tmap entry are ignored
                if(len(dg_model.tmap[tm_key]) == 0):
                    continue

                if(ret_hash.has_key(pp.tag)):
                    ret_hash[pp.tag].append(tm_key)
                else:
                    ret_hash[pp.tag] = [tm_key]
                pts_I.append(tm_loc)

        #resort them by distance from the center
        # of the original region
        # NOTE(review): if two polygons share a tag, ret_hash[pp.tag] holds
        # more keys than pts_I has points and take() keeps only some of
        # them -- confirm tags are unique
        if(ret_hash.has_key(pp.tag) and len(ret_hash[pp.tag]) > 1) and len(pts_I) != 0:
            D = tklib_get_distance(transpose(pts_I), [mean(pp.X), mean(pp.Y)]);
            D_srt, I_srt = quicksort(D)
            ret_hash[pp.tag] = list(array(ret_hash[pp.tag]).take(I_srt))

        #in case nothing was added for a particular tag
        if(not ret_hash.has_key(pp.tag)):
            print "region not found via containment"

            # best_tmkey / best_tmdist: closest to the polygon's mean point;
            # best_tmkey_dist / best_tmdist_dist: closest by pp.min_dist
            best_tmkey = None
            best_tmdist = 10000000000000000.0
            best_tmkey_dist = None
            best_tmdist_dist = 10000000000000000.0
            tm_loc1, tm_loc_dist = None, None
            for tm_key in dg_model.tmap_keys:
                tm_loc = dg_model.tmap_locs[tm_key]

                #tm_loc is in xy and we need to convert to
                # an index
                tm_d = math2d_dist([mean(pp.X), mean(pp.Y)], mymap.to_index(tm_loc))
                tm_d_dist = pp.min_dist(mymap.to_index(tm_loc))

                if(tm_d < best_tmdist):
                    best_tmdist = tm_d
                    best_tmkey = tm_key
                    tm_loc1 = tm_loc

                if(tm_d_dist < best_tmdist_dist):
                    best_tmdist_dist = tm_d_dist
                    best_tmkey_dist = tm_key
                    tm_loc_dist = tm_loc

            pts_I.extend([tm_loc_dist, tm_loc1])
            ret_hash[pp.tag] = [best_tmkey_dist, best_tmkey]

    return ret_hash
def test_quicksort():
    """The value returned by quicksort must equal the built-in sorted()."""
    data = fill_random_list()
    # quicksort runs first, exactly as in a single comparison expression.
    result = sorting.quicksort(data)
    expected = sorted(data)
    assert result == expected