def trustworthiness(orig, proj, ks):
    """Compute trustworthiness values of a projection for a dataset.

    orig
      matrix containing the data in the original space

    proj
      matrix containing the data in the projected space

    ks
      iterable of neighbourhood sizes for which trustworthiness is
      calculated

    Return a list holding one trustworthiness value per entry of ks,
    in the same order.
    """
    dist_orig = distance.distance_matrix(orig)
    dist_proj = distance.distance_matrix(proj)

    # Per-row neighbour orderings in each space (indices sorted by distance).
    neighbours_orig = dist_orig.argsort()
    neighbours_proj = dist_proj.argsort()

    # Trustworthiness is scored against the ranks of the original space.
    rank_orig = distance.rank_matrix(dist_orig)

    def _score(k):
        # One moved_in() result per data point, for neighbourhood size k.
        per_point = [
            moved_in(neighbours_orig, neighbours_proj, idx, k)
            for idx in range(orig.shape[0])
        ]
        return trustcont_sum(per_point, rank_orig, k)

    return [_score(k) for k in ks]
def continuity(orig, proj, ks):
    """Compute continuity values of a projection for a dataset.

    orig
      matrix containing the data in the original space

    proj
      matrix containing the data in the projected space

    ks
      iterable of neighbourhood sizes for which continuity is calculated

    Return a list holding one continuity value per entry of ks, in the
    same order.
    """
    dist_orig = distance.distance_matrix(orig)
    dist_proj = distance.distance_matrix(proj)

    # Per-row neighbour orderings in each space (indices sorted by distance).
    neighbours_orig = dist_orig.argsort()
    neighbours_proj = dist_proj.argsort()

    # Continuity is scored against the ranks of the projected space.
    rank_proj = distance.rank_matrix(dist_proj)

    def _score(k):
        # One moved_out() result per data point, for neighbourhood size k.
        per_point = [
            moved_out(neighbours_orig, neighbours_proj, idx, k)
            for idx in range(orig.shape[0])
        ]
        return trustcont_sum(per_point, rank_proj, k)

    return [_score(k) for k in ks]
def trustworthiness(orig, proj, ks, k_scale=5):
    """Calculate trustworthiness values for a dataset.

    orig
      matrix containing the data in the original space

    proj
      matrix containing the data in the projected space

    ks
      iterable indicating the neighbourhood(s) for which trustworthiness
      is calculated; every entry is multiplied by k_scale before use

    k_scale
      factor applied to each entry of ks to obtain the neighbourhood
      size that is actually evaluated.  The default of 5 keeps the
      behaviour this function has always had (the factor used to be
      hard-coded); pass k_scale=1 to evaluate exactly the sizes in ks.

    Return a list of trustworthiness values, one per entry of ks, where
    the i-th value belongs to neighbourhood size ks[i] * k_scale.
    """
    dd_orig = distance.distance_matrix(orig)
    dd_proj = distance.distance_matrix(proj)
    nn_orig = dd_orig.argsort()
    nn_proj = dd_proj.argsort()
    ranks_orig = distance.rank_matrix(dd_orig)

    trust = []
    for k in ks:
        # The scaled size is used both for neighbour selection and for the
        # summation, so the two always agree on the neighbourhood.
        scaled_k = k * k_scale
        moved = [
            moved_in(nn_orig, nn_proj, i, scaled_k)
            for i in range(orig.shape[0])
        ]
        trust.append(trustcont_sum(moved, ranks_orig, scaled_k))
    return trust
def test_one_vector_matrix(self):
    """Distance matrix of a matrix whose rows are all the same random vector is zero"""
    size = 50
    vec = numpy.random.randn(1, size)

    # Broadcasting copies the single random row into every row of the matrix.
    mat = numpy.zeros([size, size])
    mat[:, :] = vec

    result = distance.distance_matrix(mat)
    expected = numpy.zeros([size, size])
    assert (result == expected).all(), "Distance matrix for matrix composed from one random matrix"
def test_random(self): items = 10 dim = 10 orig = numpy.random.randn(items,dim) proj = numpy.random.randn(items,dim) dd_orig = distance.distance_matrix(orig) dd_proj = distance.distance_matrix(proj) nn_orig = dd_orig.argsort() nn_proj = dd_proj.argsort() for i in range(items): moved_in = trustcont.moved_in(nn_orig, nn_proj, i, i) assert (not i in moved_in), "Point itself cannot be in its'\ own neighbour" moved_out = trustcont.moved_out(nn_orig, nn_proj, i, i) assert (not i in moved_out), "Point itself cannot be in its'\
def test_two_points(self):
    """Distance matrix of two points that lie one unit apart."""
    points = numpy.array([[0, 0], [-1, 0]])
    dist = distance.distance_matrix(points)

    assert dist.shape[0] == dist.shape[1], "Distance matrix is square"

    # (cell, expected distance, assertion message), checked in this order.
    expectations = (
        ((0, 0), 0, "(0,0) - (0,0)"),
        ((1, 0), 1, "(-1,0) - (0,0)"),
        ((0, 1), 1, "(0,0) - (-1,0)"),
        ((1, 1), 0, "(1,1) - (1,1)"),
    )
    for cell, expected, message in expectations:
        assert dist[cell] == expected, message
def test_random(self):
    """Rank matrix of random data is square and has a zero diagonal."""
    # Both dimensions are drawn at random from [0, 100).
    n_rows = numpy.random.randint(100)
    n_cols = numpy.random.randint(100)
    data = numpy.random.random([n_rows, n_cols])

    ranks = distance.rank_matrix(distance.distance_matrix(data))

    assert ranks.shape[0] == ranks.shape[1], "Rank matrix is square"
    assert all(
        ranks[n, n] == 0 for n in range(ranks.shape[0])
    ), "Rank (i,i) is zero"
def test_random(self):
    """Distance matrix of random data is square and has a zero diagonal."""
    # Both dimensions are drawn at random from [0, 100).
    n_rows = numpy.random.randint(100)
    n_cols = numpy.random.randint(100)
    data = numpy.random.random([n_rows, n_cols])

    dist = distance.distance_matrix(data)

    assert dist.shape[0] == dist.shape[1], "Distance matrix is square"
    assert all(
        dist[n, n] == 0 for n in range(dist.shape[0])
    ), "d(i,i) == 0"
def setUp(self):
    """
    Three-point fixture and its projection (labels are row indices):

    original:        projection:

      2
      0 . 1          0 . . . 2 1

    origo = (0)

    Original coordinates:  0 = (0,0), 1 = (2,0), 2 = (0,1)
    Projected coordinates: 0 = (0,0), 1 = (5,0), 2 = (4,0)
    """
    # Original space: points, pairwise distances, per-row neighbour order.
    original = numpy.array([[0, 0], [2, 0], [0, 1]])
    self.orig = original
    self.dd_orig = distance.distance_matrix(original)
    self.nn_orig = self.dd_orig.argsort()

    # Projected space: same three quantities.
    projected = numpy.array([[0, 0], [5, 0], [4, 0]])
    self.proj = projected
    self.dd_proj = distance.distance_matrix(projected)
    self.nn_proj = self.dd_proj.argsort()
def test_five_points(self):
    """Check squareness, zero diagonal and one precalculated distance.

    Despite the test name, the fixture below holds six points.
    """
    points = numpy.array([[1, 1], [2, 2], [4, 4], [3, 5], [0, 0], [2, 0]])
    dist = distance.distance_matrix(points)

    assert dist.shape[0] == dist.shape[1], "Distance matrix is square"
    assert all(
        dist[n, n] == 0 for n in range(dist.shape[0])
    ), "Distance to vector itself is zero"

    # Precalculated distance between rows 4 and 5, checked in both directions.
    assert dist[4, 5] == 2, "Distance between (0,0) and (2,0) is 2"
    assert dist[5, 4] == 2, "d (2,0) - (0,0) == 2"
def test_three_points(self):
    """Rank matrix of three collinear points matches hand-computed ranks."""
    points = numpy.array([[0, 0], [1, 1], [4, 4]])
    ranks = distance.rank_matrix(distance.distance_matrix(points))

    assert ranks.shape[0] == ranks.shape[1], "Rank matrix is square"

    # Every point has rank zero with respect to itself.
    assert all(
        ranks[n, n] == 0 for n in range(ranks.shape[0])
    ), "Rank (i,i) is zero"

    # (cell, expected rank, assertion message), checked in this order.
    expectations = (
        ((0, 1), 1, "r(0,1)"),
        ((0, 2), 2, "r(0,2)"),
        ((1, 0), 1, "r(1,0)"),
        ((1, 2), 2, "r(1,2)"),
        ((2, 0), 2, "r(2,0)"),
        ((2, 1), 1, "r(2,1)"),
    )
    for cell, expected, message in expectations:
        assert ranks[cell] == expected, message
# Script: repeatedly pick a pair of entries from a distance matrix via a
# Q matrix, merge the pair into a new node, and record the resulting
# neighbour relation.
# NOTE(review): this looks like a neighbour-joining tree construction --
# confirm against the `distance` and `q` modules.
import sys
from distance import score as distance_matrix, update
from q import score as q_matrix, new_node_distance
from sequence_file import read

# Accumulated answers: one [label, new node label, distance] entry per step.
neighbours = []

# Read list of sequences S
S = read("sequences.txt")

# Calculate the initial distance matrix D and the labels of its entries
D, labels = distance_matrix(S)

# program loop
# NOTE(review): no exit condition is visible in this part of the file --
# confirm that the loop terminates further down.
while True:
    # Calculate Q matrix and the index pair of its minimum value
    # NOTE(review): `min` shadows the builtin of the same name.
    Q, min = q_matrix(D)

    # Calculate distance of the two selected entries to the new node
    f = min[0]
    g = min[1]
    f_label = labels[f]
    # NOTE(review): g_label and dist_gu are not used in the visible code.
    g_label = labels[g]
    dist_fu, dist_gu = new_node_distance(f, g, D)

    # update distance matrix D (and the labels) with the new node
    D, labels = update(D, f, g, labels)

    # append results and correct for zero-indexed labelling; the assignment
    # requires 1-indexing.  labels[-1] is read after update(); presumably it
    # is the label of the newly created node -- verify against update().
    neighbours.append([f_label + 1, labels[-1] + 1, dist_fu])
def test_unit_matrix(self):
    """Distance matrix computed from the fixture self.o equals the fixture self.z"""
    result = distance.distance_matrix(self.o)
    is_zero = result == self.z
    assert is_zero.all(), "Distance matrix for unit matrix"
def test_zero_matrix(self):
    """Distance matrix computed from the fixture self.z equals self.z itself"""
    result = distance.distance_matrix(self.z)
    is_zero = result == self.z
    assert is_zero.all(), "Distance matrix for zero matrix"