class TestMatrix(unittest.TestCase):
    """Checks SparseMatrix loading and shuffling against the sample matrix file."""

    def setUp(self):
        """Read the tab-separated sample file into a SparseMatrix of size [2, 4, 9]."""
        # The context manager closes the handle even if reading fails
        # (the original left the file open until garbage collection).
        with open('tests/sample-matrix-file.txt', 'r') as sample_file:
            self.data = [l.split('\t') for l in sample_file.readlines()]
        self.matrix = SparseMatrix([2, 4, 9])
        self.matrix.read_data(self.data)

    def testMatrixInit(self):
        # Every expected nonzero coordinate and its stored value.
        expected_elements = [
            ((1, 3, 7), 2.0),
            ((0, 0, 0), 2.0),
            ((0, 0, 2), 2.0),
            ((1, 1, 5), 7.0),
            ((1, 1, 3), 3.0),
            ((1, 3, 6), 2.0),
            ((1, 3, 8), 2.0),
            ((0, 0, 1), 2.0),
            ((1, 1, 4), 2.0),
            ((1, 2, 5), 2.0),
        ]
        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        for coords, value in expected_elements:
            self.assertEqual(self.matrix.nonzero_elements[coords], value)
        self.assertEqual(len(self.matrix.nonzero_elements), 10)

        # Feature-name -> index maps, one per axis.
        self.assertEqual(self.matrix.feature_ids[0], {'mice': 1, 'patient': 0})
        self.assertEqual(self.matrix.feature_ids[1],
                         {'R92Q': 1, 'R91W': 2, 'Val30Met': 0, 'R90W': 3})
        self.assertEqual(self.matrix.feature_ids[2],
                         {'START_ENTITY|nmod|END_ENTITY': 1,
                          'START_ENTITY|nummod|END_ENTITY': 5,
                          'FAP|compound|END_ENTITY': 2,
                          'expression|nmod|END_ENTITY': 8,
                          '+|compound|END_ENTITY': 7,
                          'mice|nummod|END_ENTITY': 3,
                          'homozygous|nsubj|START_ENTITY': 6,
                          'mutation|appos|END_ENTITY': 4,
                          'START_ENTITY|nmod|FAP': 0})

    def testShuffle(self):
        # A shuffle must keep the number of nonzero entries and the set of
        # distinct values; only their coordinates may change.
        shuffled_matrix = self.matrix.shuffle()
        self.assertEqual(len(shuffled_matrix.nonzero_elements),
                         len(self.matrix.nonzero_elements))
        self.assertEqual(set(shuffled_matrix.nonzero_elements.values()),
                         set(self.matrix.nonzero_elements.values()))
        print("shuffled matrix elements: ", shuffled_matrix.nonzero_elements)
def main():
    """
    Command-line driver: run EBC several times and write entity cluster assignments.

    Positional arguments (sys.argv):
        1: path of the tab-separated data file
        2: comma-separated column indices fed to EBC (the last one is not
           counted as a dimension)
        3: comma-separated numbers of clusters per dimension
        4: number of EBC runs
        5: output file path
        6: maximum jitter
        7: maximum EBC iterations
        8: comma-separated column indices that identify an entity
        9: objective tolerance

    Each output line is: entity ids, entity names, then one tab-separated
    cluster tuple per run.
    """
    data_file = sys.argv[1]
    ebc_cols = [int(e) for e in sys.argv[2].split(",")]
    K = [int(e) for e in sys.argv[3].split(",")]
    N_runs = int(sys.argv[4])
    output_file = sys.argv[5]
    jitter_max = float(sys.argv[6])
    max_iterations_ebc = int(sys.argv[7])
    entity_cols = [int(e) for e in sys.argv[8].split(",")]
    object_toler = float(sys.argv[9])

    # get original data (context manager so the handle is always closed)
    with open(data_file, "r") as reader:
        raw_data = [line.split("\t") for line in reader]
    data = [[d[i] for i in ebc_cols] for d in raw_data]
    data_dimensions = len(data[0]) - 1

    # get axis length for each dimension
    N = [len(set(d[dim] for d in data)) for dim in range(data_dimensions)]
    print(N)

    # set up matrix
    M = SparseMatrix(N)
    M.read_data(data)
    M.normalize()

    # For every entity column, the EBC axis it lives on.  Kept in
    # entity_cols order so position p of an entity-id tuple matches
    # entity_axes[p].
    entity_axes = [ebc_cols.index(c) for c in entity_cols]

    # set up entity map to ids
    entity_map = defaultdict(tuple)
    for d in raw_data:
        entity = tuple(d[i] for i in entity_cols)
        entity_ids = tuple(M.feature_ids[axis][d[col]]
                           for axis, col in zip(entity_axes, entity_cols))
        entity_map[entity_ids] = entity

    # run EBC and get entity cluster assignments
    ebc_M = EBC(M, K, max_iterations_ebc, jitter_max, object_toler)
    clusters = defaultdict(list)
    for t in range(N_runs):
        # Single-argument form prints the same text on Python 2 and 3.
        print("run  %s" % t)
        cXY_M, objective_M, it_M = ebc_M.run()
        for e1 in entity_map:
            # Look up each id on its own axis.  The original indexed both
            # the axis and the id tuple with the axis number, which is only
            # correct when the entity columns are a same-order prefix of
            # the EBC columns.
            c1_i = tuple(cXY_M[axis][e1[pos]]
                         for pos, axis in enumerate(entity_axes))
            clusters[e1].append(c1_i)

    # print assignments
    with open(output_file, "w") as writer:
        for k in clusters:
            e1_name = entity_map[k]
            writer.write(",".join([str(e) for e in k]) + "\t" +
                         ",".join([e for e in e1_name]) + "\t" +
                         "\t".join([",".join([str(f) for f in e]) for e in clusters[k]]) +
                         "\n")
            writer.flush()
def from_csv_crs(rowPtr, colInd, value, file_out):
    """
    Generates a binary file named file_out which represents the SparseMatrix
    generated from the csv files containing row pointer, column indices, and
    values in files named rowPtr, colInd, and value respectively.

    Every row pointer and column index read from the files is decremented by
    one before being stored.

    Precondition: Number of values in colInd and value is strictly greater
    than the number of values in rowPtr. Each entry in rowPtr and colInd is
    an integer and each entry in value is either a float or an integer.

    Raises:
        ValueError: if an entry cannot be parsed, if colInd has fewer entries
            than rowPtr, or if value has fewer entries than colInd.
    """
    # Context managers guarantee the inputs are closed even when a line
    # fails to parse (the original leaked all three handles in that case).
    with open(rowPtr, "r") as row_file:
        rowP = [int(line.strip("\n")) - 1 for line in row_file]
    with open(colInd, "r") as col_file:
        colI = [int(line.strip("\n")) - 1 for line in col_file]

    # The original read one column index per row pointer and failed with
    # ValueError when colInd ran out first; keep that contract explicit.
    if len(colI) < len(rowP):
        raise ValueError("colInd has fewer entries than rowPtr")

    # Exactly one value per column index; a short value file yields an empty
    # string here and np.float64 raises ValueError, as before.
    with open(value, "r") as value_file:
        val = [np.float64(value_file.readline().strip("\n")) for _ in colI]

    A_sparse = SparseMatrix(len(rowP) - 1, len(rowP) - 1)
    A_sparse._rowPtr = rowP
    A_sparse._colInd = colI
    A_sparse._value = val

    # Creating a binary file and dumping the SparseMatrix A_sparse in it
    with open(file_out, "wb") as out_file:
        pickle.dump(A_sparse, out_file)
def main():
    """Run EBC once on the sparse matrix shipped with the EBC paper resources."""
    with open("resources/matrix-ebc-paper-sparse.tsv", "r") as handle:
        split_rows = (row.split("\t") for row in handle)
        # Rows with fewer than five fields are headers and are dropped.
        data = [[fields[0], fields[2], float(fields[4])]
                for fields in split_rows
                if len(fields) >= 5]

    matrix = SparseMatrix([14052, 7272])
    matrix.read_data(data)
    matrix.normalize()

    clusterer = EBC(matrix, [30, 125], 10, 1e-10, 0.01)
    # The three results are unpacked (and discarded) exactly as before.
    cXY, objective, it = clusterer.run()
def setUp(self):
    """Build the normalized 6 x 6 SparseMatrix from the ITCC paper data file."""
    rows = []
    with open("resources/matrix-itcc-paper-orig.tsv", "r") as source:
        for record in source:
            rows.append(record.split('\t'))

    itcc_matrix = SparseMatrix([6, 6])
    itcc_matrix.read_data(rows)
    itcc_matrix.normalize()
    self.matrix = itcc_matrix
def setUp(self):
    """Create a 5 x 5 matrix in full and sparse form plus a 5 x 1 column vector."""
    self.A = [
        [1, 2, 0, 0, 3],
        [4, 5, 6, 0, 0],
        [0, 7, 8, 0, 9],
        [0, 0, 0, 10, 0],
        [11, 0, 0, 0, 12],
    ]
    self.x = [[5], [4], [3], [2], [1]]

    self.x_full = FullMatrix(5, 1)
    self.A_full = FullMatrix(5, 5)
    self.A_sparse = SparseMatrix(5, 5)

    # Fill row by row: the vector entry first, then the same matrix entry
    # into both representations.
    for row_index, row in enumerate(self.A):
        self.x_full.addElement(row_index, 0, self.x[row_index][0])
        for col_index, entry in enumerate(row):
            self.A_full.addElement(row_index, col_index, entry)
            self.A_sparse.addElement(row_index, col_index, entry)
def testOldMatrix3d(self):
    # Run EBC on the dense 3-D paper matrix and check the number of nonzero
    # entries and the number of distinct clusters found on each axis.
    with open("resources/matrix-ebc-paper-dense-3d.tsv", "r") as f:
        data = []
        for line in f:
            sl = line.split("\t")
            data.append([sl[0], sl[1], sl[2], float(sl[3])])

    matrix = SparseMatrix([756, 996, 1232])
    matrix.read_data(data)
    matrix.normalize()
    ebc = EBC(matrix, [30, 30, 10], 100, 1e-10, 0.01)
    cXY, objective, it = ebc.run()

    # Single-argument print calls emit the same text on Python 2 and 3
    # (the original print statements are a syntax error on Python 3).
    print("objective:  %s" % (objective,))
    print("iterations:  %s" % (it,))

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(ebc.pXY.nonzero_elements), 10007)
    self.assertEqual(len(set(ebc.cXY[0])), 30)
    self.assertEqual(len(set(ebc.cXY[1])), 30)
    self.assertEqual(len(set(ebc.cXY[2])), 10)
def setUp(self):
    """Load the sparse EBC paper matrix (columns 0, 2 and 4) and normalize it."""
    records = []
    with open("resources/matrix-ebc-paper-sparse.tsv", "r") as source:
        for raw in source:
            parts = raw.split("\t")
            # Only rows with at least five fields carry data; shorter ones
            # are headers.
            if len(parts) >= 5:
                records.append([parts[0], parts[2], float(parts[4])])

    sparse = SparseMatrix([14052, 7272])
    sparse.read_data(records)
    sparse.normalize()
    self.matrix = sparse
def compareRandom(num_trials, tensor_dimensions, matrix_data, cluster_dimensions, maxit_ebc, jitter_max_ebc,
                  objective_tolerance):
    """
    Compare EBC on the given data against EBC on a shuffled copy of it.

    For each trial a fresh SparseMatrix is built from matrix_data, a shuffled
    copy is taken, both are normalized and clustered with identical EBC
    settings.

    Returns:
        A tuple (deltas, iterations_M, iterations_Mr, noconverge_M, noconverge_Mr):
        deltas holds objective(original) - objective(shuffled) per trial; the
        iterations lists hold the iteration counts of runs that stopped before
        maxit_ebc; the noconverge counters count runs that used exactly
        maxit_ebc iterations.
    """
    deltas = []
    iterations_M = []
    iterations_Mr = []
    noconverge_M = 0
    noconverge_Mr = 0

    for j in range(num_trials):
        # Single-argument form prints the same text on Python 2 and 3
        # (the original print statement is a syntax error on Python 3).
        print("Trial  %s" % j)

        M = SparseMatrix(tensor_dimensions)
        M.read_data(matrix_data)
        # Shuffle before normalizing the original, as in the original code.
        Mr = M.shuffle()  # could also be M.shuffle_old()

        M.normalize()
        ebc_M = EBC(M, cluster_dimensions, maxit_ebc, jitter_max_ebc, objective_tolerance)
        cXY_M, objective_M, it_M = ebc_M.run()
        if it_M == maxit_ebc:
            noconverge_M += 1
        else:
            iterations_M.append(it_M)

        Mr.normalize()
        ebc_Mr = EBC(Mr, cluster_dimensions, maxit_ebc, jitter_max_ebc, objective_tolerance)
        cXY_Mr, objective_Mr, it_Mr = ebc_Mr.run()
        if it_Mr == maxit_ebc:
            noconverge_Mr += 1
        else:
            iterations_Mr.append(it_Mr)

        deltas.append(objective_M - objective_Mr)

    return deltas, iterations_M, iterations_Mr, noconverge_M, noconverge_Mr
def calculate_joint_cluster_distribution(self, cXY, K, pXY):
    """ Calculate the joint cluster distribution q(X',Y') using the current prob distribution and cluster assignments.
    (Here we use X' to denote X_hat)

    Args:
        cXY: current cluster assignments for each axis
        K: numbers of clusters along each axis
        pXY: original probability distribution matrix

    Return:
        qXhatYhat: the joint cluster distribution

    Raises:
        Exception: if pXY is not a SparseMatrix.
    """
    if not isinstance(pXY, SparseMatrix):
        raise Exception("Matrix argument to calculate_joint_cluster_distribution not an instance of SparseMatrix.")

    qXhatYhat = SparseMatrix(K)  # joint distribution over clusters
    nonzero = pXY.nonzero_elements
    for coords in nonzero:
        # Map each coordinate to the cluster it is assigned to on its axis.
        # enumerate replaces the Python-2-only xrange(len(coords)) loop.
        cluster_coords = tuple(cXY[axis][index] for axis, index in enumerate(coords))
        qXhatYhat.add_value(cluster_coords, nonzero[coords])
    return qXhatYhat
def test3DMatrix(self):
    # Three 2 x 2 x 2 blocks of ones on the diagonal of a 6 x 6 x 6 tensor.
    data = [[0, 0, 0, 1.0], [0, 0, 1, 1.0], [0, 1, 0, 1.0], [0, 1, 1, 1.0],
            [1, 0, 0, 1.0], [1, 0, 1, 1.0], [1, 1, 0, 1.0], [1, 1, 1, 1.0],
            [2, 2, 2, 1.0], [2, 2, 3, 1.0], [2, 3, 2, 1.0],
            [3, 2, 2, 1.0], [2, 3, 3, 1.0], [3, 3, 2, 1.0],
            [3, 2, 3, 1.0], [3, 3, 3, 1.0],
            [4, 4, 4, 1.0], [4, 4, 5, 1.0], [4, 5, 4, 1.0], [4, 5, 5, 1.0],
            [5, 4, 4, 1.0], [5, 4, 5, 1.0], [5, 5, 4, 1.0], [5, 5, 5, 1.0]]
    matrix = SparseMatrix([6, 6, 6])
    matrix.read_data(data)
    matrix.normalize()
    ebc = EBC(matrix, [3, 3, 3], 10, 1e-10, 0.01)

    # Starting from the block-aligned assignment, the run must keep it,
    # reach objective 0 and report a single iteration.
    assigned_C = [[0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2]]
    cXY, objective, it = ebc.run(assigned_C)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(cXY, assigned_C)
    self.assertAlmostEqual(objective, 0.0)
    self.assertEqual(it, 1)

    for i in range(100):
        cXY, objective, it = ebc.run()  # random initialization
        # Single-argument form prints the same space-separated text on
        # Python 2 and 3 (the original print statement fails on Python 3).
        print("%s %s %s" % (cXY, objective, it))
def from_mtx(file_in, file_out):
    """
    Generates a binary file named file_out which represents the SparseMatrix
    generated from the .mtx file named file_in.

    The first line of file_in is always skipped as a comment; any further
    lines starting with '%' are skipped as well. The next line must hold
    rowRank, colRank and the number of entries, followed by one
    "row col value" line per entry. Row and column numbers in the file are
    decremented by one before being stored.

    Raises:
        IndexError / ValueError: if the size line or an entry line is missing
            or malformed.
    """
    # Context managers close the files even when parsing fails; the original
    # leaked the handle on error and shadowed the builtin name `file`.
    with open(file_in, "r") as mtx_file:
        mtx_file.readline()  # first line is a comment
        line = mtx_file.readline()
        # Matrix Market files may carry additional '%' comment lines before
        # the size line.
        while line.startswith("%"):
            line = mtx_file.readline()

        # Extracting rowRank, colRank and the number of entries
        size_fields = line.split()
        rowRank = int(size_fields[0])
        colRank = int(size_fields[1])
        num_elem = int(size_fields[2])

        # Initializing a SparseMatrix
        A_sparse = SparseMatrix(rowRank, colRank)

        # Reading the remainder of file_in to complete the SparseMatrix
        for _ in range(num_elem):
            entry = mtx_file.readline().split()
            row_coord = int(entry[0]) - 1
            col_coord = int(entry[1]) - 1
            value = np.float64(entry[2].strip("\n"))
            A_sparse.addElement(row_coord, col_coord, value)

    # Creating a binary file and dumping the SparseMatrix A_sparse in it
    with open(file_out, "wb") as out_file:
        pickle.dump(A_sparse, out_file)
class TestBenchmarkEBC(unittest.TestCase):
    """ Benchmark the EBC code as a unittest, using the sparse matrix data. """

    def setUp(self):
        """Load columns 0, 2 and 4 of the sparse paper matrix and normalize it."""
        with open("resources/matrix-ebc-paper-sparse.tsv", "r") as f:
            data = []
            for line in f:
                sl = line.split("\t")
                if len(sl) < 5:  # headers
                    continue
                data.append([sl[0], sl[2], float(sl[4])])

        self.matrix = SparseMatrix([14052, 7272])
        self.matrix.read_data(data)
        self.matrix.normalize()

    def testEbcOnSparseMatrix(self):
        ebc = EBC(self.matrix, [30, 125], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run()

        # Single-argument print calls emit the same text on Python 2 and 3
        # (the original print statements are a syntax error on Python 3).
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(ebc.pXY.nonzero_elements), 29456)
        self.assertEqual(len(set(ebc.cXY[0])), 30)
        self.assertEqual(len(set(ebc.cXY[1])), 125)
def __init__(self, A, b, x0=0, tol=10**-9, max_iter=10**100):
    """
    Initializes the matrix A, column matrix b, initial guess x0, tolerance,
    and maximum number of iterations max_iter.

    D_inv is the inverse of the diagonal matrix D obtained from the diagonal
    elements of A.
    R is the matrix obtained from (A - D) which is equivalent to (L+U)
    Db is the product obtained from the matrix multiplication of D_inv and b.

    A is deep-copied, so later changes to the caller's matrix do not affect
    the solver. When x0 is left at its default of 0, a fresh FullMatrix(n, 1)
    is used as the initial guess. Every diagonal element of A must be
    nonzero, since its reciprocal is taken.
    """
    self.A = copy.deepcopy(A)
    self.b = b
    self.n = A.colRank
    if x0 == 0:
        self.x0 = FullMatrix(self.n, 1)
    else:
        self.x0 = x0
    self.tol = tol
    self.max_iter = max_iter
    self.D_inv = SparseMatrix(self.n, self.n)
    self.R = copy.deepcopy(A)
    for i in range(self.n):
        aii = A.retrieveElement(i, i)
        # 1.0 forces true division: with an integer diagonal entry,
        # `1 / aii` truncates to 0 on Python 2.
        self.D_inv.addElement(i, i, 1.0 / aii)
        self.R.deleteElement(i, i)
    self.Db = self.D_inv.productAx(self.b)
    # Holds the solution once it has been computed; False until then.
    self.x = False
def setUp(self):
    """
    Run EBC on a block-diagonal 6 x 6 x 6 tensor and check the result.

    Uses the EBC call shape in which the constructor takes four arguments
    and run() returns (cXY, objective).
    """
    # Three 2 x 2 x 2 blocks of ones on the diagonal.
    data = [[0, 0, 0, 1.0], [0, 0, 1, 1.0], [0, 1, 0, 1.0], [0, 1, 1, 1.0],
            [1, 0, 0, 1.0], [1, 0, 1, 1.0], [1, 1, 0, 1.0], [1, 1, 1, 1.0],
            [2, 2, 2, 1.0], [2, 2, 3, 1.0], [2, 3, 2, 1.0],
            [3, 2, 2, 1.0], [2, 3, 3, 1.0], [3, 3, 2, 1.0],
            [3, 2, 3, 1.0], [3, 3, 3, 1.0],
            [4, 4, 4, 1.0], [4, 4, 5, 1.0], [4, 5, 4, 1.0], [4, 5, 5, 1.0],
            [5, 4, 4, 1.0], [5, 4, 5, 1.0], [5, 5, 4, 1.0], [5, 5, 5, 1.0]]
    matrix = SparseMatrix([6, 6, 6])
    matrix.read_data(data)
    matrix.normalize()
    ebc = EBC(matrix, [3, 3, 3], 10, 1e-10)

    assigned_C = [[0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2]]
    cXY, objective = ebc.run(assigned_C)
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(cXY, assigned_C)
    self.assertAlmostEqual(objective, 0.0)

    cXY, objective = ebc.run()  # random initialization
    self.assertAlmostEqual(objective, 0.0)
def transfer_sparse_matrix(self, nrow, ncol, data):
    """
    Convert a matrix into the SparseMatrix structure.

    Each row i of data is stored with addRow as a {column index: value}
    mapping.

    :param nrow: length of unique user
    :param ncol: length of unique movie
    :param data: numpy matrix
    :return: SparseMatrix
    """
    sparse = SparseMatrix(nrow, ncol)
    for i in range(len(data)):
        # Fixed: the original called `aparse.addRow`, an undefined name that
        # raised NameError on the first row.
        sparse.addRow(i, dict(enumerate(data[i, :])))
    return sparse
def setUp(self):
    """Build the 6 x 6 fixture as [row label, column label, value] triples."""
    # Values laid out as a grid; entry [r][c] becomes [str(r), str(c), value].
    grid = [
        [0.05, 0.05, 0.05, 0.00, 0.00, 0.00],
        [0.05, 0.05, 0.05, 0.00, 0.00, 0.00],
        [0.00, 0.00, 0.00, 0.05, 0.05, 0.05],
        [0.00, 0.00, 0.00, 0.05, 0.05, 0.05],
        [0.04, 0.04, 0.00, 0.04, 0.04, 0.04],
        [0.04, 0.04, 0.04, 0.00, 0.04, 0.04],
    ]
    triples = []
    for row_index, row_values in enumerate(grid):
        for col_index, cell in enumerate(row_values):
            triples.append([str(row_index), str(col_index), cell])
    self.data = triples

    self.matrix = SparseMatrix([6, 6])
    self.matrix.read_data(self.data)
class TestEbc(unittest.TestCase):
    """Tests for SparseMatrix loading and EBC runs on 2-D and 3-D data."""

    def setUp(self):
        """Load a 6 x 6 fixture given as [row label, column label, value] triples."""
        self.data = [["0", "0", 0.05], ["0", "1", 0.05], ["0", "2", 0.05], ["0", "3", 0.00], ["0", "4", 0.00], ["0", "5", 0.00],
                     ["1", "0", 0.05], ["1", "1", 0.05], ["1", "2", 0.05], ["1", "3", 0.00], ["1", "4", 0.00], ["1", "5", 0.00],
                     ["2", "0", 0.00], ["2", "1", 0.00], ["2", "2", 0.00], ["2", "3", 0.05], ["2", "4", 0.05], ["2", "5", 0.05],
                     ["3", "0", 0.00], ["3", "1", 0.00], ["3", "2", 0.00], ["3", "3", 0.05], ["3", "4", 0.05], ["3", "5", 0.05],
                     ["4", "0", 0.04], ["4", "1", 0.04], ["4", "2", 0.00], ["4", "3", 0.04], ["4", "4", 0.04], ["4", "5", 0.04],
                     ["5", "0", 0.04], ["5", "1", 0.04], ["5", "2", 0.04], ["5", "3", 0.00], ["5", "4", 0.04], ["5", "5", 0.04]]
        self.matrix = SparseMatrix([6, 6])
        self.matrix.read_data(self.data)

    def testDataLoad(self):
        # Only the nonzero fixture entries are expected to be stored.
        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12) throughout this class.
        self.assertEqual(sorted(self.matrix.nonzero_elements.items(), key=itemgetter(0)),
                         [((0, 0), 0.05), ((0, 1), 0.05), ((0, 2), 0.05),
                          ((1, 0), 0.05), ((1, 1), 0.05), ((1, 2), 0.05),
                          ((2, 3), 0.05), ((2, 4), 0.05), ((2, 5), 0.05),
                          ((3, 3), 0.05), ((3, 4), 0.05), ((3, 5), 0.05),
                          ((4, 0), 0.04), ((4, 1), 0.04), ((4, 3), 0.04), ((4, 4), 0.04), ((4, 5), 0.04),
                          ((5, 0), 0.04), ((5, 1), 0.04), ((5, 2), 0.04), ((5, 4), 0.04), ((5, 5), 0.04)])

    def testOldMatrix(self):
        with open("resources/matrix-ebc-paper-dense.tsv", "r") as f:
            data = []
            for line in f:
                sl = line.split("\t")
                if len(sl) < 5:  # headers
                    continue
                data.append([sl[0], sl[2], float(sl[4])])

        matrix = SparseMatrix([3514, 1232])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [30, 125], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run()

        # Single-argument print calls emit the same text on Python 2 and 3
        # (the original print statements are a syntax error on Python 3).
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))

        self.assertEqual(len(ebc.pXY.nonzero_elements), 10007)
        self.assertEqual(len(set(ebc.cXY[0])), 30)
        self.assertEqual(len(set(ebc.cXY[1])), 125)

    def testOldMatrix3d(self):
        with open("resources/matrix-ebc-paper-dense-3d.tsv", "r") as f:
            data = []
            for line in f:
                sl = line.split("\t")
                data.append([sl[0], sl[1], sl[2], float(sl[3])])

        matrix = SparseMatrix([756, 996, 1232])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [30, 30, 10], 100, 1e-10, 0.01)
        cXY, objective, it = ebc.run()

        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))

        self.assertEqual(len(ebc.pXY.nonzero_elements), 10007)
        self.assertEqual(len(set(ebc.cXY[0])), 30)
        self.assertEqual(len(set(ebc.cXY[1])), 30)
        self.assertEqual(len(set(ebc.cXY[2])), 10)

    def test3DMatrix(self):
        # Three 2 x 2 x 2 blocks of ones on the diagonal of a 6 x 6 x 6 tensor.
        data = [[0, 0, 0, 1.0], [0, 0, 1, 1.0], [0, 1, 0, 1.0], [0, 1, 1, 1.0],
                [1, 0, 0, 1.0], [1, 0, 1, 1.0], [1, 1, 0, 1.0], [1, 1, 1, 1.0],
                [2, 2, 2, 1.0], [2, 2, 3, 1.0], [2, 3, 2, 1.0],
                [3, 2, 2, 1.0], [2, 3, 3, 1.0], [3, 3, 2, 1.0],
                [3, 2, 3, 1.0], [3, 3, 3, 1.0],
                [4, 4, 4, 1.0], [4, 4, 5, 1.0], [4, 5, 4, 1.0], [4, 5, 5, 1.0],
                [5, 4, 4, 1.0], [5, 4, 5, 1.0], [5, 5, 4, 1.0], [5, 5, 5, 1.0]]
        matrix = SparseMatrix([6, 6, 6])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [3, 3, 3], 10, 1e-10, 0.01)

        assigned_C = [[0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2]]
        cXY, objective, it = ebc.run(assigned_C)
        self.assertEqual(cXY, assigned_C)
        self.assertAlmostEqual(objective, 0.0)
        self.assertEqual(it, 1)

        for i in range(100):
            cXY, objective, it = ebc.run()  # random initialization
            print("%s %s %s" % (cXY, objective, it))
class TestSanityCheck(unittest.TestCase):
    """ Do a sanity check for the EBC code, using the data from the original ITCC paper. """

    # Expected approximate distribution, indexed [row][column]. Both tests
    # compare their reconstructed distribution against this table.
    EXPECTED_APPROX_DISTRIBUTION = [
        [0.054, 0.054, 0.042, 0.0, 0.0, 0.0],
        [0.054, 0.054, 0.042, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.042, 0.054, 0.054],
        [0.0, 0.0, 0.0, 0.042, 0.054, 0.054],
        [0.036, 0.036, 0.028, 0.028, 0.036, 0.036],
        [0.036, 0.036, 0.028, 0.028, 0.036, 0.036],
    ]

    def setUp(self):
        """Load the ITCC paper matrix into a normalized 6 x 6 SparseMatrix."""
        with open("resources/matrix-itcc-paper-orig.tsv", "r") as f:
            data = [l.split('\t') for l in f]

        self.matrix = SparseMatrix([6, 6])
        self.matrix.read_data(data)
        self.matrix.normalize()

    def cartesian(self, arrays, out=None):
        """
        Return the cartesian product of the given 1-D sequences.

        Args:
            arrays: list of 1-D array-likes.
            out: optional pre-allocated 2-D array to fill (used by the
                recursion).

        Returns:
            A 2-D array with one row per combination and one column per
            input sequence; the first column varies slowest.
        """
        arrays = [np.asarray(x) for x in arrays]
        dtype = arrays[0].dtype

        n = np.prod([x.size for x in arrays])
        if out is None:
            out = np.zeros([n, len(arrays)], dtype=dtype)

        # Floor division keeps m an integer so it can be used as a slice
        # bound; plain `/` yields a float on Python 3.
        m = n // arrays[0].size
        out[:, 0] = np.repeat(arrays[0], m)
        if arrays[1:]:
            # Fill the first block recursively, then copy it for each
            # remaining value of the first sequence.
            self.cartesian(arrays[1:], out=out[0:m, 1:])
            for j in range(1, arrays[0].size):
                out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]
        return out

    def _assert_expected_distribution(self, approx_distribution):
        """Check every cell of approx_distribution against the expected table."""
        for row, expected_row in enumerate(self.EXPECTED_APPROX_DISTRIBUTION):
            for col, expected in enumerate(expected_row):
                # assertAlmostEqual replaces the deprecated assertAlmostEquals
                # alias (removed from unittest in Python 3.12).
                self.assertAlmostEqual(approx_distribution[(row, col)], expected)

    def testEbcOnSparseMatrix(self):
        ebc = EBC(self.matrix, [3, 2], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run(verbose=False)
        # Single-argument print calls emit the same text on Python 2 and 3
        # (the original print statements are a syntax error on Python 3).
        print("--> ebc")
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))

        ebc = EBC(self.matrix, [3, 2], 10, 1e-10, 0.01)
        ebc.run(assigned_clusters=[[2, 0, 1, 1, 2, 2], [0, 0, 1, 0, 1, 1]], verbose=False)

        # Rebuild the approximate distribution cell by cell: the product of
        # the per-axis factors times the joint cluster value.
        indices = [range(N_d) for N_d in ebc.pXY.N]
        index_list = self.cartesian(indices)
        approx_distribution = {}
        for location in index_list:
            q = 1.0
            c_location = []
            for i in range(len(location)):
                c_i = ebc.cXY[i][location[i]]
                c_location.append(c_i)
                q *= ebc.qXxHat[i][location[i]]
            q *= ebc.qXhatYhat.get(tuple(c_location))
            approx_distribution[tuple(location)] = q

        self._assert_expected_distribution(approx_distribution)

    def testEbc2dOnSparseMatrix(self):
        with open("resources/matrix-itcc-paper-orig.tsv", "r") as f:
            data = [l.split('\t') for l in f]
        m = ebc2d.get_matrix_from_data(data)

        # run without assigned clusters
        ebc = EBC2D(m, [3, 2], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run(verbose=False)
        print("--> ebc2d")
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))

        # run with assigned clusters
        ebc = EBC2D(m, [3, 2], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run(assigned_clusters=[[2, 0, 1, 1, 2, 2], [0, 0, 1, 0, 1, 1]], verbose=False)

        indices = [range(N_d) for N_d in ebc.pXY.shape]
        index_list = self.cartesian(indices)
        approx_distribution = {}
        qX_xhat = [ebc.qX_xhat, ebc.qY_yhat]
        for location in index_list:
            q = 1.0
            c_location = []
            for i in range(len(location)):
                c_i = cXY[i][location[i]]
                c_location.append(c_i)
                q *= qX_xhat[i][location[i]]
            q *= ebc.qXhatYhat[c_location[0], c_location[1]]
            approx_distribution[tuple(location)] = q

        self._assert_expected_distribution(approx_distribution)
def setUp(self):
    """Read the tab-separated sample file into a SparseMatrix of size [2, 4, 9]."""
    # The context manager closes the handle even if reading fails
    # (the original left the file open until garbage collection).
    with open('tests/sample-matrix-file.txt', 'r') as sample_file:
        self.data = [l.split('\t') for l in sample_file.readlines()]
    self.matrix = SparseMatrix([2, 4, 9])
    self.matrix.read_data(self.data)
class WilkinsonTestPartI(unittest.TestCase):
    """ Test suite for part I, when ground truth is known. """

    def setUp(self):
        """Build the same augmented system in full and sparse form."""
        self.A = [
            [1, 2, 0, 0, 3],
            [4, 5, 6, 0, 0],
            [0, 7, 8, 0, 9],
            [0, 0, 0, 10, 0],
            [11, 0, 0, 0, 12],
        ]
        self.x = [[5], [4], [3], [2], [1]]

        self.x_full = FullMatrix(5, 1)
        self.A_full = FullMatrix(5, 5)
        self.A_sparse = SparseMatrix(5, 5)

        for row_index, row in enumerate(self.A):
            self.x_full.addElement(row_index, 0, self.x[row_index][0])
            for col_index, entry in enumerate(row):
                self.A_full.addElement(row_index, col_index, entry)
                self.A_sparse.addElement(row_index, col_index, entry)

        # Both representations are augmented with the same column vector.
        self.A_full.augment(self.x_full)
        self.A_sparse.augment(self.x_full)

    def _permute_both(self):
        """Apply the same two row permutations to the full, then the sparse matrix."""
        for mat in (self.A_full, self.A_sparse):
            mat.rowPermute(0, 2)
            mat.rowPermute(0, 4)

    def _scale_both(self):
        """Apply the same two row scalings to the full, then the sparse matrix."""
        for mat in (self.A_full, self.A_sparse):
            mat.rowScale(0, 3, 3)
            mat.rowScale(4, 1, -4.4)

    def test_rowPermute(self):
        self._permute_both()
        difference = norm2(self.A_full, self.A_sparse)
        self.assertTrue(difference == 0.0)

    def test_rowScale(self):
        self._scale_both()
        difference = norm2(self.A_full, self.A_sparse)
        self.assertTrue(difference == 0.0)

    def test_productAx(self):
        # The vector comes from the full matrix here.
        x = self.A_full.deaugment()
        self.A_sparse.deaugment()
        full = self.A_full.productAx(x)
        sparse = self.A_sparse.productAx(x)
        self.assertTrue(norm2(full, sparse) == 0.0)

    def test_combined(self):
        self._permute_both()
        self._scale_both()
        # The vector comes from the sparse matrix here.
        x = self.A_sparse.deaugment()
        self.A_full.deaugment()
        full = self.A_full.productAx(x)
        sparse = self.A_sparse.productAx(x)
        self.assertTrue(norm2(full, sparse) == 0.0)
from matrix import FullMatrix, SparseMatrix
import pickle

# Script: read the header of "memplus.mtx", create a SparseMatrix of the
# declared size and walk over the declared number of entry lines.
# NOTE(review): this snippet appears to end mid-loop. Within the visible code
# the parsed entry is never added to A_sparse, the file is never closed, and
# pickle is never used - confirm against the full script.
file = open("memplus.mtx","r") #Opening memplus.mtx in reading mode
line = file.readline() #Reading the first line which is a comment
line = file.readline() #Reading the second line which contains rowRank, colRank, and number of entries

#Extracting rowRank, colRank and the number of entries from the size line
line = line.split()
rowRank = int(line[0])
colRank = int(line[1])
num_elem = int(line[2])

#Initializing a SparseMatrix
A_sparse = SparseMatrix(rowRank,colRank)

#Reading the remainder of memplus.mtx to complete the SparseMatrix
for i in range (1,num_elem+1): #Loop for reading till the end of memplus.mtx
    #Progress message for every 1000 entry lines processed
    if i%1000 == 0:
        print ("Added %d elements." %i)
    ''' Reading the file and extracting the row, column, and the value of the element. '''
    line = file.readline()
    # Split the entry line into its whitespace-separated fields.
    line = line.split()
class Jacobi_Solver:
    """
    An instance is a representation of the linear system to be solved using
    the Jacobi iterative method.
    """

    def __init__(self, A, b, x0=0, tol=10**-9, max_iter=10**100):
        """
        Initializes the matrix A, column matrix b, initial guess x0,
        tolerance, and maximum number of iterations max_iter.

        D_inv is the inverse of the diagonal matrix D obtained from the
        diagonal elements of A.
        R is the matrix obtained from (A - D) which is equivalent to (L+U)
        Db is the product obtained from the matrix multiplication of D_inv
        and b.

        A is deep-copied. When x0 is left at its default of 0, a fresh
        FullMatrix(n, 1) is used as the initial guess. Every diagonal element
        of A must be nonzero, since its reciprocal is taken.
        """
        self.A = copy.deepcopy(A)
        self.b = b
        self.n = A.colRank
        if x0 == 0:
            self.x0 = FullMatrix(self.n, 1)
        else:
            self.x0 = x0
        self.tol = tol
        self.max_iter = max_iter
        self.D_inv = SparseMatrix(self.n, self.n)
        self.R = copy.deepcopy(A)
        for i in range(self.n):
            aii = A.retrieveElement(i, i)
            # 1.0 forces true division: with an integer diagonal entry,
            # `1 / aii` truncates to 0 on Python 2.
            self.D_inv.addElement(i, i, 1.0 / aii)
            self.R.deleteElement(i, i)
        self.Db = self.D_inv.productAx(self.b)
        # Holds the solution after solve(); False until then.
        self.x = False

    def one_iter(self):
        """
        One iteration of the Jacobi method.

        Returns: a new FullMatrix(n, 1) holding Db - D_inv * (R * x0).
        """
        a = self.D_inv.productAx(self.R.productAx(self.x0))
        x = FullMatrix(self.n, 1)
        for i in range(self.n):
            t = -1 * a.retrieveElement(i, 0) + self.Db.retrieveElement(i, 0)
            x.addElement(i, 0, t)
        return x

    def norm2(self, mat1, mat2):
        """
        Calculates the second norm of [mat1 - mat2].

        mat1: A matrix in either full or sparse format
        mat2: A matrix in either full or sparse format

        Returns: Frobenius second norm of the matrix (mat1 - mat2)
        """
        result = 0
        # Rows are taken from mat1 and columns from mat2; the two matrices
        # are expected to have the same dimensions.
        for i in range(mat1.rowRank):
            for k in range(mat2.colRank):
                difference = mat1.retrieveElement(i, k) - mat2.retrieveElement(i, k)
                result += difference**2
        return math.sqrt(result)

    def residual_norm(self):
        """
        Calculates the normalized residual norm using self.x, self.A, and
        self.b: norm(b - A*x) divided by norm(b).

        This must be called once the method solve has been called.
        Raises ZeroDivisionError when every entry of b is zero.
        """
        b_calc = self.A.productAx(self.x)
        numerator = self.norm2(self.b, b_calc)
        denominator = 0
        for i in range(self.b.rowRank):
            denominator += (self.b.retrieveElement(i, 0))**2
        denominator = math.sqrt(denominator)
        return numerator / denominator

    def solve(self):
        """
        Solves the system of linear equations using Jacobi iterative method
        without implementing any matrix preconditioning.

        On return self.x holds the last iterate and self.max_iter is
        overwritten with the iteration counter: the number of the iteration
        that converged, or the original max_iter + 1 when the tolerance was
        never reached.
        """
        num_iter = 1
        # Start from the initial guess so self.x is defined even when
        # max_iter < 1 (the original raised a NameError in that case).
        x = self.x0
        while num_iter <= self.max_iter:
            x = self.one_iter()
            # Converged once two successive iterates are closer than tol.
            if self.norm2(x, self.x0) < self.tol:
                break
            num_iter += 1
            self.x0 = x
        self.x = x
        self.max_iter = num_iter