Beispiel #1
0
class TestMatrix(unittest.TestCase):
    """Tests SparseMatrix loading and shuffling against the sample matrix file."""

    def setUp(self):
        """Read the tab-separated sample file into a SparseMatrix with axes [2, 4, 9]."""
        # The context manager closes the file even if reading fails; the
        # previous open(...).readlines() left the handle to the garbage collector.
        with open('tests/sample-matrix-file.txt', 'r') as sample_file:
            self.data = [line.split('\t') for line in sample_file]
        self.matrix = SparseMatrix([2, 4, 9])
        self.matrix.read_data(self.data)

    def testMatrixInit(self):
        """Check the stored nonzero values and the per-axis feature-name-to-id maps."""
        expected_nonzero = (
            ((1, 3, 7), 2.0),
            ((0, 0, 0), 2.0),
            ((0, 0, 2), 2.0),
            ((1, 1, 5), 7.0),
            ((1, 1, 3), 3.0),
            ((1, 3, 6), 2.0),
            ((1, 3, 8), 2.0),
            ((0, 0, 1), 2.0),
            ((1, 1, 4), 2.0),
            ((1, 2, 5), 2.0),
        )
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        for coords, value in expected_nonzero:
            self.assertEqual(self.matrix.nonzero_elements[coords], value)
        self.assertEqual(len(self.matrix.nonzero_elements), 10)
        self.assertEqual(self.matrix.feature_ids[0], {'mice': 1, 'patient': 0})
        self.assertEqual(self.matrix.feature_ids[1], {'R92Q': 1, 'R91W': 2, 'Val30Met': 0, 'R90W': 3})
        self.assertEqual(self.matrix.feature_ids[2], {'START_ENTITY|nmod|END_ENTITY': 1,
                                                      'START_ENTITY|nummod|END_ENTITY': 5,
                                                      'FAP|compound|END_ENTITY': 2,
                                                      'expression|nmod|END_ENTITY': 8,
                                                      '+|compound|END_ENTITY': 7,
                                                      'mice|nummod|END_ENTITY': 3,
                                                      'homozygous|nsubj|START_ENTITY': 6,
                                                      'mutation|appos|END_ENTITY': 4,
                                                      'START_ENTITY|nmod|FAP': 0})

    def testShuffle(self):
        """Shuffling must keep the number of nonzero elements and the set of their values."""
        shuffled_matrix = self.matrix.shuffle()
        self.assertEqual(len(shuffled_matrix.nonzero_elements), len(self.matrix.nonzero_elements))
        self.assertEqual(set(shuffled_matrix.nonzero_elements.values()), set(self.matrix.nonzero_elements.values()))
        print("shuffled matrix elements: ", shuffled_matrix.nonzero_elements)
Beispiel #2
0
def main():
    """Run EBC several times on a TSV data file and write entity cluster assignments.

    Positional command-line arguments (``sys.argv[1:]``):
        1. data_file: tab-separated input file.
        2. ebc_cols: comma-separated indices of the columns handed to the
           matrix; every selected column except the last is used as a
           matrix dimension.
        3. K: comma-separated cluster counts passed to EBC.
        4. N_runs: number of EBC runs.
        5. output_file: path of the tab-separated result file.
        6. jitter_max: passed through to EBC.
        7. max_iterations_ebc: passed through to EBC.
        8. entity_cols: comma-separated column indices identifying an entity;
           each one must also appear in ebc_cols.
        9. object_toler: passed through to EBC.

    Each output line holds the entity's feature ids, the entity's original
    column values, and one comma-joined cluster tuple per run, separated by
    tabs.
    """
    data_file = sys.argv[1]
    ebc_cols = [int(e) for e in sys.argv[2].split(",")]
    K = [int(e) for e in sys.argv[3].split(",")]
    N_runs = int(sys.argv[4])
    output_file = sys.argv[5]
    jitter_max = float(sys.argv[6])
    max_iterations_ebc = int(sys.argv[7])
    entity_cols = [int(e) for e in sys.argv[8].split(",")]
    object_toler = float(sys.argv[9])

    # get original data; the context manager closes the input file promptly
    with open(data_file, "r") as source:
        raw_data = [line.split("\t") for line in source]
    data = [[d[i] for i in ebc_cols] for d in raw_data]
    data_dimensions = len(data[0]) - 1

    # get axis length for each dimension
    N = [len(set(d[dim] for d in data)) for dim in range(data_dimensions)]
    print(N)

    # set up matrix
    M = SparseMatrix(N)
    M.read_data(data)
    M.normalize()

    # Matrix dimension each entity column maps to, in entity_cols order, so
    # that position p of an entity-id tuple always belongs to entity_dims[p].
    entity_dims = [ebc_cols.index(i) for i in entity_cols]

    # set up entity map to ids
    entity_map = defaultdict(tuple)
    for d in raw_data:
        entity = tuple(d[i] for i in entity_cols)
        entity_ids = tuple(M.feature_ids[dim][d[i]] for dim, i in zip(entity_dims, entity_cols))
        entity_map[entity_ids] = entity

    # run EBC and get entity cluster assignments
    ebc_M = EBC(M, K, max_iterations_ebc, jitter_max, object_toler)
    clusters = defaultdict(list)
    for t in range(N_runs):
        # Single-argument print behaves identically on Python 2 and 3; the
        # double space matches the old `print "run ", t` output.
        print("run  %d" % t)
        cXY_M, objective_M, it_M = ebc_M.run()
        for e1 in entity_map:
            # Pair each stored id with its own matrix dimension. The previous
            # code indexed the id tuple with the dimension index, which is only
            # correct when the entity columns are the leading ebc columns.
            c1_i = tuple(cXY_M[dim][feature_id] for dim, feature_id in zip(entity_dims, e1))
            clusters[e1].append(c1_i)

    # print assignments; the file is closed (and flushed) even on error
    with open(output_file, "w") as writer:
        for k in clusters:
            e1_name = entity_map[k]
            writer.write(",".join([str(e) for e in k]) + "\t" +
                         ",".join([e for e in e1_name]) + "\t" +
                         "\t".join([",".join([str(f) for f in e]) for e in clusters[k]]) + "\n")
def from_csv_crs(rowPtr, colInd, value, file_out):
    """
    Generates a binary file named file_out which represents the SparseMatrix
    generated from the csv files containing row pointer, column indices, and
    values in files named rowPtr, colInd, and value respectively.

    Every entry read from rowPtr and colInd is decremented by one before it
    is stored (presumably converting 1-based to 0-based indices -- confirm
    against the SparseMatrix implementation).

    Precondition: Number of values in colInd and value is at least the
    number of values in rowPtr. Each entry in rowPtr and colInd is
    an integer and each entry in value is either a float or an integer.

    Raises:
        ValueError: if an entry cannot be parsed, if the value file has fewer
            lines than colInd, or if colInd has fewer lines than rowPtr.
    """
    # One `with` for all three inputs so every handle is closed even when a
    # line fails to parse.
    with open(rowPtr, "r") as f1, open(colInd, "r") as f2, open(value, "r") as f3:
        rowP = [int(line.strip("\n")) - 1 for line in f1]

        colI = []
        val = []
        for line in f2:
            colI.append(int(line.strip("\n")) - 1)
            # One value per column index; a missing value yields "" here and
            # np.float64 raises ValueError, as before.
            val.append(np.float64(f3.readline().strip("\n")))

    if len(colI) < len(rowP):
        raise ValueError("colInd must contain at least as many entries as rowPtr "
                         "(got %d and %d)" % (len(colI), len(rowP)))

    A_sparse = SparseMatrix(len(rowP) - 1, len(rowP) - 1)
    A_sparse._rowPtr = rowP[:]
    A_sparse._colInd = colI[:]
    A_sparse._value = val[:]

    # Creating a binary file and dumping the SparseMatrix A_sparse in it
    with open(file_out, "wb") as out:
        pickle.dump(A_sparse, out)
Beispiel #4
0
def main():
    """Example run of EBC on the sparse matrix file from the resources folder."""
    data = []
    with open("resources/matrix-ebc-paper-sparse.tsv", "r") as handle:
        for raw in handle:
            fields = raw.split("\t")
            # Lines with fewer than five fields are headers; keep the rest.
            if len(fields) >= 5:
                data.append([fields[0], fields[2], float(fields[4])])

    matrix = SparseMatrix([14052, 7272])
    matrix.read_data(data)
    matrix.normalize()

    clusterer = EBC(matrix, [30, 125], 10, 1e-10, 0.01)
    # run() is unpacked into three values: assignments, objective, iterations.
    assignments, objective_value, iterations = clusterer.run()
Beispiel #5
0
    def setUp(self):
        """Load the ITCC paper TSV resource into a normalized SparseMatrix with axes [6, 6]."""
        rows = []
        with open("resources/matrix-itcc-paper-orig.tsv", "r") as tsv:
            for raw_line in tsv:
                rows.append(raw_line.split('\t'))

        self.matrix = SparseMatrix([6, 6])
        self.matrix.read_data(rows)
        self.matrix.normalize()
    def setUp(self):
        """Fill a 5x5 FullMatrix and SparseMatrix with the same entries, plus a 5x1 FullMatrix."""
        self.A = [[1, 2, 0, 0, 3], [4, 5, 6, 0, 0], [0, 7, 8, 0, 9],
                  [0, 0, 0, 10, 0], [11, 0, 0, 0, 12]]

        self.x = [[5], [4], [3], [2], [1]]

        self.x_full = FullMatrix(5, 1)

        self.A_full = FullMatrix(5, 5)
        self.A_sparse = SparseMatrix(5, 5)

        # Walk the nested lists row by row; every entry (zeros included) is
        # handed to addElement on both matrix types.
        for i, (x_row, a_row) in enumerate(zip(self.x, self.A)):
            self.x_full.addElement(i, 0, x_row[0])
            for j, entry in enumerate(a_row):
                self.A_full.addElement(i, j, entry)
                self.A_sparse.addElement(i, j, entry)
Beispiel #7
0
    def testOldMatrix3d(self):
        """Run EBC on the dense 3-D resource matrix and check the resulting cluster counts."""
        with open("resources/matrix-ebc-paper-dense-3d.tsv", "r") as f:
            data = []
            for line in f:
                sl = line.split("\t")
                data.append([sl[0], sl[1], sl[2], float(sl[3])])

        matrix = SparseMatrix([756, 996, 1232])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [30, 30, 10], 100, 1e-10, 0.01)
        cXY, objective, it = ebc.run()
        # Single-argument print works on Python 2 and 3; the double space
        # reproduces the old `print "label: ", value` output.
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(len(ebc.pXY.nonzero_elements), 10007)
        self.assertEqual(len(set(ebc.cXY[0])), 30)
        self.assertEqual(len(set(ebc.cXY[1])), 30)
        self.assertEqual(len(set(ebc.cXY[2])), 10)
Beispiel #8
0
    def setUp(self):
        """Load the sparse paper matrix from its TSV resource and normalize it."""
        with open("resources/matrix-ebc-paper-sparse.tsv", "r") as handle:
            # Split lazily and drop header lines (fewer than five fields);
            # the list is fully built before the file is closed.
            split_lines = (raw.split("\t") for raw in handle)
            data = [[fields[0], fields[2], float(fields[4])]
                    for fields in split_lines if len(fields) >= 5]

        self.matrix = SparseMatrix([14052, 7272])
        self.matrix.read_data(data)
        self.matrix.normalize()
Beispiel #9
0
def compareRandom(num_trials, tensor_dimensions, matrix_data, cluster_dimensions,
                  maxit_ebc, jitter_max_ebc, objective_tolerance):
    """Compare EBC on the real matrix against EBC on a shuffled copy.

    For each trial a SparseMatrix is built from matrix_data, a shuffled copy
    is taken, both are normalized and EBC is run on each.

    Args:
        num_trials: number of trials to run.
        tensor_dimensions: axis lengths passed to SparseMatrix.
        matrix_data: rows passed to SparseMatrix.read_data.
        cluster_dimensions: cluster counts passed to EBC.
        maxit_ebc: iteration limit passed to EBC; a run that reports exactly
            this many iterations is counted as not converged.
        jitter_max_ebc: passed through to EBC.
        objective_tolerance: passed through to EBC.

    Returns:
        A tuple (deltas, iterations_M, iterations_Mr, noconverge_M,
        noconverge_Mr): per-trial objective differences (original minus
        shuffled), iteration counts of the converged runs for each matrix,
        and the number of non-converged runs for each matrix.
    """
    deltas = []
    iterations_M = []
    iterations_Mr = []
    noconverge_M = 0
    noconverge_Mr = 0
    for j in range(num_trials):
        # Single-argument print works on Python 2 and 3; the double space
        # reproduces the old `print "Trial ", j` output.
        print("Trial  %s" % (j,))

        M = SparseMatrix(tensor_dimensions)
        M.read_data(matrix_data)
        Mr = M.shuffle()  # could also be M.shuffle_old()

        M.normalize()

        ebc_M = EBC(M, cluster_dimensions, maxit_ebc, jitter_max_ebc, objective_tolerance)
        cXY_M, objective_M, it_M = ebc_M.run()
        if it_M == maxit_ebc:
            noconverge_M += 1
        else:
            iterations_M.append(it_M)

        Mr.normalize()

        ebc_Mr = EBC(Mr, cluster_dimensions, maxit_ebc, jitter_max_ebc, objective_tolerance)
        cXY_Mr, objective_Mr, it_Mr = ebc_Mr.run()
        if it_Mr == maxit_ebc:
            noconverge_Mr += 1
        else:
            iterations_Mr.append(it_Mr)

        deltas.append(objective_M - objective_Mr)
    return deltas, iterations_M, iterations_Mr, noconverge_M, noconverge_Mr
Beispiel #10
0
    def calculate_joint_cluster_distribution(self, cXY, K, pXY):
        """ Calculate the joint cluster distribution q(X',Y') using the current prob distribution and
        cluster assignments. (Here we use X' to denote X_hat)

        Args:
            cXY: current cluster assignments for each axis
            K: numbers of clusters along each axis
            pXY: original probability distribution matrix

        Return:
            qXhatYhat: the joint cluster distribution

        Raises:
            Exception: if pXY is not a SparseMatrix.
        """
        if not isinstance(pXY, SparseMatrix):
            raise Exception("Matrix argument to calculate_joint_cluster_distribution not an instance of SparseMatrix.")
        qXhatYhat = SparseMatrix(K)  # joint distribution over clusters
        # items() avoids a second lookup per element; enumerate replaces the
        # Python-2-only xrange index loop.
        for coords, value in pXY.nonzero_elements.items():
            # map each axis index of this element to its cluster on that axis
            cluster_coords = tuple(cXY[axis][index] for axis, index in enumerate(coords))
            qXhatYhat.add_value(cluster_coords, value)
        return qXhatYhat
Beispiel #11
0
    def test3DMatrix(self):
        """Check EBC on a 6x6x6 matrix made of three 2x2x2 blocks of ones.

        Starting from the matching block assignment, EBC must return that
        assignment with a zero objective after one iteration. Afterwards 100
        randomly initialised runs are executed and printed (no assertions).
        """
        data = [[0, 0, 0, 1.0],
                [0, 0, 1, 1.0],
                [0, 1, 0, 1.0],
                [0, 1, 1, 1.0],
                [1, 0, 0, 1.0],
                [1, 0, 1, 1.0],
                [1, 1, 0, 1.0],
                [1, 1, 1, 1.0],
                [2, 2, 2, 1.0],
                [2, 2, 3, 1.0],
                [2, 3, 2, 1.0],
                [3, 2, 2, 1.0],
                [2, 3, 3, 1.0],
                [3, 3, 2, 1.0],
                [3, 2, 3, 1.0],
                [3, 3, 3, 1.0],
                [4, 4, 4, 1.0],
                [4, 4, 5, 1.0],
                [4, 5, 4, 1.0],
                [4, 5, 5, 1.0],
                [5, 4, 4, 1.0],
                [5, 4, 5, 1.0],
                [5, 5, 4, 1.0],
                [5, 5, 5, 1.0]]
        matrix = SparseMatrix([6, 6, 6])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [3, 3, 3], 10, 1e-10, 0.01)
        assigned_C = [[0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2]]
        cXY, objective, it = ebc.run(assigned_C)
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(cXY, assigned_C)
        self.assertAlmostEqual(objective, 0.0)
        self.assertEqual(it, 1)

        for _ in range(100):
            cXY, objective, it = ebc.run()  # random initialization
            # Single-argument print works on Python 2 and 3 and keeps the
            # space-separated output of the old print statement.
            print("%s %s %s" % (cXY, objective, it))
def from_mtx(file_in, file_out):
    """
    Generates a binary file named file_out which represents the SparseMatrix
    generated from the .mtx file named file_in.

    The first line of file_in is skipped as a comment. The second line must
    hold the row rank, column rank and number of entries. Each following
    line holds a 1-based row index, a 1-based column index and a value; the
    indices are converted to 0-based before the element is added.

    Raises:
        IndexError: if the file holds fewer entry lines than announced or a
            line has fewer than three fields.
        ValueError: if a field cannot be parsed as a number.
    """
    # The context manager closes the input even when a line fails to parse.
    with open(file_in, "r") as mtx:
        mtx.readline()  # the first line is a comment
        header = mtx.readline().split()

        # Extracting rowRank, colRank and the number of entries
        rowRank = int(header[0])
        colRank = int(header[1])
        num_elem = int(header[2])

        # Initializing a SparseMatrix
        A_sparse = SparseMatrix(rowRank, colRank)

        # Reading the announced number of entries to complete the SparseMatrix
        for _ in range(num_elem):
            fields = mtx.readline().split()
            row_coord = int(fields[0]) - 1
            col_coord = int(fields[1]) - 1
            # split() already removed the trailing newline
            entry = np.float64(fields[2])
            A_sparse.addElement(row_coord, col_coord, entry)

    # Creating a binary file and dumping the SparseMatrix A_sparse in it
    with open(file_out, "wb") as out:
        pickle.dump(A_sparse, out)
Beispiel #13
0
class TestBenchmarkEBC(unittest.TestCase):
    """ Benchmark the EBC code as a unittest, using the sparse matrix data. """

    def setUp(self):
        """Load the sparse paper matrix from its TSV resource and normalize it."""
        with open("resources/matrix-ebc-paper-sparse.tsv", "r") as f:
            data = []
            for line in f:
                sl = line.split("\t")
                if len(sl) < 5:  # headers
                    continue
                data.append([sl[0], sl[2], float(sl[4])])

        self.matrix = SparseMatrix([14052, 7272])
        self.matrix.read_data(data)
        self.matrix.normalize()

    def testEbcOnSparseMatrix(self):
        """Run EBC with 30 x 125 clusters and check that every cluster is used."""
        ebc = EBC(self.matrix, [30, 125], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run()
        # Single-argument print works on Python 2 and 3; the double space
        # reproduces the old `print "label: ", value` output.
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(len(ebc.pXY.nonzero_elements), 29456)
        self.assertEqual(len(set(ebc.cXY[0])), 30)
        self.assertEqual(len(set(ebc.cXY[1])), 125)
Beispiel #14
0
    def __init__(self, A, b, x0=0, tol=10**-9, max_iter=10**100):
        """
        Initializes the matrix A, column matrix b, initial guess x0, tolerance,
        and maximum number of iterations max_iter.

        Args:
            A: coefficient matrix; a deep copy is stored. It must provide
                colRank, retrieveElement and deleteElement.
            b: right-hand-side column matrix (stored by reference).
            x0: initial guess; the default 0 selects FullMatrix(n, 1) with
                n = A.colRank.
            tol: tolerance, stored as self.tol.
            max_iter: maximum number of iterations, stored as self.max_iter.

        Attributes set:
            D_inv: inverse of the diagonal matrix D obtained from the
                diagonal elements of A.
            R: the matrix obtained from (A - D), equivalent to (L + U).
            Db: product of the matrix multiplication of D_inv and b.
            x: initialised to False.

        Raises:
            ZeroDivisionError: if a diagonal element of A is a plain Python
                zero.
        """
        self.A = copy.deepcopy(A)

        self.b = b

        self.n = A.colRank

        # NOTE(review): this compares a caller-supplied matrix against 0 and
        # therefore relies on that type's __eq__ -- confirm it is safe.
        if x0 == 0:
            self.x0 = FullMatrix(self.n, 1)
        else:
            self.x0 = x0

        self.tol = tol
        self.max_iter = max_iter

        self.D_inv = SparseMatrix(self.n, self.n)

        self.R = copy.deepcopy(A)

        for i in range(self.n):
            aii = A.retrieveElement(i, i)
            # 1.0 forces true division: on Python 2 `1 / aii` floors to 0 for
            # integer diagonal entries and silently zeroes the inverse.
            self.D_inv.addElement(i, i, 1.0 / aii)
            self.R.deleteElement(i, i)

        self.Db = self.D_inv.productAx(self.b)

        self.x = False
Beispiel #15
0
    def setUp(self):
        """Run EBC on a 6x6x6 matrix made of three 2x2x2 blocks of ones.

        The assertions live directly in setUp: the matching block assignment
        must be returned unchanged with a zero objective, and a randomly
        initialised run must also reach a zero objective. This block uses the
        EBC signature without the tolerance argument and a two-value run().
        """
        data = [[0, 0, 0, 1.0],
                [0, 0, 1, 1.0],
                [0, 1, 0, 1.0],
                [0, 1, 1, 1.0],
                [1, 0, 0, 1.0],
                [1, 0, 1, 1.0],
                [1, 1, 0, 1.0],
                [1, 1, 1, 1.0],
                [2, 2, 2, 1.0],
                [2, 2, 3, 1.0],
                [2, 3, 2, 1.0],
                [3, 2, 2, 1.0],
                [2, 3, 3, 1.0],
                [3, 3, 2, 1.0],
                [3, 2, 3, 1.0],
                [3, 3, 3, 1.0],
                [4, 4, 4, 1.0],
                [4, 4, 5, 1.0],
                [4, 5, 4, 1.0],
                [4, 5, 5, 1.0],
                [5, 4, 4, 1.0],
                [5, 4, 5, 1.0],
                [5, 5, 4, 1.0],
                [5, 5, 5, 1.0]]
        matrix = SparseMatrix([6, 6, 6])
        matrix.read_data(data)
        matrix.normalize()

        ebc = EBC(matrix, [3, 3, 3], 10, 1e-10)
        assigned_C = [[0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2]]
        cXY, objective = ebc.run(assigned_C)
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(cXY, assigned_C)
        self.assertAlmostEqual(objective, 0.0)
        cXY, objective = ebc.run()  # random initialization
        self.assertAlmostEqual(objective, 0.0)
Beispiel #16
0
    def transfer_sparse_matrix(self, nrow, ncol, data):
        """
        Convert data into the SparseMatrix structure.

        Each row i of data is added with addRow as a dict mapping the
        position within the row to the value at that position.

        :param nrow: length of unique user
        :param ncol: length of unique movie
        :param data: numpy matrix (indexed as data[i, :])

        :return: SparseMatrix
        """

        sparse = SparseMatrix(nrow, ncol)
        for i in range(len(data)):
            # Fixed typo: the original called `aparse.addRow`, an undefined
            # name that raised NameError on the first row.
            sparse.addRow(i, {k: v for k, v in enumerate(data[i, :])})
        return sparse
Beispiel #17
0
 def setUp(self):
     """Build the 6x6 fixture rows ["row", "col", value] and load them into a SparseMatrix."""
     # Row r / column c of this grid becomes the data row [str(r), str(c), value],
     # generated in row-major order (zeros included).
     grid = (
         (0.05, 0.05, 0.05, 0.00, 0.00, 0.00),
         (0.05, 0.05, 0.05, 0.00, 0.00, 0.00),
         (0.00, 0.00, 0.00, 0.05, 0.05, 0.05),
         (0.00, 0.00, 0.00, 0.05, 0.05, 0.05),
         (0.04, 0.04, 0.00, 0.04, 0.04, 0.04),
         (0.04, 0.04, 0.04, 0.00, 0.04, 0.04),
     )
     self.data = [[str(r), str(c), cell]
                  for r, row in enumerate(grid)
                  for c, cell in enumerate(row)]
     self.matrix = SparseMatrix([6, 6])
     self.matrix.read_data(self.data)
Beispiel #18
0
class TestEbc(unittest.TestCase):
    """Tests for SparseMatrix loading and EBC runs on the resource matrices."""

    def setUp(self):
        """Build the 6x6 fixture rows ["row", "col", value] and load them into a SparseMatrix."""
        # Row r / column c of this table becomes [str(r), str(c), value], in
        # row-major order; this yields the same 36 rows as a literal list.
        values = (
            (0.05, 0.05, 0.05, 0.00, 0.00, 0.00),
            (0.05, 0.05, 0.05, 0.00, 0.00, 0.00),
            (0.00, 0.00, 0.00, 0.05, 0.05, 0.05),
            (0.00, 0.00, 0.00, 0.05, 0.05, 0.05),
            (0.04, 0.04, 0.00, 0.04, 0.04, 0.04),
            (0.04, 0.04, 0.04, 0.00, 0.04, 0.04),
        )
        self.data = [[str(row), str(col), value]
                     for row, row_values in enumerate(values)
                     for col, value in enumerate(row_values)]
        self.matrix = SparseMatrix([6, 6])
        self.matrix.read_data(self.data)

    def testDataLoad(self):
        """Only the nonzero entries of the fixture may be stored."""
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(sorted(self.matrix.nonzero_elements.items(), key=itemgetter(0)),
                         [((0, 0), 0.05), ((0, 1), 0.05), ((0, 2), 0.05), ((1, 0), 0.05), ((1, 1), 0.05),
                          ((1, 2), 0.05), ((2, 3), 0.05), ((2, 4), 0.05), ((2, 5), 0.05), ((3, 3), 0.05),
                          ((3, 4), 0.05), ((3, 5), 0.05), ((4, 0), 0.04), ((4, 1), 0.04), ((4, 3), 0.04),
                          ((4, 4), 0.04), ((4, 5), 0.04), ((5, 0), 0.04), ((5, 1), 0.04), ((5, 2), 0.04),
                          ((5, 4), 0.04), ((5, 5), 0.04)])

    def testOldMatrix(self):
        """Run EBC on the dense 2-D resource matrix and check the cluster counts."""
        with open("resources/matrix-ebc-paper-dense.tsv", "r") as f:
            data = []
            for line in f:
                sl = line.split("\t")
                if len(sl) < 5:  # headers
                    continue
                data.append([sl[0], sl[2], float(sl[4])])

        matrix = SparseMatrix([3514, 1232])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [30, 125], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run()
        # Single-argument print works on Python 2 and 3; the double space
        # reproduces the old `print "label: ", value` output.
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))
        self.assertEqual(len(ebc.pXY.nonzero_elements), 10007)
        self.assertEqual(len(set(ebc.cXY[0])), 30)
        self.assertEqual(len(set(ebc.cXY[1])), 125)

    def testOldMatrix3d(self):
        """Run EBC on the dense 3-D resource matrix and check the cluster counts."""
        with open("resources/matrix-ebc-paper-dense-3d.tsv", "r") as f:
            data = []
            for line in f:
                sl = line.split("\t")
                data.append([sl[0], sl[1], sl[2], float(sl[3])])

        matrix = SparseMatrix([756, 996, 1232])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [30, 30, 10], 100, 1e-10, 0.01)
        cXY, objective, it = ebc.run()
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))
        self.assertEqual(len(ebc.pXY.nonzero_elements), 10007)
        self.assertEqual(len(set(ebc.cXY[0])), 30)
        self.assertEqual(len(set(ebc.cXY[1])), 30)
        self.assertEqual(len(set(ebc.cXY[2])), 10)

    def test3DMatrix(self):
        """Check EBC on a 6x6x6 matrix made of three 2x2x2 blocks of ones.

        Starting from the matching block assignment, EBC must return that
        assignment with a zero objective after one iteration. Afterwards 100
        randomly initialised runs are executed and printed (no assertions).
        """
        data = [[0, 0, 0, 1.0],
                [0, 0, 1, 1.0],
                [0, 1, 0, 1.0],
                [0, 1, 1, 1.0],
                [1, 0, 0, 1.0],
                [1, 0, 1, 1.0],
                [1, 1, 0, 1.0],
                [1, 1, 1, 1.0],
                [2, 2, 2, 1.0],
                [2, 2, 3, 1.0],
                [2, 3, 2, 1.0],
                [3, 2, 2, 1.0],
                [2, 3, 3, 1.0],
                [3, 3, 2, 1.0],
                [3, 2, 3, 1.0],
                [3, 3, 3, 1.0],
                [4, 4, 4, 1.0],
                [4, 4, 5, 1.0],
                [4, 5, 4, 1.0],
                [4, 5, 5, 1.0],
                [5, 4, 4, 1.0],
                [5, 4, 5, 1.0],
                [5, 5, 4, 1.0],
                [5, 5, 5, 1.0]]
        matrix = SparseMatrix([6, 6, 6])
        matrix.read_data(data)
        matrix.normalize()
        ebc = EBC(matrix, [3, 3, 3], 10, 1e-10, 0.01)
        assigned_C = [[0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2]]
        cXY, objective, it = ebc.run(assigned_C)
        self.assertEqual(cXY, assigned_C)
        self.assertAlmostEqual(objective, 0.0)
        self.assertEqual(it, 1)

        for _ in range(100):
            cXY, objective, it = ebc.run()  # random initialization
            # Keeps the space-separated output of the old print statement.
            print("%s %s %s" % (cXY, objective, it))
Beispiel #19
0
class TestSanityCheck(unittest.TestCase):
    """ Do a sanity check for the EBC code, using the data from the original ITCC paper. """

    # Expected approximate distribution for the assigned clustering used
    # below: entry [x][y] is the value asserted for location (x, y).
    EXPECTED_APPROXIMATION = (
        (0.054, 0.054, 0.042, 0.0, 0.0, 0.0),
        (0.054, 0.054, 0.042, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.042, 0.054, 0.054),
        (0.0, 0.0, 0.0, 0.042, 0.054, 0.054),
        (0.036, 0.036, 0.028, 0.028, 0.036, 0.036),
        (0.036, 0.036, 0.028, 0.028, 0.036, 0.036),
    )

    def setUp(self):
        """Load the ITCC paper matrix from its TSV resource and normalize it."""
        with open("resources/matrix-itcc-paper-orig.tsv", "r") as f:
            data = [l.split('\t') for l in f]

        self.matrix = SparseMatrix([6, 6])
        self.matrix.read_data(data)
        self.matrix.normalize()

    def cartesian(self, arrays, out=None):
        """Return the Cartesian product of the given sequences, one combination per row.

        Args:
            arrays: sequences to combine; each is converted with np.asarray.
            out: optional array that is filled in place (used by the
                recursion); allocated with the dtype of the first sequence
                when omitted.

        Returns:
            The filled 2-D array with one column per input sequence.
        """
        arrays = [np.asarray(x) for x in arrays]
        dtype = arrays[0].dtype

        n = np.prod([x.size for x in arrays])
        if out is None:
            out = np.zeros([n, len(arrays)], dtype=dtype)

        # Floor division keeps m an integer on Python 3, where `/` would
        # yield a float that np.repeat and slicing reject.
        m = n // arrays[0].size
        out[:, 0] = np.repeat(arrays[0], m)
        if arrays[1:]:
            self.cartesian(arrays[1:], out=out[0:m, 1:])
            for j in range(1, arrays[0].size):
                out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]
        return out

    def _assert_expected_approximation(self, approx_distribution):
        """Compare every location of approx_distribution with EXPECTED_APPROXIMATION."""
        for x, expected_row in enumerate(self.EXPECTED_APPROXIMATION):
            for y, expected in enumerate(expected_row):
                self.assertAlmostEqual(approx_distribution[(x, y)], expected)

    def testEbcOnSparseMatrix(self):
        """Check the distribution EBC reconstructs from an assigned clustering."""
        ebc = EBC(self.matrix, [3, 2], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run(verbose=False)
        # Single-argument print works on Python 2 and 3; the double space
        # reproduces the old `print "label: ", value` output.
        print("--> ebc")
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))

        ebc = EBC(self.matrix, [3, 2], 10, 1e-10, 0.01)
        ebc.run(assigned_clusters=[[2, 0, 1, 1, 2, 2], [0, 0, 1, 0, 1, 1]], verbose=False)
        indices = [range(N_d) for N_d in ebc.pXY.N]
        index_list = self.cartesian(indices)
        approx_distribution = {}
        for location in index_list:
            q = 1.0
            c_location = []
            for i, index in enumerate(location):
                c_location.append(ebc.cXY[i][index])
                q *= ebc.qXxHat[i][index]
            q *= ebc.qXhatYhat.get(tuple(c_location))
            approx_distribution[tuple(location)] = q

        self._assert_expected_approximation(approx_distribution)

    def testEbc2dOnSparseMatrix(self):
        """Same check as testEbcOnSparseMatrix, using the EBC2D implementation."""
        with open("resources/matrix-itcc-paper-orig.tsv", "r") as f:
            data = [l.split('\t') for l in f]
        m = ebc2d.get_matrix_from_data(data)
        # run without assigned clusters
        ebc = EBC2D(m, [3, 2], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run(verbose=False)
        print("--> ebc2d")
        print("objective:  %s" % (objective,))
        print("iterations:  %s" % (it,))

        # run with assigned clusters
        ebc = EBC2D(m, [3, 2], 10, 1e-10, 0.01)
        cXY, objective, it = ebc.run(assigned_clusters=[[2, 0, 1, 1, 2, 2], [0, 0, 1, 0, 1, 1]], verbose=False)
        indices = [range(N_d) for N_d in ebc.pXY.shape]
        index_list = self.cartesian(indices)
        approx_distribution = {}
        qX_xhat = [ebc.qX_xhat, ebc.qY_yhat]
        for location in index_list:
            q = 1.0
            c_location = []
            for i, index in enumerate(location):
                c_location.append(cXY[i][index])
                q *= qX_xhat[i][index]
            q *= ebc.qXhatYhat[c_location[0], c_location[1]]
            approx_distribution[tuple(location)] = q

        self._assert_expected_approximation(approx_distribution)
Beispiel #20
0
 def setUp(self):
     """Read the tab-separated sample file into a SparseMatrix with axes [2, 4, 9]."""
     # The context manager closes the file even if reading fails; the
     # previous open(...).readlines() left the handle to the garbage collector.
     with open('tests/sample-matrix-file.txt', 'r') as sample_file:
         self.data = [line.split('\t') for line in sample_file]
     self.matrix = SparseMatrix([2, 4, 9])
     self.matrix.read_data(self.data)
Beispiel #21
0
class WilkinsonTestPartI(unittest.TestCase):
    """
    Test suite for part I, when ground truth is known.

    Every test applies the same operations to a FullMatrix and a SparseMatrix
    built from the same data and requires norm2 of the pair to be exactly 0.0.
    """
    def setUp(self):
        """Builds A in both storage formats and augments each with x."""
        self.A = [
            [1, 2, 0, 0, 3],
            [4, 5, 6, 0, 0],
            [0, 7, 8, 0, 9],
            [0, 0, 0, 10, 0],
            [11, 0, 0, 0, 12],
        ]
        self.x = [[5], [4], [3], [2], [1]]

        self.x_full = FullMatrix(5, 1)
        self.A_full = FullMatrix(5, 5)
        self.A_sparse = SparseMatrix(5, 5)

        # Fill x and both copies of A row by row from the nested lists.
        for row, values in enumerate(self.A):
            self.x_full.addElement(row, 0, self.x[row][0])
            for col, value in enumerate(values):
                self.A_full.addElement(row, col, value)
                self.A_sparse.addElement(row, col, value)

        for matrix in (self.A_full, self.A_sparse):
            matrix.augment(self.x_full)

    def test_rowPermute(self):
        """Two row permutations must leave both formats identical."""
        for matrix in (self.A_full, self.A_sparse):
            matrix.rowPermute(0, 2)
            matrix.rowPermute(0, 4)

        difference = norm2(self.A_full, self.A_sparse)
        self.assertTrue(difference == 0.0)

    def test_rowScale(self):
        """Two row-scaling operations must leave both formats identical."""
        for matrix in (self.A_full, self.A_sparse):
            matrix.rowScale(0, 3, 3)
            matrix.rowScale(4, 1, -4.4)

        difference = norm2(self.A_full, self.A_sparse)
        self.assertTrue(difference == 0.0)

    def test_productAx(self):
        """The product with the de-augmented vector must match in both formats."""
        # The vector returned by the full matrix is reused for both products.
        x = self.A_full.deaugment()
        self.A_sparse.deaugment()

        products = [matrix.productAx(x)
                    for matrix in (self.A_full, self.A_sparse)]
        difference = norm2(products[0], products[1])
        self.assertTrue(difference == 0.0)

    def test_combined(self):
        """Permutation, scaling and the product, applied in sequence."""
        pair = (self.A_full, self.A_sparse)

        for matrix in pair:
            matrix.rowPermute(0, 2)
            matrix.rowPermute(0, 4)
        for matrix in pair:
            matrix.rowScale(0, 3, 3)
            matrix.rowScale(4, 1, -4.4)

        # Here the vector returned by the sparse matrix is the one reused.
        x = self.A_sparse.deaugment()
        self.A_full.deaugment()

        products = [matrix.productAx(x) for matrix in pair]
        difference = norm2(products[0], products[1])
        self.assertTrue(difference == 0.0)
from matrix import FullMatrix, SparseMatrix
import pickle

# Load the matrix stored in memplus.mtx into a SparseMatrix.
# NOTE(review): the handle is never closed in the visible code, and the name
# `file` shadows the Python 2 builtin -- consider `with open(...) as ...`.
file = open("memplus.mtx","r") # Open memplus.mtx for reading
line = file.readline() # First line is read and discarded (treated as a comment line)
line = file.readline() # Second line holds rowRank, colRank, and the number of entries

# Parse the three whitespace-separated integers of the size line
line = line.split()
rowRank = int(line[0])
colRank = int(line[1])
num_elem = int(line[2])

# Create the SparseMatrix with the dimensions read above
A_sparse = SparseMatrix(rowRank,colRank)

# Read the remaining lines of memplus.mtx, one per stored entry

for i in range (1,num_elem+1): # i counts entries from 1 to num_elem inclusive
	
	# Progress report after every 1000 entries
	if i%1000 == 0:
		print ("Added %d elements." %i)

	'''
	Reading the file and extracting the row, column, and the value of
	the element.
	'''
	# NOTE(review): the snippet appears truncated here -- the split fields are
	# not yet converted or added to A_sparse in the visible code.
	line = file.readline()
	line = line.split()
Beispiel #23
0
class Jacobi_Solver:
    """
    An instance is a representation of the linear system A x = b to be solved
    using the Jacobi iterative method.

    A is split as A = D + R, with D the diagonal of A and R the remaining
    entries; each iteration computes x_new = D_inv b - D_inv (R x_old).

    The matrix arguments must provide the interface used below: rowRank,
    colRank, retrieveElement, addElement, deleteElement and productAx.
    """
    def __init__(self, A, b, x0=0, tol=10**-9, max_iter=10**100):
        """
        Initializes the matrix A, column matrix b, initial guess x0, tolerance,
        and maximum number of iterations max_iter.

        A: coefficient matrix; deep copies are stored, A itself is only read.
        b: right-hand-side column matrix (stored by reference).
        x0: initial guess as a column matrix. The default 0 (or None) selects
            a newly constructed FullMatrix(n, 1) -- presumably all zeros.
        tol: threshold on the norm of the change between successive iterates.
        max_iter: upper bound on the number of iterations run by solve().

        D_inv is the inverse of the diagonal matrix D obtained from the
        diagonal elements of A.
        R is the matrix obtained from (A - D) which is equivalent to (L+U).
        Db is the product obtained from the matrix multiplication of D_inv
        and b.

        A zero diagonal entry makes 1 / aii raise ZeroDivisionError.
        """
        self.A = copy.deepcopy(A)

        self.b = b

        self.n = A.colRank

        # Only a scalar zero (the default) or None requests the default
        # guess. A matrix argument is never compared against 0, so the outcome
        # does not depend on how the matrix type implements ==.
        if x0 is None or (isinstance(x0, (int, float)) and x0 == 0):
            self.x0 = FullMatrix(self.n, 1)
        else:
            self.x0 = x0

        self.tol = tol
        self.max_iter = max_iter

        self.D_inv = SparseMatrix(self.n, self.n)

        self.R = copy.deepcopy(A)

        for i in range(self.n):
            aii = A.retrieveElement(i, i)
            self.D_inv.addElement(i, i, 1 / aii)
            # Removing the diagonal from the copy of A leaves R = A - D.
            self.R.deleteElement(i, i)

        self.Db = self.D_inv.productAx(self.b)

        # False marks "no solution computed yet"; solve() replaces it.
        self.x = False

    def one_iter(self):
        """
        One iteration of the Jacobi method.

        Returns: a new FullMatrix(n, 1) holding D_inv b - D_inv (R x0),
        computed from the current self.x0. No attribute is modified.
        """
        a = self.D_inv.productAx(self.R.productAx(self.x0))
        x = FullMatrix(self.n, 1)
        for i in range(self.n):
            x.addElement(
                i, 0, self.Db.retrieveElement(i, 0) - a.retrieveElement(i, 0))

        return x

    def norm2(self, mat1, mat2):
        """
        Calculates the second norm of [mat1 - mat2].

        mat1: A matrix in either full or sparse format
        mat2: A matrix in either full or sparse format

        The rows iterated come from mat1.rowRank and the columns from
        mat2.colRank, so both matrices should have the same dimensions.

        Returns: Frobenius second norm of the matrix (mat1 - mat2)
        """
        squared = sum(
            (mat1.retrieveElement(i, k) - mat2.retrieveElement(i, k)) ** 2
            for i in range(mat1.rowRank)
            for k in range(mat2.colRank)
        )
        return math.sqrt(squared)

    def residual_norm(self):
        """
        Calculates the normalized residual norm ||b - A x|| / ||b|| using
        self.x, self.A, and self.b.

        Raises RuntimeError when called before solve(), and
        ZeroDivisionError when every entry of b is zero.
        """
        # self.x is still the False sentinel until solve() has run; fail with
        # a clear message instead of passing the sentinel into productAx.
        if self.x is False:
            raise RuntimeError(
                "residual_norm() requires solve() to be called first")

        b_calc = self.A.productAx(self.x)
        numerator = self.norm2(self.b, b_calc)
        denominator = math.sqrt(sum(
            self.b.retrieveElement(i, 0) ** 2
            for i in range(self.b.rowRank)
        ))
        return numerator / denominator

    def solve(self):
        """
        Solves the system of linear equations using Jacobi iterative method
        without implementing any matrix preconditioning.

        Iterates until the norm of the change between successive iterates is
        below self.tol or self.max_iter iterations have run.

        Side effects: self.x receives the last iterate, self.x0 is advanced
        to the most recent non-final iterate, and self.max_iter is overwritten
        with the iteration counter -- the number of iterations performed on
        convergence, or the original max_iter + 1 when the limit was reached.
        If max_iter is below 1 no iteration runs and self.x is the initial
        guess.
        """
        num_iter = 1
        # Bind x up front so a non-positive max_iter (loop body never runs)
        # yields the initial guess instead of an UnboundLocalError.
        x = self.x0
        while num_iter <= self.max_iter:
            x = self.one_iter()
            if self.norm2(x, self.x0) < self.tol:
                break

            num_iter += 1
            self.x0 = x

        self.x = x
        self.max_iter = num_iter