def is_symmetric(file):
    """Compare the row-wise and ``columns=True`` loads of the system in ``file``.

    The file is read twice with ``io.read_system(file, no_b=False)``; the
    first result is handed to ``SparseMatrix`` unchanged and the second with
    ``columns=True``.  The function returns whatever ``==`` yields for the
    two resulting objects.

    NOTE(review): presumably ``columns=True`` makes ``SparseMatrix`` store the
    data column by column, so equality means the matrix is symmetric -
    confirm against the ``SparseMatrix`` constructor.
    """
    row_wise = SparseMatrix(*io.read_system(file, no_b=False))
    column_wise = SparseMatrix(*io.read_system(file, no_b=False), columns=True)
    return row_wise == column_wise
def test_CSR_to_COO(self):
    """``CSR_to_COO`` must expose the same data/row/col arrays as ``coo_matrix``."""
    dense = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])

    reference = coo_matrix(dense)
    expected = np.array([reference.data, reference.row, reference.col])

    result = SparseMatrix(dense).CSR_to_COO().intern_represent

    # intern_represent is compared component by component: values, rows, columns.
    for index in range(3):
        npt.assert_array_almost_equal(result[index], expected[index])
def test___add__(self):
    """``a + b`` must have the same internal representation as the sparse form of the dense sum."""
    left = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])
    right = np.array([(0, 1, 0), (1, 0, 0), (0, 0, 1)])
    dense_sum = np.array([(1, 1, 0), (1, 1, 0), (0, 0, 2)])

    expected = SparseMatrix(dense_sum).intern_represent
    result = (SparseMatrix(left) + SparseMatrix(right)).intern_represent

    for index in range(3):
        npt.assert_array_almost_equal(result[index], expected[index])
def test_CSR_to_CSC(self):
    """``CSR_to_CSC`` must expose the same data/indptr/indices arrays as ``csc_matrix``."""
    dense = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])

    reference = csc_matrix(dense)
    expected = np.array([reference.data, reference.indptr, reference.indices])

    result = SparseMatrix(dense).CSR_to_CSC().intern_represent

    # Component order under test: values, index pointer, indices.
    for index in range(3):
        npt.assert_array_almost_equal(result[index], expected[index])
def test_CSR_x_VECTOR(self):
    """Check ``CSR_x_VECTOR`` against ``csr_matrix.dot`` and on a mismatched vector.

    First the product of a 3x3 identity with a length-3 vector is compared
    with the ``csr_matrix`` result.  Then a length-4 vector is passed to a
    3x3 matrix and the call is expected to raise.
    """
    dense = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])
    vector = np.array([1, 2, 3])

    result = SparseMatrix(dense).CSR_x_VECTOR(vector)
    expected = csr_matrix(dense).dot(vector)
    npt.assert_array_almost_equal(result, expected)

    # The matrix is built outside the assertRaises block so that only the
    # multiplication itself can satisfy the expectation.
    mismatched_matrix = SparseMatrix(dense)
    too_long = np.array([1, 2, 3, 4])
    # The concrete exception type raised by CSR_x_VECTOR is not visible from
    # this test, so the broad ``Exception`` of the original check is kept.
    with self.assertRaises(Exception):
        mismatched_matrix.CSR_x_VECTOR(too_long)
def test_CSR_equal_CSC(self):
    """``CSR_equal_CSC`` is true for a matrix's own CSC form and false for a different matrix."""
    identity = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])
    csr = SparseMatrix(identity)

    # Same matrix, converted to CSC: must compare equal.
    self.assertTrue(csr.CSR_equal_CSC(csr.CSR_to_CSC()))

    # Different values on the diagonal: must compare unequal.
    doubled = np.array([(2, 0, 0), (0, 2, 0), (0, 0, 2)])
    other_csc = SparseMatrix(doubled).CSR_to_CSC()
    self.assertFalse(csr.CSR_equal_CSC(other_csc))
def main():
    """Run the pipeline stages selected by the boolean switches below.

    * ``build_database``: parse the documents into a ``SparseMatrix``, store
      the document information, compute and save the minHash matrix, run LSH
      and print the results (cleaned with ``cleanResults`` when two blocks of
      documents are compared).
    * ``search_one_document``: call ``docsimilar`` against the stored data.
    * ``run_tests``: call ``executeTests``.
    """
    permutations = 100  # number of permutations in the minHashing phase
    bands = 25  # number of bands in the LSH phase
    m = 1000003

    # Stage switches.
    build_database = False
    compare_blocks = False
    search_one_document = False  # only if a minHash matrix has already been created
    token_shingles = False
    run_tests = True

    if build_database:
        matrix = SparseMatrix()
        docsinfo = {}
        if compare_blocks:
            first_block = parsedata("data2017", "txtdata", matrix, 0, m,
                                    docsinfo, token_shingles)
            file_count = first_block + parsedata("data2018", "txtdata", matrix,
                                                 first_block, m, docsinfo,
                                                 token_shingles)
        else:
            file_count = parsedata("Document", "txt", matrix, 0, m, docsinfo,
                                   token_shingles)
        storeinformation(docsinfo)  # always after parsedata

        min_hashes = minHash(matrix, file_count, m, None, permutations)
        SaveMinHash(min_hashes)
        results = LSH(min_hashes, bands, file_count)
        if compare_blocks:
            results = cleanResults(results, first_block)
        print(results)

    if search_one_document:
        docsimilar("./Doc4", "./OneDocSimilar/", SparseMatrix(), m,
                   permutations, bands, "./StoredData/minHash.txt",
                   "./StoredData/ab.txt")

    if run_tests:
        executeTests(permutations, bands, m, "txtdata")
def test_COO_to_CSR(self):
    """Round-trip CSR -> COO -> CSR and check both ends.

    The CSR obtained from the round trip must match the starting CSR, and
    the intermediate COO must match the arrays of ``coo_matrix``.
    """
    dense = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])
    csr = SparseMatrix(dense)
    coo = csr.CSR_to_COO()

    reference = coo_matrix(dense)
    reference_parts = np.array([reference.data, reference.row, reference.col])

    # Round trip back to CSR and compare with the starting representation.
    round_trip = coo.COO_to_CSR().intern_represent
    original = csr.intern_represent
    for index in range(3):
        npt.assert_array_almost_equal(round_trip[index], original[index])

    # The COO object itself is read after the conversion, as before.
    coo_parts = coo.intern_represent
    for index in range(3):
        npt.assert_array_almost_equal(coo_parts[index], reference_parts[index])
def __init__(self):
    """Initialise counters, code tree, memory, output buffer and the symbol table.

    ``grammar`` maps each recognised source symbol to the bound method that
    handles it.
    """
    # Program state.
    self.code_tree = []
    self.loopcount = 0
    self.program_pointer = 0

    # Data state.
    self.memory_pointer = 0
    self.memory = SparseMatrix()
    self.output = StringIO()

    # Symbol -> handler dispatch table.
    self.grammar = dict([
        ("1", self.move_forward),
        ("0", self.move_back),
        (",", self.take_input),
        ("e", self.print_output),
        ("Π", self.increase_value),
        ("i", self.decrease_value),
        ("[", self.start_loop),
        ("]", self.end_loop),
    ])
def executeTests(numberOfPermutations, numberOfBands, m, txtdir):
    """Run the randomised retrieval test and print a per-similarity hit count.

    For each of 50 rounds a random document index in ``[0, 391]`` is drawn.
    For every similarity level, ``test`` produces a document name, which is
    passed to ``docsimilar``; the round counts as a hit when any group in
    the returned result contains the drawn index.  The hit counts per
    similarity level are printed at the end.

    Document information is loaded from ``./StoredData/docsinfo.txt``: each
    line holds comma-separated integers, the first being the key and the
    next two the stored values.
    """
    extest = True
    numFiles = 392
    similarities = [80, 60, 50, 40, 20]
    numberOfTests = 50
    hits = dict.fromkeys(similarities, 0)

    # documents information retrieval
    docsinfo = {}
    with open("./StoredData/docsinfo.txt", "r", encoding='utf-8') as handle:
        for line in handle:
            fields = [int(word.replace("\n", "")) for word in line.split(',')]
            docsinfo[fields[0]] = [fields[1], fields[2]]

    for test_number in range(numberOfTests):
        editdoc = random.randint(0, numFiles - 1)
        for similarity in similarities:
            print("Start " + str(similarity) + "% similarity of Doc" + str(editdoc))
            docName = test(editdoc, txtdir, numFiles, similarity, docsinfo)
            candidates = docsimilar(docName, "./Test/", SparseMatrix(), m,
                                    numberOfPermutations, numberOfBands,
                                    "./StoredData/minHash.txt",
                                    "./StoredData/ab.txt", extest)
            # A hit needs a non-empty result in which at least one group
            # contains the edited document.
            if candidates and any(el == editdoc
                                  for group in candidates
                                  for el in group):
                hits[similarity] += 1
            print("End " + str(similarity) + "% similarity of Doc" + str(editdoc))
        print("Test number: " + str(test_number))
        print(" ")

    print("Number of Test: " + str(numberOfTests))
    print("Test results with " + str(numberOfBands) + " bands and " + str(numberOfPermutations) + " permutations: \n")
    print(hits)
    print(" ")
def test_Change_Element(self):
    """``Change_Element(value, row, col)`` must match a matrix built with that entry.

    Two scenarios start from the 3x3 identity: overwriting the existing
    entry at (0, 0) and inserting a new entry at (1, 0).
    """
    scenarios = (
        ((2, 0, 0), [(2, 0, 0), (0, 1, 0), (0, 0, 1)]),
        ((2, 1, 0), [(1, 0, 0), (2, 1, 0), (0, 0, 1)]),
    )
    for arguments, expected_rows in scenarios:
        changed = SparseMatrix(np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)]))
        changed.Change_Element(*arguments)
        actual = changed.intern_represent

        expected = SparseMatrix(np.array(expected_rows)).intern_represent

        for index in range(3):
            npt.assert_array_almost_equal(actual[index], expected[index])
def build_transition_matrices(self, spark, metapath, relations_dir, constraint_ids):
    """Build one ``SparseMatrix`` per consecutive pair of entries in ``metapath``.

    For step ``i`` the relation name is ``metapath[i:i + 2]``; its edges are
    read from ``relations_dir + relation + '.csv'`` (tab separated, no
    header, integer columns ``row`` and ``col``).  When the first or second
    element of the relation is a key of ``constraint_ids``, the edges are
    joined against that entry's ``id`` column and only the edge columns are
    kept.  Each matrix is created from ``self._dimensions[i]``,
    ``self._dimensions[i + 1]`` and the edges with a constant ``val`` of 1.

    Returns the list of matrices in metapath order.
    """
    # Every relation file shares this two-column layout.
    edge_schema = StructType([
        StructField("row", IntegerType(), False),
        StructField("col", IntegerType(), False),
    ])

    matrices = []
    for step in range(len(metapath) - 1):
        relation = metapath[step:step + 2]
        source_key, target_key = relation[0], relation[1]

        edges = spark.read.csv(relations_dir + relation + '.csv', sep='\t',
                               header=False, schema=edge_schema)

        if source_key in constraint_ids:
            allowed = constraint_ids[source_key]
            edges = allowed.join(edges, allowed.id == edges.row).select(edges['*'])

        if target_key in constraint_ids:
            allowed = constraint_ids[target_key]
            edges = edges.join(allowed, edges.col == allowed.id).select(edges['*'])

        matrices.append(
            SparseMatrix(self._dimensions[step], self._dimensions[step + 1],
                         edges.withColumn("val", lit(1))))
    return matrices
def generate_random_symmetric_matrix2(size, density=0.07, max_value=1000):
    """Return a random symmetric ``size`` x ``size`` matrix as a list of lists.

    Every position below the diagonal is visited once.  With probability
    ``density`` it receives ``random.random() * max_value`` and the same
    value is mirrored above the diagonal.  The diagonal and all positions
    not selected stay at the integer ``0``.

    Args:
        size: number of rows and columns; ``0`` yields an empty list.
        density: probability that a given off-diagonal pair is filled.
        max_value: exclusive upper bound of the generated values.

    The defaults reproduce the previously hard-coded 0.07 and 1000, and the
    order of calls to ``random`` is unchanged, so seeded runs keep their
    output.
    """
    m = [[0] * size for _ in range(size)]
    for i in range(size):
        row = m[i]
        for j in range(i):
            if random.random() < density:
                element = random.random() * max_value
                row[j] = element
                m[j][i] = element
    return m


if __name__ == "__main__":
    file_matrix = io.read_system("../test/m_rar_sim_2018_300.txt", no_b=False)
    matrix1 = SparseMatrix(*file_matrix)
    # print(is_symmetric("../test/m_rar_sim_2018.txt"))
    power_method(matrix1)

    m2 = generate_random_symmetric_matrix2(1000)
    # Number of non-zero entries in each row of the generated matrix.
    sizes = [sum(1 for x in line if x != 0) for line in m2]
    print(sizes)
    print(check_symmetric(np.matrix(m2)))
    matrix2 = SparseMatrix(1000, [], from_matrix=m2)
    power_method(matrix2)
    # print(m2)
return dst/count class ClockTimer(): def __init__(self): self.clock = time.time() def getSpentTime(self): return time.time() - self.clock if __name__ == "__main__": ck = ClockTimer() matrix = SparseMatrix() matrix.addValue(0, 0, 1.0) matrix.addValue(1, 1, 2.0) matrix.addValue(2, 1, 2.0) matrix.addValue(3, 2, 2.0) matrix.addValue(4, 0, 2.0) matrix.addValue(4, 2, 12.0) U = np.ones((5, 2)) W = np.ones((2, 3)) d = Distortion(matrix, U, W) print (d) print "I took", ck.getSpentTime(), "seconds"
sio.savemat(self.outputFile, {'Extraction': SDE}) return self @property def cluster_centers(self): return self.__cluster_centers if __name__ == '__main__': from SparseMatrix import SparseMatrix import argparse import time parser = argparse.ArgumentParser() parser.add_argument("-k", help="k_cluster", type=int) parser.add_argument("-f", help="filename") parser.add_argument("-o", help="output") args = parser.parse_args() fileName = args.f k_cluster = args.k output = args.o if fileName is None: raise ValueError("main: empty filename") elif k_cluster == None or k_cluster <= 0: raise ValueError("main: invalid number k_cluster") else: st = time.time() s = SparseMatrix(fileName=fileName) k = kmeans(k_cluster=k_cluster, saveFile=True, outputFile=output).fit(s) print('Time used:', str(time.time() - st))
import numpy as np
np.set_printoptions(threshold=2018)
from utils import io
from SparseMatrix import SparseMatrix

if __name__ == "__main__":
    # Load the system, check it, solve it and report the residual norm.
    matrix = SparseMatrix(*io.read_system("../test/m_rar_2018_1.txt"))
    matrix.verify()

    solution = matrix.solve_Gauss_Sidel(0.00000001, 50, 10**10)
    print(solution)

    # Norm of A*x - b for the computed solution.
    residual = matrix.multiply_vector(solution) - matrix.b
    print(np.linalg.norm(residual))
def reset(self):
    """Set the counters and pointers back to zero and replace the code tree and memory with fresh ones."""
    self.loopcount = self.program_pointer = self.memory_pointer = 0
    self.code_tree = []
    self.memory = SparseMatrix()
import matplotlib.pyplot as plt #import plotly.plotly as py Mx=np.array([(1,8,2),(0,0,3),(0,5,6)]) Mx2=np.array([(0,1,0),(0,2,3),(0,0,6)]) """ Task 1 """ print() print('Task 1') print() print(Mx) T=SparseMatrix(Mx) print(T) """ Task 2 """ print() print('Task 2') print() print(T.number_of_nonzero) """ Task 3 """
def test_CSR_to_MATRIX(self):
    """``CSR_to_MATRIX`` must reproduce the dense matrix the CSR was built from.

    The whole result is compared with the source array in one call.  The
    earlier row-by-row loop took its bound from the result's own
    ``shape[0]``, so a result with too few rows passed without any
    comparison; ``assert_array_almost_equal`` also fails on a shape mismatch.
    """
    dense = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])
    rebuilt = SparseMatrix(dense).CSR_to_MATRIX()
    npt.assert_array_almost_equal(rebuilt, dense)
from SparseMatrix import SparseMatrix
from utils import io
import numpy as np

if __name__ == "__main__":
    # addition
    # Load both operands and the reference sum, each from its own file.
    file_matrix1 = io.read_system("../test/a.txt")
    a = SparseMatrix(*file_matrix1)
    a.verify()
    file_matrix2 = io.read_system("../test/b.txt")
    b = SparseMatrix(*file_matrix2)
    b.verify()
    file_matrix3 = io.read_system("../test/aplusb.txt")
    aplusb = SparseMatrix(*file_matrix3)
    result = a + b
    # Print the difference to the reference and the equality check.
    print(result - aplusb)
    print(result == aplusb)

    # multiplication
    # b is reloaded with an extra positional True passed to SparseMatrix.
    # NOTE(review): presumably this is the column-wise storage flag - confirm
    # against the SparseMatrix constructor.
    file_matrix2 = io.read_system("../test/b.txt")
    b = SparseMatrix(*file_matrix2, True)
    file_matrix3 = io.read_system("../test/aorib.txt")
    aorib = SparseMatrix(*file_matrix3)
    result = a * b
""" First method: Just compute the numerical derivative df = [f(x+h) - f(x)]/h """

# Hyper-parameters of the run.
N_LATENT_FACTORS = 10
NU = 0.1
N_ITERATIONS = 10
h = 0.1  # step of the finite difference named in the header

# Load the dataset into a sparse matrix A of shape (M, N).
dataset = DatasetImporter()
A = SparseMatrix()
A.addValues(dataset.dataset)
M, N = A.shape()

default_val = np.sqrt(3.)/N_LATENT_FACTORS # why? because it gives exactly an average score of 3 for each cell of A
# NOTE(review): if the prediction is the product U.dot(W), each cell equals
# N_LATENT_FACTORS * default_val**2 = 3 / N_LATENT_FACTORS (0.3 here), not 3;
# np.sqrt(3. / N_LATENT_FACTORS) would give 3 - confirm the intent.
U = default_val * np.ones((M, N_LATENT_FACTORS))
W = default_val * np.ones((N_LATENT_FACTORS, N))

# xrange and the print form below are Python 2 constructs.
for count in xrange(N_ITERATIONS):
    # Update of all U(i, j)
    for i in xrange(M):
        for j in xrange(N_LATENT_FACTORS):
            print (i, j)
import os
from constants import *
from functions import *

# RawData objects are indexed starting from 0
#region_desc = RawData(region_description_file)
region_para = RawData(region_parameter_file)
#boundary_desc = RawData(boundary_description_file)
boundary_para = RawData(boundary_parameter_file)
point_desc = RawData(point_description_file)
edge_desc = RawData(edge_description_file)
triangle_desc = RawData(triangle_description_file)

# Build a sparse matrix and sparse vectors with zero initial values; these are indexed starting from 1
K = SparseMatrix()
P = SparseVector()
U = SparseVector()

B = RawData(B_file)
B_lines = RawData(B_lines_file)
gapB = RawData(gapB_file)

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
def MakeHomoConditionMatrix():
    '''
    Generate the homogeneous boundary-condition matrix from the mesh subdivision
    '''
    i, j, m = [0] * 3 #XXX