def generateInitialDistanceMatrix(cls, test=False):
    """Populate ``cls.simMatrix`` with the initial n x n pairwise matrix.

    Three sources are used, in this order:

    * ``test`` truthy: a fixed 5 x 5 symmetric float matrix is installed
      (handy for exercising the clustering code without a dataset).
    * the cache file ``data/simMat_3.pkl`` exists: the matrix is unpickled
      from it as-is.
    * otherwise: the matrix is computed with ``computeDistance`` on the
      first sequence (``sequences[0]``) of every pair of clusters returned
      by ``cls.getClusterById``, and then pickled to the cache file.

    Args:
        cls: Object holding ``ClusterCount``, ``getClusterById`` and
            receiving the ``simMatrix`` attribute.
        test: When truthy, use the hard-coded fixture matrix.

    Returns:
        None. The result is stored on ``cls.simMatrix``.
    """
    if test:
        # Fixed fixture for testing purposes.
        cls.simMatrix = np.array(
            [[0, 9, 3, 6, 11],
             [9, 0, 7, 5, 10],
             [3, 7, 0, 9, 2],
             [6, 5, 9, 0, 8],
             [11, 10, 2, 8, 0]],
            dtype=float)
        return

    pickleFilePath = Path('data/simMat_3.pkl')
    if pickleFilePath.exists():
        # NOTE: unpickling can execute arbitrary code; this cache file must
        # only ever be one this program wrote itself.
        with open(pickleFilePath, 'rb') as file:
            cls.simMatrix = pickle.load(file)
        return

    # No cache yet: compute the value for every pair of clusters.
    count = cls.ClusterCount
    matrix = np.ones((count, count))
    for cID in range(count):
        seq1 = cls.getClusterById(cID).sequences[0]
        # Start at cID so each unordered pair (and the diagonal) is computed
        # exactly once; the value is mirrored to keep the matrix symmetric.
        for _cID in range(cID, count):
            seq2 = cls.getClusterById(_cID).sequences[0]
            value = computeDistance(seq1, seq2)
            matrix[cID, _cID] = value
            matrix[_cID, cID] = value
    cls.simMatrix = matrix

    # Make sure the cache directory exists so the expensive result above is
    # not lost to a FileNotFoundError on the first run.
    pickleFilePath.parent.mkdir(parents=True, exist_ok=True)
    with open(pickleFilePath, 'wb') as file:
        pickle.dump(cls.simMatrix, file)
def genSimilarityMatrix(data):
    """Return the pairwise matrix for the entries of ``data`` plus its keys.

    The matrix is cached in ``data/simMatrix_K_Mediods.pkl``. A cached
    matrix is reused only when its shape matches the number of keys in
    ``data``; otherwise it is treated as stale, recomputed and overwritten.

    Args:
        data: Mapping from an identifier to the value passed to
            ``computeDistance`` (presumably a sequence string -- confirm
            against the caller).

    Returns:
        A ``(simMatrix, indexes)`` tuple where ``indexes`` is the list of
        keys of ``data`` and ``simMatrix[i, j]`` is
        ``computeDistance(data[indexes[i]], data[indexes[j]])``.
    """
    indexes = list(data.keys())
    count = len(indexes)
    pickleFilePath = Path('data/simMatrix_K_Mediods.pkl')

    if pickleFilePath.exists():
        # NOTE: unpickling can execute arbitrary code; this cache file must
        # only ever be one this program wrote itself.
        with open(pickleFilePath, 'rb') as file:
            cached = pickle.load(file)
        # A cache built from a different dataset would not line up with
        # ``indexes``; only trust it when the dimensions agree.
        if np.shape(cached) == (count, count):
            return cached, indexes

    simMatrix = np.ones((count, count))
    for row, key in enumerate(indexes):
        strA = data[key]
        # Start at ``row`` so each unordered pair (and the diagonal) is
        # computed once; the value is mirrored to keep the matrix symmetric.
        for col in range(row, count):
            value = computeDistance(strA, data[indexes[col]])
            simMatrix[row, col] = value
            simMatrix[col, row] = value
    print(simMatrix)

    # Make sure the cache directory exists so the computed matrix is not
    # lost to a FileNotFoundError on the first run.
    pickleFilePath.parent.mkdir(parents=True, exist_ok=True)
    with open(pickleFilePath, 'wb') as file:
        pickle.dump(simMatrix, file)
    return simMatrix, indexes