def computeATreeError(aMatrix, lafMatrix, afMatrix, realTree):
    """Score a tree inferred from an allele matrix against ``realTree``.

    Every entry of ``aMatrix`` is converted into an ``Alleles`` object, an
    allele distance is computed with ``FST().computeAlleleDistance`` for every
    ordered pair of columns, a tree is built from those distances with
    ``generateInitialTree`` / ``computeMST``, and that tree is scored with
    ``SimulationErrorHandler.computeTreeError``.

    Parameters:
        aMatrix: array whose ``shape`` is read and which is indexed as
            ``aMatrix[row][col]``; each entry is searched for the characters
            'A' and 'B'. Columns are treated as samples.
        lafMatrix: column ``i`` is handed to ``LAF`` for sample ``i``.
        afMatrix: column ``i`` is stored as ``afMeasurements`` of sample ``i``.
        realTree: reference tree; its ``vertices`` attribute is used to build
            the inferred tree and the object itself is passed to the scorer.

    Returns:
        The value returned by ``SimulationErrorHandler.computeTreeError`` for
        the inferred tree.

    NOTE(review): another function with this same name (taking two extra
    parameters) is defined after this one; if both live in the same module
    the later definition replaces this one -- confirm which is intended.
    """
    rowCount, sampleCount = aMatrix.shape[0], aMatrix.shape[1]

    # Turn each allele string into an Alleles object holding its A/B counts.
    alleleObjects = np.empty(aMatrix.shape, dtype=object)
    for r in range(rowCount):
        for c in range(sampleCount):
            alleleString = aMatrix[r][c]
            countA = len(re.findall('A', alleleString))
            countB = len(re.findall('B', alleleString))
            alleleObjects[r][c] = Alleles(countA, countB)

    pairwiseDistance = np.empty([sampleCount, sampleCount], dtype=float)
    chromosomes, positions, segmentation, _chromosomeArms = parseReferenceFile()

    def buildDummySample(sampleIdx):
        # The FST distance function expects Sample objects, so wrap the
        # relevant matrix columns in a throwaway one.
        dummy = Sample(None, None)
        dummy.measurements = LAF(
            lafMatrix[:, sampleIdx], chromosomes, positions, positions)
        dummy.measurements.segmentation = segmentation
        dummy.afMeasurements = afMatrix[:, sampleIdx]
        return dummy

    # Distance between every ordered pair of samples; a whole column is
    # compared in a single computeAlleleDistance call.
    for i in range(sampleCount):
        for j in range(sampleCount):
            firstSample = buildDummySample(i)
            secondSample = buildDummySample(j)
            _messages, distance = FST().computeAlleleDistance(
                alleleObjects[:, i], alleleObjects[:, j],
                firstSample, secondSample)
            pairwiseDistance[i, j] = distance

    # Build the tree from the distances and score it against the real tree.
    completeGraph = generateInitialTree(pairwiseDistance, realTree.vertices)
    inferredTree = computeMST(completeGraph, realTree.vertices)
    return SimulationErrorHandler().computeTreeError([inferredTree], realTree)
def computeATreeError(aMatrix, lafMatrix, afMatrix, realTree, chromosomes, positions):
    """Return the averaged ancestry swap error of a tree inferred from ``aMatrix``.

    The segmentation is loaded from the file configured under
    ``simulationSettings.files['segmentationFile']``. Every entry of
    ``aMatrix`` is converted into an ``Alleles`` object, an allele distance is
    computed with ``FST().computeAlleleDistance`` for every ordered pair of
    columns, a tree is built from those distances with
    ``generateInitialTree`` / ``computeMST``, and that tree is compared with
    ``realTree`` through ``computeAncestrySwapError``.

    Parameters:
        aMatrix: array whose ``shape`` is read and which is indexed as
            ``aMatrix[row][col]``; each entry is searched for the characters
            'A' and 'B'. Columns are treated as samples.
        lafMatrix: column ``i`` is handed to ``LAF`` for sample ``i``.
        afMatrix: column ``i`` is stored as ``afMeasurements`` of sample ``i``.
        realTree: reference tree; its ``vertices`` attribute is used to build
            the inferred tree.
        chromosomes: forwarded unchanged to ``LAF``.
        positions: forwarded unchanged to ``LAF`` (as two separate arguments).

    Returns:
        The sum of the two error counts returned by
        ``computeAncestrySwapError`` divided by the number of sample pairs it
        reports, as a float.

    NOTE(review): an earlier function with this same name (without the
    ``chromosomes`` / ``positions`` parameters) precedes this one; if both
    live in the same module this definition is the one that takes effect.
    """
    segmentation = Segmentation()
    segmentation.setSegmentationFromFile(
        simulationSettings.files['segmentationFile'])

    rowCount, sampleCount = aMatrix.shape[0], aMatrix.shape[1]

    # Turn each allele string into an Alleles object holding its A/B counts.
    alleleObjects = np.empty(aMatrix.shape, dtype=object)
    for r in range(rowCount):
        for c in range(sampleCount):
            alleleString = aMatrix[r][c]
            countA = len(re.findall('A', alleleString))
            countB = len(re.findall('B', alleleString))
            alleleObjects[r][c] = Alleles(countA, countB)

    def buildDummySample(sampleIdx):
        # The FST distance function expects Sample objects, so wrap the
        # relevant matrix columns in a throwaway one.
        dummy = Sample(None, None)
        dummy.measurements = LAF(
            lafMatrix[:, sampleIdx], chromosomes, positions, positions)
        dummy.measurements.segmentation = segmentation
        dummy.afMeasurements = afMatrix[:, sampleIdx]
        return dummy

    # Distance between every ordered pair of samples; a whole column is
    # compared in a single computeAlleleDistance call.
    pairwiseDistance = np.empty([sampleCount, sampleCount], dtype=float)
    for i in range(sampleCount):
        for j in range(sampleCount):
            firstSample = buildDummySample(i)
            secondSample = buildDummySample(j)
            _messages, distance = FST().computeAlleleDistance(
                alleleObjects[:, i], alleleObjects[:, j],
                firstSample, secondSample)
            pairwiseDistance[i, j] = distance

    # Build the tree from the distances.
    completeGraph = generateInitialTree(pairwiseDistance, realTree.vertices)
    inferredTree = computeMST(completeGraph, realTree.vertices)

    # Combine both kinds of ancestry swap error and average over the pairs.
    absentInInferred, presentInInferred, pairCount = computeAncestrySwapError(
        realTree, inferredTree)
    totalError = absentInInferred + presentInInferred
    return totalError / float(pairCount)