コード例 #1
0
def computeATreeError(aMatrix, lafMatrix, afMatrix, realTree):
    """Score the tree inferred from an allele matrix against ``realTree``.

    Steps, in order:
      1. Every entry of ``aMatrix`` becomes an ``Alleles`` object built from
         the number of 'A' and 'B' characters found in that entry.
      2. For every ordered pair of samples (columns of ``aMatrix``) a distance
         is taken from ``FST().computeAlleleDistance``.
      3. ``generateInitialTree`` and ``computeMST`` turn the distance matrix
         into a tree over ``realTree.vertices``.
      4. ``SimulationErrorHandler.computeTreeError`` scores that tree.

    Args:
        aMatrix: 2-D array, one column per sample. Entries are scanned with a
            regex, so they are presumably allele strings -- TODO confirm.
        lafMatrix: array sliced as ``[:, sample]`` to feed each sample's LAF.
        afMatrix: array sliced as ``[:, sample]`` for each sample's
            ``afMeasurements``.
        realTree: reference tree; only its ``vertices`` attribute is read
            here before it is handed to the error handler.

    Returns:
        The value returned by ``computeTreeError([mst], realTree)``.
    """
    rowCount, sampleNum = aMatrix.shape[0], aMatrix.shape[1]

    # Translate the raw matrix into Alleles objects, cell by cell.
    aObjMatrix = np.empty(aMatrix.shape, dtype=object)
    for rowIdx in range(rowCount):
        for colIdx in range(sampleNum):
            cell = aMatrix[rowIdx][colIdx]
            countA = len(re.findall('A', cell))
            countB = len(re.findall('B', cell))
            aObjMatrix[rowIdx][colIdx] = Alleles(countA, countB)

    # Pairwise distances between all samples (both orderings, incl. self).
    distanceMatrix = np.empty((sampleNum, sampleNum), dtype=float)
    chromosomes, positions, segmentation, chromosomeArms = parseReferenceFile()

    def buildDummySample(sampleIdx):
        # Minimal Sample carrying just the fields set here for the FST call.
        dummy = Sample(None, None)
        dummy.measurements = LAF(lafMatrix[:, sampleIdx], chromosomes,
                                 positions, positions)
        dummy.measurements.segmentation = segmentation
        dummy.afMeasurements = afMatrix[:, sampleIdx]
        return dummy

    for first in range(sampleNum):
        for second in range(sampleNum):
            # Fresh dummy objects are built for every pair, first then second.
            firstSample = buildDummySample(first)
            secondSample = buildDummySample(second)

            # Whole columns are compared in a single FST call.
            messages, pairDistance = FST().computeAlleleDistance(
                aObjMatrix[:, first], aObjMatrix[:, second],
                firstSample, secondSample)
            distanceMatrix[first, second] = pairDistance

    # Build the tree from the distances and score it against the real tree.
    fullGraph = generateInitialTree(distanceMatrix, realTree.vertices)
    mst = computeMST(fullGraph, realTree.vertices)
    errorHandler = SimulationErrorHandler()
    return errorHandler.computeTreeError([mst], realTree)
コード例 #2
0
def computeATreeError(aMatrix, lafMatrix, afMatrix, realTree, chromosomes,
                      positions):
    """Return the averaged ancestry swap error of the tree inferred from aMatrix.

    Steps, in order:
      1. A ``Segmentation`` is loaded from the file configured under
         ``simulationSettings.files['segmentationFile']``.
      2. Every entry of ``aMatrix`` becomes an ``Alleles`` object built from
         the number of 'A' and 'B' characters found in that entry.
      3. For every ordered pair of samples (columns of ``aMatrix``) a distance
         is taken from ``FST().computeAlleleDistance``.
      4. ``generateInitialTree`` and ``computeMST`` turn the distance matrix
         into a tree over ``realTree.vertices``.
      5. ``computeAncestrySwapError`` compares the real and inferred trees;
         its two error terms are summed and divided by the pair count.

    Args:
        aMatrix: 2-D array, one column per sample. Entries are scanned with a
            regex, so they are presumably allele strings -- TODO confirm.
        lafMatrix: array sliced as ``[:, sample]`` to feed each sample's LAF.
        afMatrix: array sliced as ``[:, sample]`` for each sample's
            ``afMeasurements``.
        realTree: reference tree; its ``vertices`` attribute is read here.
        chromosomes: passed through unchanged to every ``LAF`` constructor.
        positions: passed to every ``LAF`` constructor as both the third and
            the fourth argument.

    Returns:
        (absent-in-inferred error + present-in-inferred error) divided by
        ``float(noOfSamplePairs)``.
    """
    segmentation = Segmentation()
    segmentation.setSegmentationFromFile(
        simulationSettings.files['segmentationFile'])

    rowCount, sampleNum = aMatrix.shape[0], aMatrix.shape[1]

    # Translate the raw matrix into Alleles objects, cell by cell.
    aObjMatrix = np.empty(aMatrix.shape, dtype=object)
    for rowIdx in range(rowCount):
        for colIdx in range(sampleNum):
            cell = aMatrix[rowIdx][colIdx]
            countA = len(re.findall('A', cell))
            countB = len(re.findall('B', cell))
            aObjMatrix[rowIdx][colIdx] = Alleles(countA, countB)

    def buildDummySample(sampleIdx):
        # Minimal Sample carrying just the fields set here for the FST call.
        dummy = Sample(None, None)
        dummy.measurements = LAF(lafMatrix[:, sampleIdx], chromosomes,
                                 positions, positions)
        dummy.measurements.segmentation = segmentation
        dummy.afMeasurements = afMatrix[:, sampleIdx]
        return dummy

    # Pairwise distances between all samples (both orderings, incl. self).
    distanceMatrix = np.empty((sampleNum, sampleNum), dtype=float)
    for first in range(sampleNum):
        for second in range(sampleNum):
            # Fresh dummy objects are built for every pair, first then second.
            firstSample = buildDummySample(first)
            secondSample = buildDummySample(second)

            # Whole columns are compared in a single FST call.
            messages, pairDistance = FST().computeAlleleDistance(
                aObjMatrix[:, first], aObjMatrix[:, second],
                firstSample, secondSample)
            distanceMatrix[first, second] = pairDistance

    # Build the inferred tree from the distances.
    fullGraph = generateInitialTree(distanceMatrix, realTree.vertices)
    inferredTree = computeMST(fullGraph, realTree.vertices)

    absentError, presentError, noOfSamplePairs = computeAncestrySwapError(
        realTree, inferredTree)

    # Average the combined swap error over the number of sample pairs.
    summedError = absentError + presentError
    return summedError / float(noOfSamplePairs)