예제 #1
0
    def calcMutualInformation(self, TreeTable):
        """Compute the average pairwise mutual information over ``self.SNPList``.

        For every unordered pair of distinct SNPs (x, y) the quantity
        ``hx + hy - hxy`` is accumulated, where ``hx`` and ``hy`` are the
        SNPs' stored ``entropy`` attributes and ``hxy`` is the value returned
        by ``EntropyCalculator.main`` for the eight genome groups of both SNPs
        (presumably their joint entropy -- confirm against EntropyCalculator).

        Args:
            TreeTable: Passed through unchanged to ``EntropyCalculator.main``.

        Returns:
            None. The result is stored in ``self.avgMutualInformation``: the
            mean over all pairs, or ``0.0`` when ``self.SNPList`` holds fewer
            than two SNPs and therefore no pair exists.
        """
        from itertools import combinations

        sumMutualInformation = 0
        pairCount = 0

        # combinations(..., 2) yields each (i, j) pair with i < j exactly once,
        # in the same order as a nested index loop would.
        for snpX, snpY in combinations(self.SNPList, 2):
            hx = snpX.entropy
            hy = snpY.entropy
            hxy = EntropyCalculator.main(
                TreeTable,
                snpX.aGenomes, snpX.tGenomes, snpX.cGenomes, snpX.gGenomes,
                snpY.aGenomes, snpY.tGenomes, snpY.cGenomes, snpY.gGenomes)
            sumMutualInformation += hx + hy - hxy
            pairCount += 1

        # Without any pair the mean is undefined; report 0.0 rather than
        # failing with a ZeroDivisionError.
        if pairCount == 0:
            self.avgMutualInformation = 0.0
            return

        self.avgMutualInformation = sumMutualInformation / pairCount
예제 #2
0
    def calcMutualInformation(self, TreeTable):
        """Store the mean pairwise mutual information of ``self.SNPList``.

        Each pair of SNPs at positions ``i < j`` contributes
        ``hx + hy - hxy``: the two SNPs' ``entropy`` attributes minus the
        value ``EntropyCalculator.main`` returns when given the A/T/C/G genome
        groups of both SNPs (presumably the joint entropy -- confirm against
        EntropyCalculator).

        Args:
            TreeTable: Forwarded as the first argument of
                ``EntropyCalculator.main``.

        Returns:
            None. ``self.avgMutualInformation`` is set to the mean of all pair
            contributions, or to ``0.0`` if there are fewer than two SNPs.
        """
        snps = self.SNPList
        snpCount = len(snps)

        sumMutualInformation = 0
        pairCount = 0

        # range(snpCount - 1) is empty for zero or one SNP, so no explicit
        # "is this the last element" guard is needed.
        for i in range(snpCount - 1):
            first = snps[i]
            for j in range(i + 1, snpCount):
                second = snps[j]
                hx = first.entropy
                hy = second.entropy
                hxy = EntropyCalculator.main(
                    TreeTable, first.aGenomes, first.tGenomes,
                    first.cGenomes, first.gGenomes, second.aGenomes,
                    second.tGenomes, second.cGenomes, second.gGenomes)
                sumMutualInformation += hx + hy - hxy
                pairCount += 1

        # No pairs means nothing to average; avoid dividing by zero.
        if pairCount:
            self.avgMutualInformation = sumMutualInformation / pairCount
        else:
            self.avgMutualInformation = 0.0
예제 #3
0
def main(inputFile, treeTable):
    """Parse a tab-delimited SNP matrix file into a list of ``SNP`` objects.

    Layout read by this function:
      * Line 1: the genome count, parsed as an integer starting at character
        offset 12 (the first 12 characters are skipped; presumably a
        fixed-width label -- confirm against the file format). One is added
        to the count to include the reference genome.
      * Line 2: tab-separated header row; columns 2 .. numGenomes + 1 are
        taken as genome names.
      * Remaining lines: chromosome, position, then one call per genome.

    For every data line the genomes are grouped by their call ('A', 'T', 'C'
    or 'G'; any other call is ignored), an entropy value is obtained from
    ``EntropyCalculator.main`` and an ``SNP`` record is appended. Blank lines
    are skipped.

    Args:
        inputFile: Path of the file to read.
        treeTable: Passed through unchanged to ``EntropyCalculator.main``.

    Returns:
        List of ``SNP`` objects in file order.

    Raises:
        IndexError: If a data line has fewer than numGenomes + 2 columns, or
            the header lacks a name for a genome with an A/T/C/G call.
        ValueError: If the genome count or a position is not an integer.
    """
    ISGData = []

    # The context manager closes the file even when parsing raises.
    with open(inputFile, "r") as fo:
        # read in number of genomes from file
        fo.seek(12)
        numGenomes = int(fo.readline().strip())
        numGenomes += 1  # add one to include reference genome
        lastColumn = numGenomes + 2

        # header row: genome names sit in columns 2 .. numGenomes + 1
        arrGenomeName = fo.readline().strip().split("\t")[2:lastColumn]

        # Determine genome group SNP differentiates
        for line in fo:
            stripped = line.strip()
            if not stripped:
                # blank lines carry no SNP data
                continue
            arrLine = stripped.split("\t")
            strChrom = arrLine[0]
            strPos = arrLine[1]

            # Explicit indexing (not slicing) so that a truncated row still
            # raises IndexError instead of being silently shortened.
            arrSNP = [arrLine[i] for i in range(2, lastColumn)]

            # sort genomes into groups by SNP call
            groups = {'A': [], 'T': [], 'C': [], 'G': []}
            for idx, call in enumerate(arrSNP):
                bucket = groups.get(call)
                if bucket is not None:
                    bucket.append(arrGenomeName[idx])
            arrA = groups['A']
            arrT = groups['T']
            arrC = groups['C']
            arrG = groups['G']

            # calculate entropy values
            entropy = EntropyCalculator.main(treeTable, frozenset(arrA),
                                             frozenset(arrT), frozenset(arrC),
                                             frozenset(arrG))

            ISGData.append(
                SNP(strChrom, int(strPos), entropy, arrA, arrT, arrC, arrG))

    return ISGData
예제 #4
0
def main(inputFile, treeTable):
    """Read a tab-delimited SNP matrix and build one ``SNP`` per data line.

    The file is consumed as follows:
      * First line: characters from offset 12 onward are parsed as the number
        of genomes (the leading 12 characters are skipped; presumably a fixed
        label -- confirm against the file format). The count is increased by
        one to include the reference genome.
      * Second line: header row split on tabs; the entries in columns
        2 .. numGenomes + 1 become the genome names.
      * Every further non-blank line: column 0 is the chromosome, column 1
        the position and columns 2 .. numGenomes + 1 the per-genome calls.

    Genomes are bucketed by call ('A', 'T', 'C', 'G'; other calls are
    ignored). The four buckets are passed as frozensets to
    ``EntropyCalculator.main`` and, as lists, to the ``SNP`` constructor.

    Args:
        inputFile: Path of the file to read.
        treeTable: Forwarded as the first argument of
            ``EntropyCalculator.main``.

    Returns:
        List of ``SNP`` objects, one per data line, in file order.

    Raises:
        IndexError: If a data line is shorter than numGenomes + 2 columns, or
            a genome with an A/T/C/G call has no name in the header.
        ValueError: If the genome count or a position cannot be parsed as int.
    """
    ISGData = []

    # "with" guarantees the handle is released even if a row fails to parse.
    with open(inputFile, "r") as fo:
        # read in number of genomes from file
        fo.seek(12)
        numGenomes = int(fo.readline().strip()) + 1  # +1: reference genome
        firstCall = 2
        endCall = numGenomes + 2

        # place genome names in array
        headerFields = fo.readline().strip().split("\t")
        arrGenomeName = headerFields[firstCall:endCall]

        for rawLine in fo:
            content = rawLine.strip()
            if content == "":
                # skip blank lines instead of failing on a missing column
                continue

            arrLine = content.split("\t")
            strChrom = arrLine[0]
            strPos = arrLine[1]

            # Index each column explicitly so short rows raise IndexError
            # rather than being truncated without notice.
            arrSNP = [arrLine[col] for col in range(firstCall, endCall)]

            # sort genomes into groups by SNP call
            arrA = []
            arrT = []
            arrC = []
            arrG = []
            byCall = {'A': arrA, 'T': arrT, 'C': arrC, 'G': arrG}
            for position, call in enumerate(arrSNP):
                if call in byCall:
                    byCall[call].append(arrGenomeName[position])

            # calculate entropy values
            entropy = EntropyCalculator.main(
                treeTable, frozenset(arrA), frozenset(arrT),
                frozenset(arrC), frozenset(arrG))

            ISGData.append(
                SNP(strChrom, int(strPos), entropy, arrA, arrT, arrC, arrG))

    return ISGData