def calcMutualInformation(self, TreeTable):
    """Store the average pairwise mutual information of self.SNPList.

    For every unordered pair of SNPs in ``self.SNPList`` the quantity
    ``hx + hy - hxy`` is accumulated, where ``hx`` and ``hy`` are the
    SNPs' precomputed ``entropy`` attributes and ``hxy`` is the value
    returned by ``EntropyCalculator.main`` for the two SNPs' A/T/C/G genome
    groups (presumably their joint entropy -- confirm against
    EntropyCalculator).

    Args:
        TreeTable: passed through unchanged to ``EntropyCalculator.main``.

    Side effects:
        Sets ``self.avgMutualInformation`` to the mean over all pairs, or
        to 0.0 when there are fewer than two SNPs (no pairs exist, so the
        mean would otherwise be a division by zero).
    """
    snps = self.SNPList
    snpCount = len(snps)
    sumMutualInformation = 0
    pairCount = 0

    # Visit each unordered pair (i, j) with i < j exactly once.
    for i in range(snpCount - 1):
        first = snps[i]
        for j in range(i + 1, snpCount):
            second = snps[j]
            hx = first.entropy
            hy = second.entropy
            hxy = EntropyCalculator.main(
                TreeTable,
                first.aGenomes, first.tGenomes,
                first.cGenomes, first.gGenomes,
                second.aGenomes, second.tGenomes,
                second.cGenomes, second.gGenomes)
            sumMutualInformation += hx + hy - hxy
            pairCount += 1

    # Guard the empty / single-SNP case instead of raising ZeroDivisionError.
    if pairCount:
        self.avgMutualInformation = sumMutualInformation / pairCount
    else:
        self.avgMutualInformation = 0.0
def calcMutualInformation(self, TreeTable):
    """Store the average pairwise mutual information of self.SNPList.

    For every unordered pair of SNPs in ``self.SNPList`` the quantity
    ``hx + hy - hxy`` is accumulated, where ``hx`` and ``hy`` are the
    SNPs' precomputed ``entropy`` attributes and ``hxy`` is the value
    returned by ``EntropyCalculator.main`` for the two SNPs' A/T/C/G genome
    groups (presumably their joint entropy -- confirm against
    EntropyCalculator).

    Args:
        TreeTable: passed through unchanged to ``EntropyCalculator.main``.

    Side effects:
        Sets ``self.avgMutualInformation`` to the mean over all pairs, or
        to 0.0 when there are fewer than two SNPs (no pairs exist, so the
        mean would otherwise be a division by zero).
    """
    snps = self.SNPList
    snpCount = len(snps)
    sumMutualInformation = 0
    pairCount = 0

    # Visit each unordered pair (i, j) with i < j exactly once.
    for i in range(snpCount - 1):
        first = snps[i]
        for j in range(i + 1, snpCount):
            second = snps[j]
            hx = first.entropy
            hy = second.entropy
            hxy = EntropyCalculator.main(
                TreeTable,
                first.aGenomes, first.tGenomes,
                first.cGenomes, first.gGenomes,
                second.aGenomes, second.tGenomes,
                second.cGenomes, second.gGenomes)
            sumMutualInformation += hx + hy - hxy
            pairCount += 1

    # Guard the empty / single-SNP case instead of raising ZeroDivisionError.
    if pairCount:
        self.avgMutualInformation = sumMutualInformation / pairCount
    else:
        self.avgMutualInformation = 0.0
def main(inputFile, treeTable):
    """Parse a tab-delimited SNP matrix file into a list of SNP objects.

    File layout as read by this function:
      * line 1: a 12-character prefix followed by the number of genomes
        (the reference genome is not counted, so one is added);
      * line 2: header row; columns 2 .. numGenomes + 1 hold genome names;
      * remaining lines: chromosome, position, then one base call per
        genome in the same column order as the header.

    For each data row the genomes are grouped by their call (A, T, C or
    G; any other call is ignored), an entropy value is obtained from
    ``EntropyCalculator.main`` and a ``SNP`` record is appended.

    Args:
        inputFile: path of the file to read.
        treeTable: passed through unchanged to ``EntropyCalculator.main``.

    Returns:
        List of ``SNP`` objects, one per non-blank data row, in file order.

    Raises:
        IndexError: if a data row has fewer columns than the genome count
            requires.
        ValueError: if the genome count or a position is not an integer.
    """
    ISGData = []

    # 'with' guarantees the file is closed even if parsing raises.
    with open(inputFile, "r") as fo:
        # Skip the fixed-width 12-character label preceding the genome count.
        fo.seek(12)
        numGenomes = int(fo.readline().strip())
        numGenomes += 1  # add one to include reference genome
        lastColumn = numGenomes + 2

        # Genome names sit in the header after the chromosome/position columns.
        header = fo.readline().strip().split("\t")
        arrGenomeName = header[2:lastColumn]

        for line in fo:
            row = line.strip()
            if not row:
                # Blank (e.g. trailing) lines carry no SNP; skip them
                # rather than failing on the missing position column.
                continue
            fields = row.split("\t")
            strChrom = fields[0]
            strPos = fields[1]

            # Sort genomes into groups by SNP call. Indexing (rather than
            # slicing) keeps the IndexError on truncated rows.
            groups = {'A': [], 'T': [], 'C': [], 'G': []}
            for column in range(2, lastColumn):
                call = fields[column]
                if call in groups:
                    groups[call].append(arrGenomeName[column - 2])

            # NOTE: no minimum-group-count filter is applied; every row
            # yields a SNP record.
            entropy = EntropyCalculator.main(
                treeTable,
                frozenset(groups['A']), frozenset(groups['T']),
                frozenset(groups['C']), frozenset(groups['G']))
            ISGData.append(
                SNP(strChrom, int(strPos), entropy,
                    groups['A'], groups['T'], groups['C'], groups['G']))

    return ISGData
def main(inputFile, treeTable):
    """Parse a tab-delimited SNP matrix file into a list of SNP objects.

    File layout as read by this function:
      * line 1: a 12-character prefix followed by the number of genomes
        (the reference genome is not counted, so one is added);
      * line 2: header row; columns 2 .. numGenomes + 1 hold genome names;
      * remaining lines: chromosome, position, then one base call per
        genome in the same column order as the header.

    For each data row the genomes are grouped by their call (A, T, C or
    G; any other call is ignored), an entropy value is obtained from
    ``EntropyCalculator.main`` and a ``SNP`` record is appended.

    Args:
        inputFile: path of the file to read.
        treeTable: passed through unchanged to ``EntropyCalculator.main``.

    Returns:
        List of ``SNP`` objects, one per non-blank data row, in file order.

    Raises:
        IndexError: if a data row has fewer columns than the genome count
            requires.
        ValueError: if the genome count or a position is not an integer.
    """
    ISGData = []

    # 'with' guarantees the file is closed even if parsing raises.
    with open(inputFile, "r") as fo:
        # Skip the fixed-width 12-character label preceding the genome count.
        fo.seek(12)
        numGenomes = int(fo.readline().strip())
        numGenomes += 1  # add one to include reference genome
        lastColumn = numGenomes + 2

        # Genome names sit in the header after the chromosome/position columns.
        header = fo.readline().strip().split("\t")
        arrGenomeName = header[2:lastColumn]

        for line in fo:
            row = line.strip()
            if not row:
                # Blank (e.g. trailing) lines carry no SNP; skip them
                # rather than failing on the missing position column.
                continue
            fields = row.split("\t")
            strChrom = fields[0]
            strPos = fields[1]

            # Sort genomes into groups by SNP call. Indexing (rather than
            # slicing) keeps the IndexError on truncated rows.
            groups = {'A': [], 'T': [], 'C': [], 'G': []}
            for column in range(2, lastColumn):
                call = fields[column]
                if call in groups:
                    groups[call].append(arrGenomeName[column - 2])

            # NOTE: no minimum-group-count filter is applied; every row
            # yields a SNP record.
            entropy = EntropyCalculator.main(
                treeTable,
                frozenset(groups['A']), frozenset(groups['T']),
                frozenset(groups['C']), frozenset(groups['G']))
            ISGData.append(
                SNP(strChrom, int(strPos), entropy,
                    groups['A'], groups['T'], groups['C'], groups['G']))

    return ISGData