def numShifts(rna, secStr):
    """Count, by brute force, the shift moves available from a structure.

    Each base pair (i, j) returned by basePairList is taken out of a working
    copy of the pair list in turn. Every position x in 1..len(rna) is then
    tried as a replacement partner: first for i and, only when x is not
    admissible for i, for j. A candidate is counted when the working list
    with the new pair appended is accepted by isSecStr.

    rna and secStr are handled as 1-indexed: a '$' is prepended to both so
    that string index k addresses position k.

    Returns the number of counted shifts.
    """
    # print(...) with one argument prints the same text as the print statement.
    if VERBOSE_PRINT:
        print("Sec str: %s" % secStr)
    length = len(rna)
    originalRna = rna  # unpadded sequence, used for dot-bracket output
    rna = '$' + rna
    secStr = '$' + secStr
    pairs = basePairList(secStr)
    workList = copy.deepcopy(pairs)
    count = 0
    for (i, j) in pairs:
        workList.remove((i, j))
        if VERBOSE_PRINT:
            print("Remove (%d,%d) from %s" % (i, j, secStr[1:]))
        for x in range(1, length + 1):
            # Decide which end of the removed pair keeps its position.
            # End i has priority; end j is only examined when i fails.
            if abs(i - x) > THETA and x != j and basePair(rna[i], rna[x]):
                fixed = i
            elif abs(j - x) > THETA and x != i and basePair(rna[j], rna[x]):
                fixed = j
            else:
                continue
            # Pairs are stored with the smaller position first.
            candidate = (fixed, x) if fixed < x else (x, fixed)
            workList.append(candidate)
            if isSecStr(workList):
                count += 1
                if VERBOSE_PRINT:
                    ss = basePairList2dotBracketNotation(originalRna, workList)
                    print('(%d,%d)\t%s' % (candidate[0], candidate[1], ss))
            workList.remove(candidate)
        workList.append((i, j))  # put back the base pair temporarily removed
    if PRINT:
        # secStr[1:] drops the '$' prepended above
        print("%s has %s shifts" % (secStr[1:], count))
    return count
def numShifts(rna, secStr):
    """Count, by brute force, the shift moves available from a structure.

    Each base pair (i, j) returned by basePairList is taken out of a working
    copy of the pair list in turn. Every position x in 1..len(rna) is then
    tried as a replacement partner: first for i and, only when x is not
    admissible for i, for j. A candidate is counted when the working list
    with the new pair appended is accepted by isSecStr.

    rna and secStr are handled as 1-indexed: a '$' is prepended to both so
    that string index k addresses position k.

    Returns the number of counted shifts.
    """
    # print(...) with one argument prints the same text as the print statement.
    if VERBOSE_PRINT:
        print("Sec str: %s" % secStr)
    length = len(rna)
    originalRna = rna  # unpadded sequence, used for dot-bracket output
    rna = '$' + rna
    secStr = '$' + secStr
    pairs = basePairList(secStr)
    workList = copy.deepcopy(pairs)
    count = 0
    for (i, j) in pairs:
        workList.remove((i, j))
        if VERBOSE_PRINT:
            print("Remove (%d,%d) from %s" % (i, j, secStr[1:]))
        for x in range(1, length + 1):
            # Decide which end of the removed pair keeps its position.
            # End i has priority; end j is only examined when i fails.
            if abs(i - x) > THETA and x != j and basePair(rna[i], rna[x]):
                fixed = i
            elif abs(j - x) > THETA and x != i and basePair(rna[j], rna[x]):
                fixed = j
            else:
                continue
            # Pairs are stored with the smaller position first.
            candidate = (fixed, x) if fixed < x else (x, fixed)
            workList.append(candidate)
            if isSecStr(workList):
                count += 1
                if VERBOSE_PRINT:
                    ss = basePairList2dotBracketNotation(originalRna, workList)
                    print('(%d,%d)\t%s' % (candidate[0], candidate[1], ss))
            workList.remove(candidate)
        workList.append((i, j))  # put back the base pair temporarily removed
    if PRINT:
        # secStr[1:] drops the '$' prepended above
        print("%s has %s shifts" % (secStr[1:], count))
    return count
def get2DcontactOrder(secStr):
    """Return the pair (a_co, r_co) of contact-order values for secStr.

    With S the sum of |bp[1] - bp[0]| over the base pairs given by
    misc.basePairList(secStr) and m the number of those pairs:
        a_co = S / m
        r_co = S / (len(secStr) * m)
    When there are no base pairs both values are float("inf").
    """
    pairs = misc.basePairList(secStr)
    pairCount = len(pairs)
    if pairCount == 0:
        return float("inf"), float("inf")
    # The float start value keeps the total (and both quotients) floating point.
    span = sum((abs(pair[1] - pair[0]) for pair in pairs), 0.0)
    absolute = float(span) / pairCount
    relative = span / (len(secStr) * pairCount)
    return absolute, relative
def get2DcontactOrder(secStr):
    """Return the pair (a_co, r_co) of contact-order values for secStr.

    With S the sum of |bp[1] - bp[0]| over the base pairs given by
    misc.basePairList(secStr) and m the number of those pairs:
        a_co = S / m
        r_co = S / (len(secStr) * m)
    When there are no base pairs both values are float("inf").
    """
    pairs = misc.basePairList(secStr)
    pairCount = len(pairs)
    if pairCount == 0:
        return float("inf"), float("inf")
    # The float start value keeps the total (and both quotients) floating point.
    span = sum((abs(pair[1] - pair[0]) for pair in pairs), 0.0)
    absolute = float(span) / pairCount
    relative = span / (len(secStr) * pairCount)
    return absolute, relative
def numBasePairAdditionsRemovals(rna, secStr):
    """Count, by brute force, base pair additions and removals for secStr.

    The count starts at the number of base pairs returned by basePairList
    (presumably one removal per existing pair, going by the function name).
    It is then incremented for every pair (x, y) with y - x > THETA such that
    basePair(rna[x], rna[y]) holds, the pair is not already present, and the
    pair list with (x, y) appended is accepted by isSecStr.

    rna and secStr are handled as 1-indexed: a '$' is prepended to both so
    that string index k addresses position k.

    Returns the resulting count.
    """
    # print(...) with one argument prints the same text as the print statement.
    if PRINTbasepairadditionsremovals:
        print("Sec str: %s" % secStr)
    length = len(rna)
    originalRna = rna  # unpadded sequence, used for dot-bracket output
    rna = '$' + rna
    secStr = '$' + secStr
    pairs = basePairList(secStr)
    workList = copy.deepcopy(pairs)
    count = len(pairs)
    for x in range(1, length - THETA):
        for y in range(x + THETA + 1, length + 1):
            if not basePair(rna[x], rna[y]):
                continue
            candidate = (x, y)
            if candidate in workList:
                continue
            # Try the addition, then undo it whatever the outcome.
            workList.append(candidate)
            if isSecStr(workList):
                count += 1
                if PRINTbasepairadditionsremovals:
                    ss = basePairList2dotBracketNotation(originalRna, workList)
                    print('(%d,%d)\t%s' % (x, y, ss))
            workList.remove(candidate)
    return count
def numBasePairAdditionsRemovals(rna, secStr):
    """Count, by brute force, base pair additions and removals for secStr.

    The count starts at the number of base pairs returned by basePairList
    (presumably one removal per existing pair, going by the function name).
    It is then incremented for every pair (x, y) with y - x > THETA such that
    basePair(rna[x], rna[y]) holds, the pair is not already present, and the
    pair list with (x, y) appended is accepted by isSecStr.

    rna and secStr are handled as 1-indexed: a '$' is prepended to both so
    that string index k addresses position k.

    Returns the resulting count.
    """
    # print(...) with one argument prints the same text as the print statement.
    if PRINTbasepairadditionsremovals:
        print("Sec str: %s" % secStr)
    length = len(rna)
    originalRna = rna  # unpadded sequence, used for dot-bracket output
    rna = '$' + rna
    secStr = '$' + secStr
    pairs = basePairList(secStr)
    workList = copy.deepcopy(pairs)
    count = len(pairs)
    for x in range(1, length - THETA):
        for y in range(x + THETA + 1, length + 1):
            if not basePair(rna[x], rna[y]):
                continue
            candidate = (x, y)
            if candidate in workList:
                continue
            # Try the addition, then undo it whatever the outcome.
            workList.append(candidate)
            if isSecStr(workList):
                count += 1
                if PRINTbasepairadditionsremovals:
                    ss = basePairList2dotBracketNotation(originalRna, workList)
                    print('(%d,%d)\t%s' % (x, y, ss))
            workList.remove(candidate)
    return count
def main(filename):
    """Print, per mutation count k, the correlation between per-position
    mutation frequency and per-position base pairing entropy.

    Input, as consumed by this function:
      * line 1: the reference RNA sequence (its length fixes n);
      * header lines starting with '>': the third whitespace-separated token
        of the first header is read as the number of samples, and the
        second-to-last token of every header as the mutation count k of the
        group it opens;
      * after a header, records of two lines each: a sequence, then a
        structure string that is passed to basePairList.

    For each k the function accumulates
      D[k][i]    (0 <= i < n): number of samples whose base at i differs from
                 the reference, later divided by the number of samples;
      SS[k][i][j] (1 <= i, j <= n): number of samples in which i pairs with j;
      H[k][i]    (1 <= i <= n): sum over j != i of -xLogX(p(i,j)), plus
                 -xLogX(p(i,i)) where p(i,i) = 1 - sum of the p(i,j).
    and finally prints one tab-separated row per k in 1..numMut (numMut being
    the value taken from the last header) with corrCoeff(D[k], H[k]) and the
    mean and stdev of both lists.

    Returns None; results go to standard output.
    """
    D = {}
    SS = {}
    H = {}  # H is entropy
    num = 0
    numMut = 0
    numSamples = 0

    def positionEntropy(k, total):
        # Entropy per position i (1..n) for group k; pair counts are turned
        # into probabilities by dividing by total.
        entropy = {}
        for i in range(1, n + 1):
            sumEntropyForI = 0.0
            probII = 1.0  # p(i,i) = 1.0 - sum of p(i,j) over all j != i
            for j in range(1, n + 1):
                if i != j:
                    probIJ = SS[k][i][j] / float(total)
                    sumEntropyForI += -xLogX(probIJ)
                    probII -= probIJ
            entropy[i] = sumEntropyForI + -xLogX(probII)
        return entropy

    # The with-block closes the file even if parsing raises.
    with open(filename) as handle:
        rna0 = handle.readline().strip()
        # NOTE(review): the result is not used below; the call is kept in case
        # computeViennaSecStr is relied on for side effects -- confirm.
        secStr0 = computeViennaSecStr(rna0)
        n = len(rna0)
        line = handle.readline()
        while line:
            if line[0] == '>':
                if numMut == 0:
                    # first header: get number of samples
                    numSamples = int(line.split()[2])
                else:
                    # a new group starts: finish the entropy of the previous one
                    H[numMut] = positionEntropy(numMut, numSamples)
                # Now set up the counters for the new number of mutations
                words = line.split()
                numMut = int(words[-2])
                D[numMut] = {}
                SS[numMut] = {}
                # WARNING: indices in D are 0<=i<n, those in SS are 1<=i<=n
                for i in range(1, n + 1):
                    SS[numMut][i] = {}
                    for j in range(1, n + 1):
                        SS[numMut][i][j] = 0.0
                    D[numMut][i - 1] = 0.0
                num = 0  # restart the per-group record counter
                line = handle.readline()
                continue
            num += 1
            if DEBUG:
                print("%s %s" % (numMut, num))
            rna = line.strip().upper()
            secStr = handle.readline().strip()
            # A set makes each membership test below O(1) instead of a list scan.
            # NOTE(review): the lookups assume basePairList yields 1-based
            # (i, j) tuples with i < j for the raw structure string -- confirm.
            bps = set(basePairList(secStr))
            for i in range(n):
                if rna0[i] != rna[i]:
                    D[numMut][i] += 1.0
                baseI = i + 1  # 1<=baseI<=n, while 0<=i<n indexes the string
                for j in range(1, n + 1):
                    if baseI < j and (baseI, j) in bps:
                        SS[numMut][baseI][j] += 1
                    elif j < baseI and (j, baseI) in bps:
                        SS[numMut][baseI][j] += 1
            line = handle.readline()

    # Complete the entropy for the last group. As in the original code this
    # one is normalised by the counted number of records (num), whereas the
    # earlier groups use the count declared in the first header (numSamples);
    # the two agree when the last group holds numSamples records.
    H[numMut] = positionEntropy(numMut, num)

    # Normalize D values to lie in [0,1]
    for k in range(1, numMut + 1):
        for i in range(n):
            D[k][i] /= numSamples

    # Now compute and print the correlation coefficient for every k
    print("k\tcorrCoeff\tmean1\t\tstdev1\t\tmean2\t\tstdev2")
    for k in range(1, numMut + 1):
        L1 = [D[k][i] for i in range(n)]
        L2 = [H[k][i + 1] for i in range(n)]  # indices in H run from 1 to n
        mean1, stdev1, max1, min1 = getSampleStats(L1)
        mean2, stdev2, max2, min2 = getSampleStats(L2)
        print("%d\t%f\t%f\t%f\t%f\t%f" % (num_or_k(k), corrCoeff(L1, L2), mean1, stdev1, mean2, stdev2)
              if False else
              "%d\t%f\t%f\t%f\t%f\t%f" % (k, corrCoeff(L1, L2), mean1, stdev1, mean2, stdev2))
def main(filename):
    """Print, per mutation count k, the correlation between per-position
    mutation frequency and per-position base pairing entropy.

    Input, as consumed by this function:
      * line 1: the reference RNA sequence (its length fixes n);
      * header lines starting with '>': the third whitespace-separated token
        of the first header is read as the number of samples, and the
        second-to-last token of every header as the mutation count k of the
        group it opens;
      * after a header, records of two lines each: a sequence, then a
        structure string that is passed to basePairList.

    For each k the function accumulates
      D[k][i]    (0 <= i < n): number of samples whose base at i differs from
                 the reference, later divided by the number of samples;
      SS[k][i][j] (1 <= i, j <= n): number of samples in which i pairs with j;
      H[k][i]    (1 <= i <= n): sum over j != i of -xLogX(p(i,j)), plus
                 -xLogX(p(i,i)) where p(i,i) = 1 - sum of the p(i,j).
    and finally prints one tab-separated row per k in 1..numMut (numMut being
    the value taken from the last header) with corrCoeff(D[k], H[k]) and the
    mean and stdev of both lists.

    Returns None; results go to standard output.
    """
    D = {}
    SS = {}
    H = {}  # H is entropy
    num = 0
    numMut = 0
    numSamples = 0

    def positionEntropy(k, total):
        # Entropy per position i (1..n) for group k; pair counts are turned
        # into probabilities by dividing by total.
        entropy = {}
        for i in range(1, n + 1):
            sumEntropyForI = 0.0
            probII = 1.0  # p(i,i) = 1.0 - sum of p(i,j) over all j != i
            for j in range(1, n + 1):
                if i != j:
                    probIJ = SS[k][i][j] / float(total)
                    sumEntropyForI += -xLogX(probIJ)
                    probII -= probIJ
            entropy[i] = sumEntropyForI + -xLogX(probII)
        return entropy

    # The with-block closes the file even if parsing raises.
    with open(filename) as handle:
        rna0 = handle.readline().strip()
        # NOTE(review): the result is not used below; the call is kept in case
        # computeViennaSecStr is relied on for side effects -- confirm.
        secStr0 = computeViennaSecStr(rna0)
        n = len(rna0)
        line = handle.readline()
        while line:
            if line[0] == '>':
                if numMut == 0:
                    # first header: get number of samples
                    numSamples = int(line.split()[2])
                else:
                    # a new group starts: finish the entropy of the previous one
                    H[numMut] = positionEntropy(numMut, numSamples)
                # Now set up the counters for the new number of mutations
                words = line.split()
                numMut = int(words[-2])
                D[numMut] = {}
                SS[numMut] = {}
                # WARNING: indices in D are 0<=i<n, those in SS are 1<=i<=n
                for i in range(1, n + 1):
                    SS[numMut][i] = {}
                    for j in range(1, n + 1):
                        SS[numMut][i][j] = 0.0
                    D[numMut][i - 1] = 0.0
                num = 0  # restart the per-group record counter
                line = handle.readline()
                continue
            num += 1
            if DEBUG:
                print("%s %s" % (numMut, num))
            rna = line.strip().upper()
            secStr = handle.readline().strip()
            # A set makes each membership test below O(1) instead of a list scan.
            # NOTE(review): the lookups assume basePairList yields 1-based
            # (i, j) tuples with i < j for the raw structure string -- confirm.
            bps = set(basePairList(secStr))
            for i in range(n):
                if rna0[i] != rna[i]:
                    D[numMut][i] += 1.0
                baseI = i + 1  # 1<=baseI<=n, while 0<=i<n indexes the string
                for j in range(1, n + 1):
                    if baseI < j and (baseI, j) in bps:
                        SS[numMut][baseI][j] += 1
                    elif j < baseI and (j, baseI) in bps:
                        SS[numMut][baseI][j] += 1
            line = handle.readline()

    # Complete the entropy for the last group. As in the original code this
    # one is normalised by the counted number of records (num), whereas the
    # earlier groups use the count declared in the first header (numSamples);
    # the two agree when the last group holds numSamples records.
    H[numMut] = positionEntropy(numMut, num)

    # Normalize D values to lie in [0,1]
    for k in range(1, numMut + 1):
        for i in range(n):
            D[k][i] /= numSamples

    # Now compute and print the correlation coefficient for every k
    print("k\tcorrCoeff\tmean1\t\tstdev1\t\tmean2\t\tstdev2")
    for k in range(1, numMut + 1):
        L1 = [D[k][i] for i in range(n)]
        L2 = [H[k][i + 1] for i in range(n)]  # indices in H run from 1 to n
        mean1, stdev1, max1, min1 = getSampleStats(L1)
        mean2, stdev2, max2, min2 = getSampleStats(L2)
        print("%d\t%f\t%f\t%f\t%f\t%f" % (k, corrCoeff(L1, L2), mean1, stdev1, mean2, stdev2))