Beispiel #1
0
def numShifts(rna, secStr):
    """Count, by brute force, the shift moves available from a structure.

    For every base pair (i, j) of the structure the pair is taken out and,
    for each position x, a replacement pair that keeps one endpoint (i or j)
    and uses x as the other endpoint is tried.  A replacement is counted when
    the two positions are more than THETA apart, basePair() accepts the two
    nucleotides, and isSecStr() accepts the resulting pair list.

    rna    -- nucleotide sequence (plain string, no padding)
    secStr -- dot-bracket string for the same sequence (no padding)

    Returns the number of accepted replacements.  Positions are 1-indexed
    internally: a '$' is prepended to both strings before they are used.
    """
    verbose = VERBOSE_PRINT
    if verbose:
        print("Sec str: %s" % secStr)
    length = len(rna)
    padded = '$' + rna  # index 0 is a dummy so that nucleotides sit at 1..n
    pairs = basePairList('$' + secStr)
    scratch = copy.deepcopy(pairs)  # working copy that is edited and restored
    total = 0
    for (i, j) in pairs:
        scratch.remove((i, j))
        if verbose:
            print("Remove (%d,%d) from %s" % (i, j, secStr))
        for x in range(1, length + 1):
            # Decide which endpoint of (i, j) is kept; i is tried first and
            # j is only considered when the test on i fails.
            if (abs(i - x) > THETA and x != j
                    and basePair(padded[i], padded[x])):
                kept = i
            elif (abs(j - x) > THETA and x != i
                    and basePair(padded[j], padded[x])):
                kept = j
            else:
                continue
            # Pairs are stored with the smaller position first.
            shifted = (kept, x) if kept < x else (x, kept)
            scratch.append(shifted)
            if isSecStr(scratch):
                total += 1
                if verbose:
                    ss = basePairList2dotBracketNotation(rna, scratch)
                    print('(%d,%d)\t%s' % (shifted[0], shifted[1], ss))
            scratch.remove(shifted)
        scratch.append((i, j))  # put back the base pair temporarily removed
    if PRINT:
        print("%s has %s shifts" % (secStr, total))
    return total
def numShifts(rna,secStr):
  """Count, by brute force, the shift moves available from a structure.

  For every base pair (i, j) of the structure the pair is taken out and,
  for each position x, a replacement pair that keeps one endpoint (i or j)
  and uses x as the other endpoint is tried.  A replacement is counted when
  the two positions are more than THETA apart, basePair() accepts the two
  nucleotides, and isSecStr() accepts the resulting pair list.

  rna    -- nucleotide sequence (plain string, no padding)
  secStr -- dot-bracket string for the same sequence (no padding)

  Returns the number of accepted replacements.  Positions are 1-indexed
  internally: a '$' is prepended to both strings before they are used.
  """
  verbose = VERBOSE_PRINT
  if verbose:
    print("Sec str: %s" % secStr)
  length  = len(rna)
  padded  = '$'+rna  # index 0 is a dummy so that nucleotides sit at 1..n
  pairs   = basePairList('$'+secStr)
  scratch = copy.deepcopy(pairs)  # working copy that is edited and restored
  total   = 0
  for (i,j) in pairs:
    scratch.remove( (i,j) )
    if verbose:
      print("Remove (%d,%d) from %s" % (i,j,secStr))
    for x in range(1,length+1):
      # Decide which endpoint of (i,j) is kept; i is tried first and
      # j is only considered when the test on i fails.
      if abs(i-x)>THETA and x!=j and basePair(padded[i],padded[x]):
        kept = i
      elif abs(j-x)>THETA and x!=i and basePair(padded[j],padded[x]):
        kept = j
      else:
        continue
      # Pairs are stored with the smaller position first.
      shifted = (kept,x) if kept<x else (x,kept)
      scratch.append(shifted)
      if isSecStr(scratch):
        total += 1
        if verbose:
          ss = basePairList2dotBracketNotation(rna,scratch)
          print('(%d,%d)\t%s' % (shifted[0],shifted[1],ss))
      scratch.remove(shifted)
    scratch.append( (i,j) ) #put back the base pair temporarily removed
  if PRINT:
    print("%s has %s shifts" % (secStr,total))
  return total
Beispiel #3
0
def get2DcontactOrder(secStr):
    """Return the (absolute, relative) contact order of a secondary structure.

    The absolute contact order is the mean of |j - i| over the base pairs
    (i, j) reported by misc.basePairList(secStr); the relative contact order
    is that mean divided by len(secStr).  When there are no base pairs both
    values are float("inf").
    """
    pairs = misc.basePairList(secStr)
    pairCount = len(pairs)
    if pairCount == 0:
        unpaired = float("inf")
        return unpaired, unpaired
    # Start the sum at 0.0 so the total is a float and the divisions below
    # are true divisions.
    totalSpan = sum((abs(pair[1] - pair[0]) for pair in pairs), 0.0)
    absoluteOrder = totalSpan / pairCount
    relativeOrder = totalSpan / (len(secStr) * pairCount)
    return absoluteOrder, relativeOrder
Beispiel #4
0
def get2DcontactOrder(secStr):
    """Return the (absolute, relative) contact order of a secondary structure.

    The absolute contact order is the mean of |j - i| over the base pairs
    (i, j) reported by misc.basePairList(secStr); the relative contact order
    is that mean divided by len(secStr).  When there are no base pairs both
    values are float("inf").
    """
    pairs = misc.basePairList(secStr)
    pairCount = len(pairs)
    if pairCount == 0:
        unpaired = float("inf")
        return unpaired, unpaired
    # Start the sum at 0.0 so the total is a float and the divisions below
    # are true divisions.
    totalSpan = sum((abs(pair[1] - pair[0]) for pair in pairs), 0.0)
    absoluteOrder = totalSpan / pairCount
    relativeOrder = totalSpan / (len(secStr) * pairCount)
    return absoluteOrder, relativeOrder
def numBasePairAdditionsRemovals(rna,secStr):
  """Count, by brute force, single base-pair additions and removals.

  The count starts at the number of base pairs in the structure (each one
  can be removed).  Every pair (x, y) with y - x > THETA that basePair()
  accepts and that is not already present is then added tentatively; it is
  counted when isSecStr() accepts the extended pair list.

  rna    -- nucleotide sequence (plain string, no padding)
  secStr -- dot-bracket string for the same sequence (no padding)

  Positions are 1-indexed internally: a '$' is prepended to both strings.
  """
  verbose = PRINTbasepairadditionsremovals
  if verbose:
    print("Sec str: %s" % secStr)
  length  = len(rna)
  padded  = '$'+rna  # dummy at index 0 so nucleotides sit at 1..n
  pairs   = basePairList('$'+secStr)
  scratch = copy.deepcopy(pairs)  # working copy; restored after each trial
  moves   = len(pairs)
  for x in range(1,length-THETA):
    for y in range(x+THETA+1,length+1):
      if not basePair(padded[x],padded[y]):
        continue
      candidate = (x,y)
      if candidate in scratch:
        continue
      scratch.append(candidate)
      if isSecStr(scratch):
        moves += 1
        if verbose:
          ss = basePairList2dotBracketNotation(rna,scratch)
          print('(%d,%d)\t%s' % (x,y,ss))
      scratch.remove(candidate)
  return moves
Beispiel #6
0
def numBasePairAdditionsRemovals(rna, secStr):
    """Count, by brute force, single base-pair additions and removals.

    The count starts at the number of base pairs in the structure (each one
    can be removed).  Every pair (x, y) with y - x > THETA that basePair()
    accepts and that is not already present is then added tentatively; it is
    counted when isSecStr() accepts the extended pair list.

    rna    -- nucleotide sequence (plain string, no padding)
    secStr -- dot-bracket string for the same sequence (no padding)

    Positions are 1-indexed internally: a '$' is prepended to both strings.
    """
    verbose = PRINTbasepairadditionsremovals
    if verbose:
        print("Sec str: %s" % secStr)
    length = len(rna)
    padded = '$' + rna  # dummy at index 0 so nucleotides sit at 1..n
    pairs = basePairList('$' + secStr)
    scratch = copy.deepcopy(pairs)  # working copy; restored after each trial
    moves = len(pairs)
    for x in range(1, length - THETA):
        for y in range(x + THETA + 1, length + 1):
            if not basePair(padded[x], padded[y]):
                continue
            candidate = (x, y)
            if candidate in scratch:
                continue
            scratch.append(candidate)
            if isSecStr(scratch):
                moves += 1
                if verbose:
                    ss = basePairList2dotBracketNotation(rna, scratch)
                    print('(%d,%d)\t%s' % (x, y, ss))
            scratch.remove(candidate)
    return moves
def _positionalEntropy(pairCounts, n, total):
    """Return {i: entropy of position i} for 1 <= i <= n.

    pairCounts[i][j] is the number of samples in which i and j are paired and
    total is the number of samples used as denominator.  p(i,i) is taken as
    1 - sum over j != i of p(i,j), and the entropy of position i is the sum
    of -xLogX(p) over all of those probabilities.
    xLogX is defined elsewhere (presumably x*log(x) with 0 at x == 0 --
    confirm against its definition).
    """
    denom = float(total)
    entropy = {}
    for i in range(1, n + 1):
        sumEntropyForI = 0.0
        probII = 1.0
        for j in range(1, n + 1):
            if i != j:
                probIJ = pairCounts[i][j] / denom
                sumEntropyForI += -xLogX(probIJ)
                probII -= probIJ
        entropy[i] = sumEntropyForI + -xLogX(probII)
    return entropy


def main(filename):
    """Print, per mutation count, the correlation of mutation rate and entropy.

    Input layout as read by this function:
      * line 1: the reference RNA sequence;
      * header lines start with '>'; the third whitespace-separated token of
        the FIRST header is the number of samples per block and the
        second-to-last token of EVERY header is that block's mutation count;
      * after a header, samples come as two lines: sequence, then structure.

    For each mutation count k the function accumulates
      D[k][i]      (0 <= i < n)   fraction of samples that differ from the
                                  reference at position i, and
      H[k][i]      (1 <= i <= n)  positional entropy of the base-pairing
                                  probabilities,
    and prints one row with corrCoeff(D[k], H[k]) plus mean and standard
    deviation of both lists.  Rows are printed for k = 1 .. last mutation
    count read, so the blocks are expected to cover exactly those values.
    """
    D = {}   # D[k][i]: mutation count at 0-based position i
    SS = {}  # SS[k][i][j]: number of samples pairing 1-based i with j
    H = {}   # H[k][i]: entropy at 1-based position i
    num = 0         # samples read so far in the current block
    numMut = 0      # mutation count of the current block (0 = none seen yet)
    numSamples = 0  # declared samples per block, from the first header

    # The context manager closes the file even when parsing raises.
    with open(filename) as infile:
        rna0 = infile.readline().strip()
        # NOTE(review): the returned structure is not used in this function;
        # the call is kept in case it has side effects not visible from here.
        computeViennaSecStr(rna0)
        n = len(rna0)
        line = infile.readline()
        while line:
            if line[0] == '>':
                if numMut == 0:  # first header: get number of samples
                    numSamples = int(line.split()[2])
                else:  # finish the previous block: compute its entropy
                    H[numMut] = _positionalEntropy(SS[numMut], n, numSamples)
                # Now set up the tables for the new number of mutations
                numMut = int(line.split()[-2])
                D[numMut] = {}
                SS[numMut] = {}
                for i in range(1, n + 1):
                    SS[numMut][i] = {}
                    for j in range(1, n + 1):
                        SS[numMut][i][j] = 0.0
                    # WARNING: D is indexed 0 <= i < n, SS is 1 <= i <= n
                    D[numMut][i - 1] = 0.0
                num = 0  # restart the per-block sample counter
                line = infile.readline()
                continue
            num += 1
            if DEBUG:
                print("%s %s" % (numMut, num))
            rna = line.strip().upper()
            secStr = infile.readline().strip()
            # A set gives O(1) membership tests in the n*n scan below.
            # NOTE(review): the pairs are compared with 1-based (baseI, j)
            # although no '$' is prepended to secStr here -- confirm the
            # index base used by basePairList.
            bps = set(basePairList(secStr))
            for i in range(n):
                if rna0[i] != rna[i]:
                    D[numMut][i] += 1.0
                baseI = i + 1  # 1 <= baseI <= n, while 0 <= i < n
                for j in range(1, n + 1):
                    if baseI < j and (baseI, j) in bps:
                        SS[numMut][baseI][j] += 1
                    elif j < baseI and (j, baseI) in bps:
                        SS[numMut][baseI][j] += 1
            line = infile.readline()

    # Entropy for the last block.  As in the original code, this block is
    # normalised by the number of samples actually read (num), whereas the
    # earlier blocks use the declared numSamples.
    H[numMut] = _positionalEntropy(SS[numMut], n, num)

    # Normalize D values to be between [0,1]
    for k in range(1, numMut + 1):
        for i in range(n):
            D[k][i] /= numSamples

    # Now compute correlation coefficient
    print("k\tcorrCoeff\tmean1\t\tstdev1\t\tmean2\t\tstdev2")
    for k in range(1, numMut + 1):
        L1 = [D[k][i] for i in range(n)]
        L2 = [H[k][i + 1] for i in range(n)]  # H is indexed from 1 to n
        mean1, stdev1, _max1, _min1 = getSampleStats(L1)
        mean2, stdev2, _max2, _min2 = getSampleStats(L2)
        print("%d\t%f\t%f\t%f\t%f\t%f" % (k, corrCoeff(
            L1, L2), mean1, stdev1, mean2, stdev2))
def _positionalEntropy(pairCounts, n, total):
  """Return {i: entropy of position i} for 1 <= i <= n.

  pairCounts[i][j] is the number of samples in which i and j are paired and
  total is the number of samples used as denominator.  p(i,i) is taken as
  1 - sum over j != i of p(i,j), and the entropy of position i is the sum
  of -xLogX(p) over all of those probabilities.
  xLogX is defined elsewhere (presumably x*log(x) with 0 at x == 0 --
  confirm against its definition).
  """
  denom   = float(total)
  entropy = {}
  for i in range(1,n+1):
    sumEntropyForI = 0.0
    probII         = 1.0
    for j in range(1,n+1):
      if i!=j:
        probIJ = pairCounts[i][j]/denom
        sumEntropyForI += -xLogX(probIJ)
        probII         -= probIJ
    entropy[i] = sumEntropyForI + -xLogX(probII)
  return entropy


def main(filename):
  """Print, per mutation count, the correlation of mutation rate and entropy.

  Input layout as read by this function:
    * line 1: the reference RNA sequence;
    * header lines start with '>'; the third whitespace-separated token of
      the FIRST header is the number of samples per block and the
      second-to-last token of EVERY header is that block's mutation count;
    * after a header, samples come as two lines: sequence, then structure.

  For each mutation count k the function accumulates
    D[k][i]      (0 <= i < n)   fraction of samples that differ from the
                                reference at position i, and
    H[k][i]      (1 <= i <= n)  positional entropy of the base-pairing
                                probabilities,
  and prints one row with corrCoeff(D[k], H[k]) plus mean and standard
  deviation of both lists.  Rows are printed for k = 1 .. last mutation
  count read, so the blocks are expected to cover exactly those values.
  """
  D  = {}  # D[k][i]: mutation count at 0-based position i
  SS = {}  # SS[k][i][j]: number of samples pairing 1-based i with j
  H  = {}  # H[k][i]: entropy at 1-based position i
  num        = 0  # samples read so far in the current block
  numMut     = 0  # mutation count of the current block (0 = none seen yet)
  numSamples = 0  # declared samples per block, from the first header

  # The context manager closes the file even when parsing raises.
  with open(filename) as infile:
    rna0 = infile.readline().strip()
    # NOTE(review): the returned structure is not used in this function;
    # the call is kept in case it has side effects not visible from here.
    computeViennaSecStr(rna0)
    n    = len(rna0)
    line = infile.readline()
    while line:
      if line[0]=='>':
        if numMut == 0: #first header: get number of samples
          numSamples = int(line.split()[2])
        else: #finish the previous block: compute its entropy
          H[numMut] = _positionalEntropy(SS[numMut],n,numSamples)
        #Now set up the tables for the new number of mutations
        numMut     = int(line.split()[-2])
        D[numMut]  = {}
        SS[numMut] = {}
        for i in range(1,n+1):
          SS[numMut][i] = {}
          for j in range(1,n+1):
            SS[numMut][i][j] = 0.0
          #WARNING: D is indexed 0<=i<n, SS is 1<=i<=n
          D[numMut][i-1] = 0.0
        num  = 0 #restart the per-block sample counter
        line = infile.readline()
        continue
      num += 1
      if DEBUG:
        print("%s %s" % (numMut,num))
      rna    = line.strip().upper()
      secStr = infile.readline().strip()
      # A set gives O(1) membership tests in the n*n scan below.
      # NOTE(review): the pairs are compared with 1-based (baseI,j)
      # although no '$' is prepended to secStr here -- confirm the
      # index base used by basePairList.
      bps    = set(basePairList(secStr))
      for i in range(n):
        if rna0[i]!=rna[i]:
          D[numMut][i] += 1.0
        baseI = i+1 #1<=baseI<=n, while 0<=i<n
        for j in range(1,n+1):
          if baseI<j and (baseI,j) in bps:
            SS[numMut][baseI][j] += 1
          elif j<baseI and (j,baseI) in bps:
            SS[numMut][baseI][j] += 1
      line = infile.readline()

  # Entropy for the last block.  As in the original code, this block is
  # normalised by the number of samples actually read (num), whereas the
  # earlier blocks use the declared numSamples.
  H[numMut] = _positionalEntropy(SS[numMut],n,num)

  #Normalize D values to be between [0,1]
  for k in range(1,numMut+1):
    for i in range(n):
      D[k][i] /= numSamples

  #Now compute correlation coefficient
  print("k\tcorrCoeff\tmean1\t\tstdev1\t\tmean2\t\tstdev2")
  for k in range(1,numMut+1):
    L1 = [D[k][i] for i in range(n)]
    L2 = [H[k][i+1] for i in range(n)] #H is indexed from 1 to n
    mean1,stdev1,_max1,_min1 = getSampleStats(L1)
    mean2,stdev2,_max2,_min2 = getSampleStats(L2)
    print("%d\t%f\t%f\t%f\t%f\t%f" % (k,corrCoeff(L1,L2),mean1,stdev1,mean2,stdev2))