def mult_mut_info(inFile, logbase=2, repeats=None, siglevel=0.05, bioSense=False):
    """
    Computes the three-way mutual information (MIxyz) of every triplet of
    alignment columns and, from random columns, the MIxyz critical value.

    Parameters:
        - inFile: path of a Fasta file (str) or an already loaded collection
          of sequence records.
        - logbase: logarithm base handed to MIxyz (default=2).
        - repeats: number of random column triplets used for the null
          distribution. It is mandatory; the 'None' default only exists
          because a parameter without default cannot follow 'logbase=2'.
        - siglevel: significance level handed to searchXijc (default=0.05).
        - bioSense: if True, only triplets whose consecutive positions are
          more than 3 columns apart are evaluated; otherwise every triplet
          with A < B < C is evaluated (default=False).

    Returns:
        A dictionary whose keys are 1-based (A, B, C) column positions and
        whose values are the corresponding MIxyz.

    Raises:
        TypeError if 'repeats' is not given.
    """
    if repeats is None:
        raise TypeError("mult_mut_info() requires the 'repeats' argument")

    # A string is taken as a path and loaded; anything else is assumed to be
    # the loaded records already. The same object feeds both the transposition
    # and the random-column length below.
    if isinstance(inFile, str):
        records = rnatk.fasta.openFasta(inFile)
    else:
        records = inFile
    matrix = rnatk.fasta.transpose(records)

    # 'B - A > 3' is the same as 'B >= A + 4' and 'A < B' the same as
    # 'B >= A + 1', so the minimum gap bounds the ranges directly instead of
    # visiting every index triple and discarding most of them.
    nColumns = len(matrix)
    minGap = 4 if bioSense else 1
    dictResults = {}
    for indexA in range(nColumns):
        for indexB in range(indexA + minGap, nColumns):
            for indexC in range(indexB + minGap, nColumns):
                dictResults[indexA+1, indexB+1, indexC+1] = MIxyz(
                    matrix[indexA], matrix[indexB], matrix[indexC], logbase)

    # Null distribution: each random "column" has one symbol per record.
    nRecords = len(records)
    MIxyzRandom = []
    for _ in range(repeats):
        seq3Tpl = (randomSeq(nRecords, 'RNA'),
                   randomSeq(nRecords, 'RNA'),
                   randomSeq(nRecords, 'RNA'))  # make a 3-tuple of random sequences
        MIxyzRandom.append(MIxyz(seq3Tpl[0], seq3Tpl[1], seq3Tpl[2], logbase))

    # NOTE(review): the critical value is computed but never applied to
    # 'dictResults'; the unfiltered dictionary is returned exactly as before.
    # Confirm whether the result should be filtered against it.
    MIxyzCrit = searchXijc(kernelXij(MIxyzRandom), siglevel, -0.5, 2.0)[0]  # here is defined the MIxyz critical value
    return dictResults
def haploRNA2D(inFile, title, repeats, siglevel, bootstrap, trc=False):
    """
    Creates a secondary structure for each unique sequence and a consensus too.
    All the process involves the MFE along covariance analysis (Mij and Cij).
    It also creates Fasta-like files summarizing the results.

    Results are written in a folder named 'haploRNA2D_month_day_hour_min',
    created inside the Home folder.

    Module required:
        - time
        - subprocess
        - expanduser (from os.path)

    Parameters:
        - inFile: path of the Fasta file with the alignment.
        - title: title handed to info_content_file for the Logos image.
        - repeats: number of random column pairs used to build the Mij and
          Cij null distributions.
        - siglevel: significance level handed to searchXijc.
        - bootstrap: value handed to dendroNJ as 'replicate'.
        - trc: if true, the records are transcribed first (default=False).

    Usage: <Fasta file> <title> <repeats> <significance level> <bootstrap> <transcription (default=False)>
    """
    print('Reading file...')
    records = GBcode(inFile)  # this step change the file to Bio.SeqIO format and also take only the GenBank code as reference
    equLen_file(records)
    if trc:
        print('Transcribing sequences...')
        records = transcribe(records)

    fas2clus(inFile)  # make a clustal file to be used by the consensus function
    consensusLst = consensusRNA(inFile+'cl')
    if consensusLst:
        consensusFile = True
        lenSeq = len(consensusLst[0])-1
        consensuStr = consensusLst[1][0:lenSeq]
    else:
        consensusFile = False
        print('Warning! No consensus structure defined!')
        lenSeq = len(str(records[0].seq))

    # The slice applies to the ctime() string only: it keeps 'month_day_hour_min'.
    nameDir = expanduser('~')+'/'+'haploRNA2D_'+time.ctime().replace(' ','_').replace(':','_')[4:16]
    print('Making folder...')
    subprocess.check_call(['mkdir', nameDir])  # make a folder inside the home folder as nameDir shows
    VarnaLocate = locate('VARNA')  # search and save the path where the application VARNA is located
    if consensusFile:
        print('Building RNA secondary structure...')
        with open(nameDir+'/'+'Consensus_Structure', 'w') as ConsensusFasta:
            ConsensusFasta.write('>Consensus secondary structure\n'+consensuStr)
        secFigFile(VarnaLocate, consensuStr, nameDir, 'Consensus_Secondary_Structure')

    print('Calculating random covariances...')
    MijLst, CijLst = [], []
    for a in range(repeats):
        # Each random "column" has one symbol per record.
        seq2Tpl = (randomSeq(len(records), 'RNA'), randomSeq(len(records), 'RNA'))  # make a 2-tuple of random sequences
        MijLst.append(Mij(seq2Tpl[0], seq2Tpl[1], 2))  # compute and store the Mij value of the two random sequences
        CijLst.append(Cij(seq2Tpl[0], seq2Tpl[1]))  # compute and store the Cij value of the two random sequences

    print('Calculating real covariances...')
    Mijc = searchXijc(kernelXij(MijLst), siglevel, -0.5, 2.0)[0]  # here is defined the Mij critical value
    Cijc = searchXijc(kernelXij(CijLst), siglevel, -0.5, 0.75)[0]  # here is defined the Cij critical value
    transposeRecords = transpose(records)  # here, the alignment matrix is transposed and...
    dicMij, dicCij = dictXij(transposeRecords, 'Mij'), dictXij(transposeRecords, 'Cij')  # ... its Mij and Cij values are calculated here

    print('Saving Hinton diagrams...')
    hintonDiag(dicMij, nameDir+'/Mij Hintong', 'Hinton diagram for Mij values', 2)
    # The Cij diagram has to be drawn from the Cij values (it used to receive dicMij).
    hintonDiag(dicCij, nameDir+'/Cij Hintong', 'Hinton diagram for Cij values', 1.5)

    print('Saving png KDE images for each array...')
    dicMijVal = dicMij.values()
    dicCijVal = dicCij.values()
    variables = [MijLst, CijLst, dicMijVal, dicCijVal]
    # NOTE(review): retrieve_varName is defined elsewhere; it presumably maps
    # each object back to the name of the variable holding it, and that name
    # is used as file name and plot label. Do not rename or alias these four
    # variables without checking it.
    for a in range(len(variables)):
        varName = retrieve_varName(variables[a])[0]
        fig_plot(variables[a], -1, 3, nameDir+'/'+varName, varName)

    print('Saving svg Logos images for the alignment...')
    info_content_file(records, title, nameDir+'/Logos')

    print('Searching significative values and best partner...')
    dicMij, dicCij = signXij(dicMij, Mijc), signXij(dicCij, Cijc)  # here are saved only those Mij and Cij whose value is greater than the critical value
    dicMijSign, dicCijSign = dicMij.copy(), dicCij.copy()
    dicMij, dicCij = bestPartner(dicMij), bestPartner(dicCij)  # only those pair positions with highest values are saved

    print('Building the constraint pattern...')
    # Each key is a 2-tuple of basepaired positions; keep those supported by both Mij and Cij.
    constraint = [pair for pair in dicMij if pair in dicCij]

    print('Creating files...')
    nameFasta, seqFasta = haplotypes(records)
    makeFasta(nameDir+'/Haplotypes_Fasta', nameFasta, seqFasta)
    conStruct = constrStruc(lenSeq, constraint)  # makes the secondary structure pattern for constraint
    haploStruct = [secondStruct(conStruct, sequence) for sequence in seqFasta]
    makeFasta(nameDir+'/Haplotype_Structures_Fasta', nameFasta, haploStruct)
    with open(nameDir+'/Other_Results', 'w') as others:
        others.write('Critical value for Mij:\n'+str(Mijc)+'\n'+
                     'Critical value for Cij:\n'+str(Cijc)+'\n'+
                     'Significative values for Mij:\n'+str(dicMijSign)+'\n'+
                     'Significative values for Cij:\n'+str(dicCijSign)+'\n'+
                     'Mij values after "bestPartner":\n'+str(dicMij)+'\n'+
                     'Cij values after "bestPartner":\n'+str(dicCij)+'\n'+
                     'Final constraint basepairs:\n'+str(constraint)+'\n'+
                     'Consensus constraint:\n'+conStruct+'\n')
        if not consensusFile:
            others.write('No consensus structure defined!')

    print('Saving phylogenetic tree of secondary structures...')
    treeNJ = dendroNJ(nameDir+'/Haplotype_Structures_Fasta', replicate=bootstrap)
    with open(nameDir+'/NJMatrix', 'w') as saveNJMatrix:
        saveNJMatrix.write(treeNJ)
    saveTree(treeNJ, nameDir+'/Phylo_Secondary_Structure', ladderize=True)

    print('Making png secondary structures for each haplo-sequence...')
    totalHaplo = len(nameFasta)
    for a, haploName in enumerate(nameFasta):
        print('Making %s of %s' % (a+1, totalHaplo))
        # A structure without any '(' has no basepair, so there is nothing to draw.
        if '(' in haploStruct[a]:
            secFigFile(VarnaLocate, haploStruct[a], nameDir, haploName, seqFasta[a])
        else:
            print('%s has no a RNA secondary structure' % (haploName,))
    print('Done!')