Esempio n. 1
0
def mult_mut_info(inFile, logbase=2, repeats=None, siglevel=0.05, bioSense=False):
    """
    Compute the MIxyz value for triples of alignment columns.

    Arguments:
    - inFile:   either a path (str), which is loaded with
                rnatk.fasta.openFasta, or an already loaded alignment
                that is handed to rnatk.fasta.transpose unchanged
                (presumably a list of Bio.SeqRecord objects -- TODO confirm).
    - logbase:  forwarded to MIxyz.
    - repeats:  number of random sequence triples used to build the
                random MIxyz distribution. It is required; it only has a
                placeholder default because it sits after 'logbase' in
                the positional order, which is preserved for callers.
    - siglevel: forwarded to searchXijc when the critical value is
                searched.
    - bioSense: if true, only triples (A, B, C) with B-A > 3 and
                C-B > 3 are evaluated; otherwise every triple with
                A < B < C is evaluated.

    Returns a dict whose keys are 1-based (A, B, C) column positions and
    whose values are the corresponding MIxyz results.

    Raises TypeError if 'repeats' is not supplied.
    """
    if repeats is None:
        raise TypeError("mult_mut_info() requires the 'repeats' argument")

    # Load the alignment only when a path was given; from here on a single
    # name is used for the records (the original mixed 'inFile'/'InFile').
    if isinstance(inFile, str):
        records = rnatk.fasta.openFasta(inFile)
    else:
        records = inFile
    matrix = rnatk.fasta.transpose(records)

    # Minimum distance between consecutive positions of a triple:
    # "difference > 3" means at least 4 apart, "A < B" means at least 1.
    gap = 4 if bioSense else 1
    nCols = len(matrix)
    dictResults = {}
    # Iterate only over the admissible triples instead of filtering the
    # full n^3 index space; the visiting order is unchanged.
    for indexA in range(nCols):
        for indexB in range(indexA + gap, nCols):
            for indexC in range(indexB + gap, nCols):
                value = MIxyz(matrix[indexA], matrix[indexB], matrix[indexC], logbase)
                dictResults[indexA+1, indexB+1, indexC+1] = value

    # Random MIxyz distribution: each random sequence has one symbol per
    # record, i.e. the same length as an alignment column.
    nRecords = len(records)
    MIxyzRandom = []
    for k in range(repeats):
        seq3Tpl = (randomSeq(nRecords, 'RNA'), randomSeq(nRecords, 'RNA'), randomSeq(nRecords, 'RNA'))
        MIxyzRandom.append(MIxyz(seq3Tpl[0], seq3Tpl[1], seq3Tpl[2], logbase))

    # NOTE(review): the critical value is computed but never applied to
    # dictResults. The call is kept so its side effects and failure modes
    # stay as they were; confirm whether the results should be filtered.
    MIxyzCrit = searchXijc(kernelXij(MIxyzRandom), siglevel, -0.5, 2.0)[0]
    return dictResults
Esempio n. 2
0
def haploRNA2D(inFile, title, repeats, siglevel, bootstrap, trc=False):
    """
    Creates a secondary structure for each unique sequence and a
    consensus too. The whole process combines the MFE with a covariance
    analysis. It also creates Fasta-like files summarizing the results.

    Results are written to a folder named
    'haploRNA2D_<month>_<day>_<hour>_<min>' inside the user's home
    directory.

    Modules required:
    - time
    - subprocess
    - expanduser (from os.path)

    Arguments:
    - inFile:    path of the Fasta file; inFile+'cl' is read as the
                 clustal file produced by fas2clus.
    - title:     forwarded to info_content_file for the Logos image.
    - repeats:   number of random sequence pairs used to build the
                 random Mij and Cij distributions.
    - siglevel:  significance level forwarded to searchXijc.
    - bootstrap: forwarded to dendroNJ as 'replicate'.
    - trc:       when exactly True, the records are transcribed first.

    Returns nothing; all the results are files in the output folder.

    Usage: <Fasta file> <title> <repeats> <significance level> <bootstrap> <transcription (default=False)>
    """
    print('Reading file...')
    records = GBcode(inFile) # converts the file to Bio.SeqIO format, keeping only the GenBank code as reference
    equLen_file(records)
    # Explicit comparison kept on purpose: truthy non-bool values
    # (e.g. the string 'False') must not trigger the transcription.
    if trc == True:
        print('Transcribing sequences...')
        records = transcribe(records)
    fas2clus(inFile) # make a clustal file to be used by the consensus function
    consensusLst = consensusRNA(inFile+'cl')
    if consensusLst:
        consensusFile = True
        lenSeq = len(consensusLst[0])-1
        consensuStr = consensusLst[1][0:lenSeq]
    else:
        consensusFile = False
        print('Warning! No consensus structure defined!')
        lenSeq = len(str(records[0].seq))
    # time.ctime() looks like 'Sun Jun 20 23:21:05 1993'; once spaces and
    # colons are replaced, the slice [4:16] keeps 'Jun_20_23_21'.
    nameDir = expanduser('~')+'/'+'haploRNA2D_'+time.ctime().replace(' ','_').replace(':','_')[4:16]

    print('Making folder...')
    subprocess.check_call(['mkdir', nameDir]) # make a folder inside the home folder as nameDir shows

    VarnaLocate = locate('VARNA') # path where the application VARNA is located
    if consensusFile:
        print('Building RNA secondary structure...')
        # 'with' guarantees the file is closed even if the write fails
        with open(nameDir+'/'+'Consensus_Structure', 'w') as ConsensusFasta:
            ConsensusFasta.write('>Consensus secondary structure\n'+consensuStr)
        secFigFile(VarnaLocate, consensuStr, nameDir, 'Consensus_Secondary_Structure')

    print('Calculating random covariances...')
    MijLst, CijLst = [], []
    for a in range(repeats):
        seq2Tpl = (randomSeq(len(records), 'RNA'), randomSeq(len(records), 'RNA')) # make a 2-tuple of random sequences
        MijLst.append(Mij(seq2Tpl[0],seq2Tpl[1],2)) # Mij value of the two random sequences
        CijLst.append(Cij(seq2Tpl[0],seq2Tpl[1])) # Cij value of the two random sequences

    print('Calculating real covariances...')
    Mijc = searchXijc(kernelXij(MijLst), siglevel, -0.5, 2.0)[0] # Mij critical value
    Cijc = searchXijc(kernelXij(CijLst), siglevel, -0.5, 0.75)[0] # Cij critical value
    transposeRecords = transpose(records) # the alignment matrix is transposed and...
    dicMij, dicCij = dictXij(transposeRecords, 'Mij'), dictXij(transposeRecords, 'Cij') # ... its Mij and Cij values are calculated

    print('Saving Hinton diagrams...')
    hintonDiag(dicMij, nameDir+'/Mij Hintong', 'Hinton diagram for Mij values', 2)
    # The Cij diagram must be drawn from dicCij (it used to receive dicMij).
    hintonDiag(dicCij, nameDir+'/Cij Hintong', 'Hinton diagram for Cij values', 1.5)

    print('Saving png KDE images for each array...')
    dicMijVal = dicMij.values()
    dicCijVal = dicCij.values()
    variables = [MijLst, CijLst, dicMijVal, dicCijVal]
    # NOTE(review): retrieve_varName presumably recovers the variable name
    # from the object itself, so the four local names above end up in the
    # output file names; do not rename them or add aliases -- TODO confirm.
    for a in range(len(variables)):
        varName = retrieve_varName(variables[a])[0]
        fig_plot(variables[a], -1, 3, nameDir+'/'+varName, varName)

    print('Saving svg Logos images for the alignment...')
    info_content_file(records, title, nameDir+'/Logos')

    print('Searching significative values and best partner...')
    dicMij, dicCij = signXij(dicMij, Mijc), signXij(dicCij, Cijc) # keep only the Mij and Cij values selected against the critical value
    dicMijSign, dicCijSign = dicMij.copy(), dicCij.copy()
    dicMij, dicCij = bestPartner(dicMij), bestPartner(dicCij) # keep only the best partner of each position

    print('Building the constraint pattern...')
    # A position pair becomes a constraint only if it survived in both
    # dicMij and dicCij.
    constraint = [pair for pair in dicMij if pair in dicCij]

    print('Creating files...')
    nameFasta, seqFasta = haplotypes(records)
    makeFasta(nameDir+'/Haplotypes_Fasta', nameFasta, seqFasta)
    conStruct = constrStruc(lenSeq, constraint) # makes the secondary structure pattern for constraint
    haploStruct = [secondStruct(conStruct, seq) for seq in seqFasta]
    makeFasta(nameDir+'/Haplotype_Structures_Fasta', nameFasta, haploStruct)
    with open(nameDir+'/Other_Results', 'w') as others:
        others.write('Critical value for Mij:\n'+str(Mijc)+'\n'+
                     'Critical value for Cij:\n'+str(Cijc)+'\n'+
                     'Significative values for Mij:\n'+str(dicMijSign)+'\n'+
                     'Significative values for Cij:\n'+str(dicCijSign)+'\n'+
                     'Mij values after "bestPartner":\n'+str(dicMij)+'\n'+
                     'Cij values after "bestPartner":\n'+str(dicCij)+'\n'+
                     'Final constraint basepairs:\n'+str(constraint)+'\n'+
                     'Consensus constraint:\n'+conStruct+'\n')
        if not consensusFile:
            others.write('No consensus structure defined!')

    print('Saving phylogenetic tree of secondary structures...')
    treeNJ = dendroNJ(nameDir+'/Haplotype_Structures_Fasta', replicate=bootstrap)
    with open(nameDir+'/NJMatrix','w') as saveNJMatrix:
        saveNJMatrix.write(treeNJ)
    saveTree(treeNJ, nameDir+'/Phylo_Secondary_Structure', ladderize=True)

    print('Making png secondary structures for each haplo-sequence...')
    total = len(nameFasta)
    for idx, name in enumerate(nameFasta):
        print('Making %s of %s' % (idx+1, total))
        # Only structures containing at least one base pair are drawn.
        if '(' in haploStruct[idx]:
            secFigFile(VarnaLocate, haploStruct[idx], nameDir, name, seqFasta[idx])
        else:
            print('%s has no a RNA secondary structure' % (name,))

    print('Done!')