예제 #1
0
def haploRNA3D(templateInput, fastaFile, cycles, path, chain='A'):
    """
    Creates a tertiary structure for each unique sequence in a Fasta file
    using the methods from the module moderna. This function returns three
    files for each sequences: (1) a fasta file comparing the target sequence with
    the sequence of the template pdb structure, (2) a pdb file of the homologous 
    structure with no refinement, and (3) a pdb file of the homologous structure
    with refinement.
    Results are showed in a folder named 'haploRNA3D_month_day_hour_min'.
    This folder is located in Home.
    Module required:
    - argparse
    - subprocess
    - time
    - expanduser (from os.path)
    Usage: <template> <fasta file> <cycles refinement> <path> <chain (default='A')>
    """
    print 'Loading template...'
    template = load_template(templateInput, str(chain))
    clean_structure(template)
    seqTemplate = str(get_sequence(template))

    print 'Loading Fasta file...'
    records = openFasta(fastaFile)
    nameLst, seqLst = getNameSeq(records)    
    for index in range(len(nameLst)):
        seqLst[index] = replaceBase(seqLst[index], seqTemplate)
    if str(path).endswith('/'):
        nameDir = str(path)+'haploRNA3D_'+time.ctime().replace(' ','_').replace(':','_')[4:16]
    else:
        nameDir = str(path)+'/'+'haploRNA3D_'+time.ctime().replace(' ','_').replace(':','_')[4:16]
    subprocess.check_call(['mkdir', nameDir])
    muscle = locate('muscle')

    print 'Refining the models...'
    refinementScript = locate('refine_model_mmtk.py')
    secondStruct = open(nameDir+'/Secondary_Structure.fasta', 'w')
    for elm in range(len(seqLst)):
        print 'Refining', elm+1, 'of', len(seqLst)
        seqTargTempl(seqLst[elm], seqTemplate, nameLst[elm], nameDir)
        model(muscle, nameDir, nameLst[elm], templateInput, chain)
        currentTemplate = load_template(nameDir+'/'+nameLst[elm][:10])
        try:
            subprocess.check_call(['python', refinementScript, '-m',
                           nameDir+'/'+nameLst[elm][:10], '-y',
                           str(cycles), '-o', nameDir+'/'+nameLst[elm][:11]+'_refined.pdb', '-t'])
        except:
            subprocess.check_call(['python', refinementScript, '-m',
                           nameDir+'/'+nameLst[elm][:10], '-y',
                           str(cycles), '-r', '2-'+str(len(currentTemplate)),
                           '-o', nameDir+'/'+nameLst[elm][:11]+'_refined.pdb', '-t'])
        newmodel = load_model(nameDir+'/'+nameLst[elm][:11]+'_refined.pdb')
        newmodelStruc = get_secstruc(newmodel)
        newmodelSeqen = str(get_sequence(newmodel))
        secondStruct.write('>'+nameLst[elm][:11]+'\n'+newmodelSeqen+'\n'+newmodelStruc+'\n')
    secondStruct.close()
    print 'Done!'
예제 #2
0
def haploRNA2D(inFile, title, repeats, siglevel, bootstrap, trc=False):
    """
    Creates a secondary structure for each unique sequence and a
    consensus too. All the process involves the MFE along covariance
    analysis. It also creates Fasta-like files summarizing the
    results.
    Results are showed in a folder named 'haploRNA2D_month_day_hour_min'.
    This folder will be available in Home.
    Module required:
    - time
    - expanduser (from os.path)
    Usage: <Fasta file> <title> <repeats> <significance level> <bootstrap> <transcription (default=False)>
    """
    print 'Reading file...'
    records = GBcode(inFile) # this step change the file to Bio.SeqIO format and also take only the GenBank code as reference
    equLen_file(records)
    if trc == True:
        print 'Transcribing sequences...'
        records = transcribe(records)
    fas2clus(inFile) # make a clustal file to be used by the consensus function
    consensusLst = consensusRNA(inFile+'cl')
    if consensusLst:
        consensusFile = True
        lenSeq = len(consensusLst[0])-1
        consensuStr = consensusLst[1][0:lenSeq]
    else:
        consensusFile = False
        print 'Warning! No consensus structure defined!'
        lenSeq = len(str(records[0].seq))
    nameDir = expanduser('~')+'/'+'haploRNA2D_'+time.ctime().replace(' ','_').replace(':','_')[4:16]

    print 'Making folder...'
    subprocess.check_call(['mkdir', nameDir]) # make a folder inside the home folder as nameDir shows

    VarnaLocate = locate('VARNA') # search and save the path where the application VARNA is located into the variable 'VarnaLocate' 
    if consensusFile:
        print 'Building RNA secondary structure...'
        ConsensusFasta = open(nameDir+'/'+'Consensus_Structure', 'w')
        ConsensusFasta.write('>Consensus secondary structure\n'+consensuStr)
        ConsensusFasta.close()
        secFigFile(VarnaLocate, consensuStr, nameDir, 'Consensus_Secondary_Structure')

    print 'Calculating random covariances...'
    MijLst, CijLst = [], []
    for a in range(repeats):
        seq2Tpl = (randomSeq(len(records), 'RNA'), randomSeq(len(records), 'RNA')) # make a 2-tuple of random sequences
        MijLst.append(Mij(seq2Tpl[0],seq2Tpl[1],2)) # compute and store the Mij value of the two random sequences
        CijLst.append(Cij(seq2Tpl[0],seq2Tpl[1])) # compute and store the Cij value of the two random sequences

    print 'Calculating real covariances...'
    Mijc = searchXijc(kernelXij(MijLst), siglevel, -0.5, 2.0)[0] # here is defined the Mij critical value
    Cijc = searchXijc(kernelXij(CijLst), siglevel, -0.5, 0.75)[0] # here is defined the Cij critical value
    transposeRecords = transpose(records) # here, the alignment matrix is transposed and...
    dicMij, dicCij = dictXij(transposeRecords, 'Mij'), dictXij(transposeRecords, 'Cij') # ... its Mij and Cij values are calculated here

    print 'Saving Hinton diagrams...'
    hintonDiag(dicMij, nameDir+'/Mij Hintong', 'Hinton diagram for Mij values', 2)
    hintonDiag(dicMij, nameDir+'/Cij Hintong', 'Hinton diagram for Cij values', 1.5)

    print 'Saving png KDE images for each array...'
    dicMijVal = dicMij.values()
    dicCijVal = dicCij.values()
    variables = [MijLst, CijLst, dicMijVal, dicCijVal]
    for a in range(len(variables)):
        varName = retrieve_varName(variables[a])[0]
        fig_plot(variables[a], -1, 3, nameDir+'/'+varName, varName)

    print 'Saving svg Logos images for the alignment...'
    info_content_file(records, title, nameDir+'/Logos')

    print 'Searching significative values and best partner...'
    dicMij, dicCij = signXij(dicMij, Mijc), signXij(dicCij, Cijc) # here are saved only those Mij and Cij whose value is greater than the critical value
    dicMijSign, dicCijSign = dicMij.copy(), dicCij.copy()
    dicMij, dicCij = bestPartner(dicMij), bestPartner(dicCij) # only those pair positions with higgest values are saved

    print 'Building the constraint pattern...'
    constraint = []
    for a in dicMij: # 'a' is a 2-tuple which point what positions are basepaired
        if a in dicCij: # this means: if 'a' is in both dicMij and dicCij...
            constraint.append(a) # ... store 'a' in constraint

    print 'Creating files...'
    nameFasta, seqFasta = haplotypes(records)
    makeFasta(nameDir+'/Haplotypes_Fasta', nameFasta, seqFasta)
    conStruct = constrStruc(lenSeq, constraint) # makes the secondary structure pattern for constraint
    haploStruct = []
    for a in seqFasta:
        resultHaploStruct = secondStruct(conStruct, a) 
        haploStruct.append(resultHaploStruct)
    makeFasta(nameDir+'/Haplotype_Structures_Fasta', nameFasta, haploStruct)
    others = open(nameDir+'/Other_Results', 'w')
    others.write('Critical value for Mij:\n'+str(Mijc)+'\n'+
                 'Critical value for Cij:\n'+str(Cijc)+'\n'+
                 'Significative values for Mij:\n'+str(dicMijSign)+'\n'+
                 'Significative values for Cij:\n'+str(dicCijSign)+'\n'+
                 'Mij values after "bestPartner":\n'+str(dicMij)+'\n'+
                 'Cij values after "bestPartner":\n'+str(dicCij)+'\n'+
                 'Final constraint basepairs:\n'+str(constraint)+'\n'+
                 'Consensus constraint:\n'+conStruct+'\n')
    if not consensusFile:
        others.write('No consensus structure defined!')
    others.close()

    print 'Saving phylogenetic tree of secondary structures...'
    treeNJ = dendroNJ(nameDir+'/Haplotype_Structures_Fasta', replicate=bootstrap)
    saveNJMatrix = open(nameDir+'/NJMatrix','w')
    saveNJMatrix.write(treeNJ)
    saveNJMatrix.close()
    saveTree(treeNJ, nameDir+'/Phylo_Secondary_Structure', ladderize=True)

    print 'Making png secondary structures for each haplo-sequence...'
    for a in range(len(nameFasta)):
        print 'Making',a+1,'of', len(nameFasta)
        if '(' in haploStruct[a]:
            secFigFile(VarnaLocate, haploStruct[a], nameDir, nameFasta[a], seqFasta[a])
        else:
            print nameFasta[a], 'has no a RNA secondary structure'

    print 'Done!'