コード例 #1
0
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Check that AlignIO.convert matches an explicit parse-then-write.

    The input is converted three ways: parse + write, convert() given the
    input filename, and convert() given an open input handle. The reported
    counts and the generated text must agree each time. A ValueError raised
    while writing or converting is recorded as a count of zero.
    """
    # Reference result: parse everything up front, then write it out.
    expected_out = StringIO()
    parsed = list(AlignIO.parse(in_filename, in_format, None, alphabet))
    try:
        expected = AlignIO.write(parsed, expected_out, out_format)
    except ValueError:
        expected = 0

    # convert() with the input given as a filename.
    from_name = StringIO()
    try:
        got = AlignIO.convert(in_filename, in_format, from_name, out_format, alphabet)
    except ValueError:
        got = 0
    assert expected == got
    assert expected_out.getvalue() == from_name.getvalue()

    # convert() with the input given as an already-open handle.
    from_handle = StringIO()
    try:
        with open(in_filename) as source:
            got = AlignIO.convert(source, in_format, from_handle, out_format, alphabet)
    except ValueError:
        got = 0
    assert expected == got
    assert expected_out.getvalue() == from_handle.getvalue()
コード例 #2
0
def create_msa(fasta_infile, msa_fasta,msa_phy):
    """Align sequences with MAFFT and save the result as FASTA and PHYLIP.

    fasta_infile is handed to the MAFFT command line wrapper; its standard
    output is written to msa_fasta, which is then converted to
    relaxed PHYLIP at msa_phy.
    """
    run_mafft = MafftCommandline(input=fasta_infile)
    # Calling the wrapper returns (stdout, stderr); only stdout is kept.
    aligned_fasta, _stderr = run_mafft()
    with open(msa_fasta, 'w') as fasta_out:
        fasta_out.write(aligned_fasta)
    AlignIO.convert(msa_fasta, "fasta", msa_phy, "phylip-relaxed")
コード例 #3
0
def main():
    """Convert an alignment file to another format from the command line.

    Reads three command-line arguments: the input file, its format and the
    desired output format. The output is written next to the input, with
    the last extension replaced by the output format name.

    * ``nexus`` output is converted with an ambiguous-DNA alphabet.
    * ``mega`` output is written by hand, 60 residues per line.
    * any other format goes straight through ``AlignIO.convert``.

    Exits with status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 4:
        print("Please provide file, the file format, and the desired file format ")
        sys.exit(1)

    f = sys.argv[1]
    # Drop only the final extension; keep any other dots in the name and
    # keep the whole name when there is no extension at all.
    fout = f.rsplit('.', 1)[0]
    formatin = sys.argv[2]
    formatout = sys.argv[3]
    target = fout + '.' + formatout

    # The three cases are mutually exclusive: a single if/elif/else chain
    # keeps the nexus case from falling into the generic conversion too.
    if formatout == 'nexus':
        AlignIO.convert(f, formatin, target, formatout,
                        alphabet=IUPAC.ambiguous_dna)
    elif formatout == 'mega':
        # NOTE(review): the input is always parsed as "phylip-relaxed" here,
        # regardless of formatin - confirm that this is intended.
        record_dict = SeqIO.to_dict(SeqIO.parse(f, "phylip-relaxed"))

        with open(target, 'w') as outfile:
            outfile.write('#mega' + "\n")
            outfile.write('!Title Mytitle;' + "\n")
            outfile.write('!Format DataType=DNA indel=-;' + "\n\n")

            for n in record_dict:
                outfile.write('#' + n + "\n")
                for s in wrap(str(record_dict[n].seq), 60):
                    outfile.write(s + "\n")
    else:
        AlignIO.convert(f, formatin, target, formatout)
コード例 #4
0
def muscle2phy():
    """Convert the numbered Clustal alignments to relaxed PHYLIP files.

    For every entry of the module-level ``interesting_list`` the file
    ``Malign<k>.aln`` (k counted from 1) is converted to ``Malign<k>.phy``,
    so that a phylogenetic tree can be built from it afterwards.
    Prints a success message, or an error message if any step fails.
    """
    try:
        # Only the number of entries matters; the files are numbered from 1.
        for number in range(1, len(interesting_list) + 1):
            prefix = "Malign" + str(number)
            AlignIO.convert(prefix + ".aln", "clustal",
                            prefix + ".phy", "phylip-relaxed")
        print("All MultiAlignments converted!")
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still propagate.
        print("Converting Error!")
コード例 #5
0
ファイル: fasta2nexus.py プロジェクト: wpwupingwp/python
def convert(arg):
    """Convert every FASTA file in arg.input to NEXUS.

    Each input path gets a sibling file with '.nexus' appended, written
    with an ambiguous-DNA alphabet. arg.input is then replaced by the list
    of NEXUS paths and the same arg object is returned.
    """
    converted = []
    for fasta_path in arg.input:
        nexus_path = fasta_path + '.nexus'
        AlignIO.convert(fasta_path, 'fasta', nexus_path, 'nexus',
                        alphabet=IUPAC.ambiguous_dna)
        converted.append(nexus_path)
    arg.input = converted
    return arg
コード例 #6
0
def main(argv):
    """Command-line entry point: convert an alignment between formats.

    Options (parsed from argv with getopt):
      -i/--infile     input file (required)
      -x/--informat   input format; guessed with guess_format() if omitted
      -o/--outfile    output file; derived from infile if omitted
      -f/--outformat  output format (required)

    The names pipe/stdin/STDIN/| select sys.stdin as input, and
    pipe/stdout/STDOUT/|/> select sys.stdout as output. 'phylip' and
    'nexus' output use the project's own writers; every other format goes
    through AlignIO.convert. Exits via sys.exit on usage errors and when
    the alignment is empty after remove_blank().
    """
    usage = 'ConvertAln -i <infile> -x <informat> -o <outfile> -f <outformat>'
    # Pseudo file names that stand for the standard streams.
    stdin_names = ('pipe', 'stdin', 'STDIN', '|')
    stdout_names = ('pipe', 'stdout', 'STDOUT', '|', '>')
    infile = ''
    informat = ''
    outfile = ''
    outformat = ''
    try:
        opts, args = getopt.getopt(
            argv, "hi:x:o:f:",
            ["infile=", "informat=", "outfile=", "outformat="])
    except getopt.GetoptError:
        sys.exit(usage)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--infile"):
            infile = arg
        elif opt in ("-x", "--informat"):
            informat = arg
        elif opt in ("-o", "--outfile"):
            outfile = arg
        elif opt in ("-f", "--outformat"):
            outformat = arg
    if not infile:
        sys.exit("must specify infile! %s" % usage)
    if not outformat:
        sys.exit("must specify format to convert to! %s" % usage)

    if not informat:
        informat = guess_format(infile)

    if not outfile:
        # Swap the last extension of infile for the one of the target format.
        if '.' in infile:
            outfile = '.'.join(infile.split('.')[:-1] + [get_extension(outformat)])
        else:
            outfile = '.'.join((infile, get_extension(outformat)))
    if infile in stdin_names:
        infile = sys.stdin
    to_stdout = outfile in stdout_names

    if outformat == 'phylip':
        alignment = AlignIO.read(infile, informat, alphabet=IUPAC.ambiguous_dna)
        alignment = remove_blank(alignment)
        if len(alignment) == 0 or len(alignment[0]) == 0:
            # Nothing left to write.
            sys.exit()
        if to_stdout:
            write_phylip(alignment, sys.stdout)
        else:
            # The with-block closes the file even if write_phylip raises.
            with open(outfile, 'w') as out_fh:
                write_phylip(alignment, out_fh)
    else:
        if to_stdout:
            outfile = sys.stdout
        if outformat == 'nexus':
            alignment = AlignIO.read(infile, informat, alphabet=IUPAC.ambiguous_dna)
            write_nexus(alignment, outfile)
        else:
            AlignIO.convert(infile, informat, outfile, outformat,
                            alphabet=IUPAC.ambiguous_dna)
コード例 #7
0
def get_alignment(pfam_id,my_db):
    """Fetch the Pfam alignment for pfam_id and store it as gzipped FASTA.

    Does nothing when ``<alignment_folder>/<pfam_id>.fasta.gz`` already
    exists as a file. Otherwise the alignment is saved in Stockholm format
    via get_pfam_alignment_by_id(), converted to FASTA and gzipped.

    Raises subprocess.CalledProcessError if ``rm`` or ``gzip`` fails.
    """
    outpt_fname = alignment_folder+'/%s'%pfam_id
    fasta_path = outpt_fname + ".fasta"
    gz_path = fasta_path + ".gz"
    if os.path.isfile(gz_path):
        return

    print("Saving alignment for %s" % pfam_id)
    print("")
    get_pfam_alignment_by_id(pfam_id=pfam_id, outpt_fname=outpt_fname+".sth", db=my_db)
    AlignIO.convert(outpt_fname+".sth", "stockholm", fasta_path, "fasta")
    # Argument lists instead of shell strings: paths containing spaces or
    # shell metacharacters are passed through untouched.
    if os.path.exists(gz_path):
        subprocess.check_call(["rm", gz_path])
    subprocess.check_call(["gzip", fasta_path])
コード例 #8
0
ファイル: Dryad.py プロジェクト: happykhan/Dryad-SA
def align(fas, clean):
    """Align fas/<fas> with MUSCLE and convert the result to PHYLIP.

    MUSCLE runs only when aln/<fas>.aln is missing or clean is truthy.
    The Clustal output is then converted to phy/<fas>.phy; a failed
    conversion is reported as a warning instead of being raised.
    """
    aln_path = 'aln/' + fas + ".aln"
    needs_alignment = not os.path.exists(aln_path) or clean
    if needs_alignment:
        cmdline = MuscleCommandline(input='fas/' + fas, out=aln_path, clw=True)
        print(str(cmdline) + '\n')
        cmdline()
    try:
        AlignIO.convert(aln_path, "clustal", 'phy/' + fas + ".phy", "phylip")
    except Exception as e:
        print('WARNING: BAD ALIGNMENT')
        print(e)
コード例 #9
0
ファイル: api.py プロジェクト: ecolell/pfamserver
def realign(msa, algorithm):
    """Realign msa with the named algorithm and return Stockholm text.

    The alignment is serialised to FASTA, passed to
    ``algorithms[algorithm]``, and the returned FASTA text is converted to
    Stockholm. An empty string is returned when nothing was converted.
    """
    fasta_buffer = StringIO()
    with closing(fasta_buffer):
        AlignIO.write(msa, fasta_buffer, "fasta")
        fasta_text = fasta_buffer.getvalue()

    realigned = algorithms[algorithm](fasta_text)

    with closing(StringIO()) as stockholm_out:
        with closing(StringIO(realigned)) as fasta_in:
            converted = AlignIO.convert(fasta_in, "fasta", stockholm_out, "stockholm")
        if converted:
            return stockholm_out.getvalue()
        return ""
コード例 #10
0
ファイル: npbs.py プロジェクト: DessimozLab/treeCl
def generate_npbs(path, i):
    """Write permuted copies of the alignments in path, unless already done.

    The work counts as done when, for every record of the collection, the
    working directory holds a non-empty ``<name>.phy`` or ``<name>.phy.bz2``.
    Otherwise a permuted copy of the collection is written there, and each
    file is passed through AlignIO as relaxed PHYLIP before replacing the
    file written by write_alignment().
    """
    c = treeCl.Collection(input_dir=path, file_format='phylip')
    working_dir = get_dirs(path, i)['wdir']

    def _present(filename):
        # A file counts only if it exists and is not empty.
        return os.path.exists(filename) and os.path.getsize(filename) > 0

    # Check if work already done
    work_done = True
    for rec in c:
        looking_for = '{}.phy'.format(os.path.join(working_dir, rec.name))
        if not _present(looking_for) and not _present(looking_for + '.bz2'):
            logger.error("File not found or is empty: {}".format(looking_for))
            work_done = False

    if not work_done:
        npbs = c.permuted_copy()
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)
        for rec in npbs:
            phy_path = '{}.phy'.format(os.path.join(working_dir, rec.name))
            tmp_path = phy_path + '_'
            rec.write_alignment(phy_path, 'phylip', True)
            AlignIO.convert(phy_path, 'phylip-relaxed', tmp_path, 'phylip-relaxed')
            # os.rename instead of a shell 'mv': no quoting problems with
            # unusual path names, and a failure raises instead of being lost.
            os.rename(tmp_path, phy_path)
コード例 #11
0
def clustalw(inputseqfile, outputmsafile):

    """Make a multiple sequence alignment with clustalw.

    Runs /usr/bin/clustalw on inputseqfile, then converts the resulting
    ``.aln`` file (Clustal format) to FASTA at outputmsafile.
    """
    import os.path

    clustalw_exe = "/usr/bin/clustalw"
    clustalw_cline = ClustalwCommandline(clustalw_exe, infile=inputseqfile)
    clustalw_cline()
    # Replace only the final extension. Splitting on the first dot breaks
    # for paths such as "./data/seqs.fasta" or "a.b.fasta".
    # NOTE(review): assumes ClustalW names its output <input minus last
    # extension>.aln - confirm against the installed version.
    outff = os.path.splitext(inputseqfile)[0] + ".aln"
    # Kept from the original: read() fails unless the file holds exactly
    # one alignment, so it acts as a check before converting.
    AlignIO.read(outff, "clustal")
    AlignIO.convert(outff, "clustal", outputmsafile, "fasta")
コード例 #12
0
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq

# Three gapped protein sequences of equal length used to build a toy alignment.
seq1 = "MHQAIFIYQIGYPLKSGYIQSIRSPEYDNW"
seq2 = "MH--IFIYQIGYALKSGYIQSIRSPEY-NW"
seq3 = "MHQAIFI-QIGYALKSGY-QSIRSPEYDNW"

# NOTE(review): SeqRecord, Alphabet and MultipleSeqAlignment are not imported
# in the visible part of this script - presumably imported elsewhere; confirm.
seqr1 = SeqRecord(Seq(seq1, Alphabet.Gapped(IUPAC.protein)), id="seq1")
seqr2 = SeqRecord(Seq(seq2, Alphabet.Gapped(IUPAC.protein)), id="seq2")
seqr3 = SeqRecord(Seq(seq3, Alphabet.Gapped(IUPAC.protein)), id="seq3")

alin = MultipleSeqAlignment([seqr1, seqr2, seqr3])
print(alin)

# Indexing examples (indices are 0-based, slice ends are exclusive).
print(alin[1])  # 2nd sequence
print(alin[:, 2])  # 3rd column
print(alin[:, 3:7])  # 4th to 7th columns (all sequences)
print(alin[0].seq[:3])  # first 3 columns of seq1
print(alin[1:3, 5:12])  # sequences 2 and 3; 6th to 12th columns

from Bio import AlignIO
# Read a single Clustal alignment from disk.
alin2 = AlignIO.read("PF05371_seed.aln", "clustal")

print("Size:", alin2.get_alignment_length())
for record in alin2:
    print(record.seq, record.id)

# Write the alignment that was just read as FASTA.
AlignIO.write(alin2, "example_alin.fasta", "fasta")

# Same conversion in one call; it writes to the same output file as the
# AlignIO.write call above.
AlignIO.convert("PF05371_seed.aln", "clustal", "example_alin.fasta", "fasta")
コード例 #13
0
ファイル: MitoSIS.py プロジェクト: pedronachtigall/MitoSIS
# For every input file: align with MAFFT, trim with trimAl, write a NEXUS
# copy, infer a tree with IQ-TREE and export the phylogenetic distances.
for i in files:
    print("\n" + dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
          " ::: Aligning, Trimming, and Inferring Phylogeny for " + i +
          " :::\n")
    gene = i.split(".")[0]
    # The external tools get argument lists (no shell), so file names with
    # spaces or shell metacharacters are passed through unchanged. MAFFT
    # writes the alignment to stdout, which is captured into <i>.aln.
    # A missing executable now raises instead of returning a status code.
    with open(i + ".aln", "w") as aln_out:
        sp.call(["mafft", i], stdout=aln_out, stderr=sp.DEVNULL)
    sp.call(["trimal", "-in", i + ".aln", "-out", i + ".trim", "-automated1"],
            stdout=sp.DEVNULL,
            stderr=sp.DEVNULL)
    AlignIO.convert(i + ".trim",
                    "fasta",
                    gene + ".nex",
                    'nexus',
                    alphabet=IUPAC.ambiguous_dna)

    ############################################### RUNNING IQTREE FOR EACH GENE & PRINT RESULTS

    sp.call(["iqtree", "-s", i + ".trim", "-bb", "1000", "-seed", "12345"],
            stdout=sp.DEVNULL,
            stderr=sp.DEVNULL)
    # The distance matrix is written only if IQ-TREE produced a consensus tree.
    if os.path.isfile(i + ".trim.contree"):
        t = dendropy.Tree.get_from_path(i + ".trim.contree",
                                        schema='newick',
                                        preserve_underscores=True)
        d = t.phylogenetic_distance_matrix()
        d.write_csv(i + ".phylodist.tsv")
コード例 #14
0
 def test_bootstrap_AlignIO_protein(self):
     """Bootstrap a protein PHYLIP alignment that AlignIO wrote from Clustal."""
     clustal_path = "Clustalw/hedgehog.aln"
     phylip_path = "Phylip/hedgehog.phy"
     # Produce the PHYLIP file first, then run the shared bootstrap check on it.
     AlignIO.convert(clustal_path, "clustal", phylip_path, "phylip")
     self.check_bootstrap(phylip_path, "phylip", "p")
コード例 #15
0
def write_AlignIO_protein():
    """Convert hedgehog.aln to a phylip file.

    Raises AssertionError if the conversion does not report exactly one
    alignment.
    """
    # Run the conversion outside the assert statement: under ``python -O``
    # asserts are stripped, which would otherwise skip the conversion too.
    count = AlignIO.convert("Clustalw/hedgehog.aln", "clustal",
                            "Phylip/hedgehog.phy", "phylip")
    assert 1 == count, "expected 1 alignment, converted %r" % (count,)
コード例 #16
0
ファイル: aln2phy.py プロジェクト: indexofire/ndafp
# -*- coding: utf-8 -*-
from Bio import AlignIO
import argparse

# Command line: -input <clustal file> -output <phylip file>.
# 'action' and 'dest' must be strings; the bare names store/output are
# undefined and would raise NameError before any argument is parsed.
parser = argparse.ArgumentParser()
parser.add_argument("-input", action="store", dest="input", type=str,
                    required=True)
parser.add_argument("-output", action="store", dest="output", type=str,
                    required=True)
args = parser.parse_args()

# Convert the Clustal alignment to relaxed PHYLIP.
AlignIO.convert(args.input, "clustal", args.output, "phylip-relaxed")
コード例 #17
0
from Bio import AlignIO
from Bio import Alphabet
import sys, os


if __name__ == "__main__":
    # Batch-convert every file in inDIR that ends with inputFormat into
    # outDIR, using the output extension to choose the target format.
    if len(sys.argv) != 5:
        print("usage: python seqformat_converter.py inDIR outDIR inputFormat(.fa/.phy/.nex) outputFormat(.fa/.phy/.nex)")
        sys.exit()

    inDIR = sys.argv[1]+"/"
    outDIR = sys.argv[2]+"/"
    inputFormat = sys.argv[3]
    outputFormat = sys.argv[4]

    # File extension -> Biopython AlignIO format name.
    formatNames = {'.fa': "fasta", '.phy': "phylip-sequential", '.nex': "nexus"}
    # Only these (input, output) pairs are supported.
    supportedPairs = [('.fa', '.phy'), ('.phy', '.fa'),
                      ('.phy', '.nex'), ('.fa', '.nex')]
    if (inputFormat, outputFormat) not in supportedPairs:
        # Report the problem instead of silently converting nothing.
        sys.exit("unsupported conversion: %s -> %s" % (inputFormat, outputFormat))

    for i in os.listdir(inDIR):
        if not i.endswith(inputFormat):
            continue
        # Strip only the trailing extension; splitting on it would cut the
        # name short if the extension text also occurs earlier in the name.
        clusterID = i[:-len(inputFormat)]
        AlignIO.convert(inDIR+i, formatNames[inputFormat],
                        outDIR+clusterID+outputFormat, formatNames[outputFormat],
                        alphabet=Alphabet.generic_dna)
コード例 #18
0
def convertFasta2Phylip(input_fasta,
                        output_prefix,
                        format='phylip-sequential'):
    """Convert a FASTA alignment to PHYLIP using Biopython.

    The result is written to ``output_prefix + ".phy"`` in the AlignIO
    format named by ``format`` (sequential PHYLIP unless overridden).
    """
    destination = output_prefix + ".phy"
    AlignIO.convert(input_fasta, 'fasta', destination, format)
コード例 #19
0
                                print >>out, d, x, "does not fall within the linear genome range"
                            else:
                                if str(Seq(data[d][x][1]).translate()) != str(data[d][x][2]):
                                    print >>out, d, x, "the translation does not match"                                
            else:
                for e2bs in data[d][x]:
                    if int(e2bs.split("..")[1]) not in range(0, len(data[d]["CG"][1])+1):
                        print >>log, d, x, e2bs, "does not fall within the linear genome range"
        
    
    accessions=[]  
    QC=test_start_stop("./results/results.csv")
    if QC!=None:
        print >>log, QC
    os.system("mafft --add ./results/L1.fas --quiet --reorder ./files/all_L1.mafft.fas > output.fas")
    AlignIO.convert("output.fas", "fasta", "output.phy", "phylip")
    os.system("phyml output.phy 0 i 1 0 GTR 4.0 e 1 1.0 BIONJ n n")
    
    
    new_L1 = []
    for seq_record in SeqIO.parse("./results/L1.fas", "fasta"):
        new_L1.append(seq_record.id)
        for nL1 in new_L1:
            presence("output.fas", nL1, log) #if doing more than a single record at a time, this needs to be edited to get a list of accession numbers and feed these into the function
    print 'all done'




# Names of the entries in the current directory for which isfile() is true
# (i.e. sub-directories are left out).
onlyfiles = [ f for f in listdir("./") if isfile(join("./",f)) ]
コード例 #20
0
def alignSeqs(sequencedict):
    """Align the sequences of every UTR and write one Stockholm file each.

    sequencedict maps a UTR id to an iterable of single-entry dicts
    ({species: sequence}). For each UTR the sequences are written to
    temp.fasta, aligned with clustalw2, converted to Stockholm, annotated
    with the RNAalifold consensus structure and an ID line, and moved to
    ./StockholmAlignments/<UTR id>.aln. The Clustal alignments and the
    FASTA inputs are also appended to clustal_alignments.aln and
    UTRfastas.fa. Temporary files are removed at the end.
    """
    # Start both cumulative output files empty.
    for cumulative in ('clustal_alignments.aln', 'UTRfastas.fa'):
        open(cumulative, 'w').close()

    if not os.path.exists('./StockholmAlignments/'):
        os.mkdir('./StockholmAlignments')

    for UTR in sequencedict:
        UTRID = str(UTR)

        # Write fasta file from dictionary entry
        fastastring = ''
        for species in sequencedict[UTR]:
            # Each entry is a one-item dict; take its only (name, sequence).
            name, sequence = next(iter(species.items()))
            fastastring += '>' + str(name) + '\n' + str(sequence) + '\n'
        with open('temp.fasta', 'w') as fastafh:
            fastafh.write(fastastring)
        with open('temp.fasta', 'r') as tempfastafh:
            tempfastalines = tempfastafh.readlines()

        # Align fasta using clustalw
        cline = ClustalwCommandline('clustalw2', infile='temp.fasta')
        cline()  # alignment now in temp.aln
        with open('temp.aln', 'r') as tempclustalfh:
            clustallines = tempclustalfh.readlines()

        # Convert clustal to stockholm
        AlignIO.convert('temp.aln', 'clustal', 'tempstockholm.aln', 'stockholm')

        # Get secondary structure line from RNAalifold.
        # universal_newlines=True makes the output text on Python 2 and 3.
        ss = subprocess.check_output(
            ['RNAalifold', 'temp.aln'],
            universal_newlines=True).replace(' ', '\n', 1).split('\n')[-3]
        ssline = '#=GC SS_cons ' + ss + '\n' + '//' + '\n'

        # Replace '//' in stockholm file with secondary structure line
        replace_in_file.replace('tempstockholm.aln', '//', ssline)

        # Add ID line to file.  This is necessary for Infernal.
        titleline = '# STOCKHOLM 1.0'
        IDline = '#=GF ID ' + UTRID
        replacement = titleline + '\n' + IDline + '\n'
        replace_in_file.replace('tempstockholm.aln', '# STOCKHOLM 1.0', replacement)

        # Rename stockholm file: one small Stockholm file per UTR.
        os.rename('tempstockholm.aln', './StockholmAlignments/' + UTRID + '.aln')

        # Append current temp aln files to their respective alignment files
        with open('clustal_alignments.aln', 'a') as clustalfile:
            clustalfile.writelines(clustallines)

        with open('UTRfastas.fa', 'a') as UTRfastafile:
            UTRfastafile.writelines(tempfastalines)
            UTRfastafile.write('\n' + '\n' + '\n')

    # Cleanup. The temporary files only exist if at least one UTR was
    # processed, so remove them only when present.
    for leftover in ('alirna.ps', 'temp.aln', 'temp.dnd', 'temp.fasta'):
        if os.path.exists(leftover):
            os.remove(leftover)
コード例 #21
0
ファイル: BioPhyloTree.py プロジェクト: sdhutchins/code-haven
@author: Shaurita D. Hutchins
"""
# Part 3: Use multiple sequence alignments in phylip format to create phylogenetic trees.

# Mark start of program with printed text description/title.
# Banner: blank line, rule of 81 '#', title, rule, blank line.
print("\n".join([
    "",
    81 * "#",
    "#### Part 3:  Use multiple sequence alignments to create phylogenetic trees. ####",
    81 * "#",
    "",
]))

# List of modules imported.
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import AlignIO

#alignment = AlignIO.read("HTR1E_aligned.phy", "phylip")
#print(alignment)
#print("\n")
#for record in alignment:
#    print(record.seq + " " + record.id + "\n")
#calculator = DistanceCalculator('identity')
#dm = calculator.get_distance(alignment)
#print(dm)

# Convert the FASTA alignment to relaxed PHYLIP; x holds the value returned
# by AlignIO.convert, which is printed below.
x = AlignIO.convert("HTR1E_aligned.fasta", "fasta", "HTR1E_aligned.phy",
                    "phylip-relaxed")
print(x)
#tree = Phylo.read('outtree.txt', 'newick')
#tree.ladderize()   # Flip branches so deeper clades are displayed at top
#Phylo.draw(tree)
コード例 #22
0
output = args.output
delim = args.delimiter

############################################### CODE

print(dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
      " ::: Converting alignments to NEXUS and creating partitions :::")
files = sorted(glob.glob(input + '/*'))
# Create the output directory directly instead of through a shell string,
# so directory names with spaces or shell metacharacters work.
if not os.path.isdir(output + "/nexus"):
    os.makedirs(output + "/nexus")
# x is the 1-based start column of the next locus in the concatenation.
x = 1
# The with-block closes partitions.txt even if a conversion fails.
with open(output + "/partitions.txt", "w") as partitions:
    for file in tqdm(files):
        filename = os.path.basename(file)
        lociname = filename.split(delim)[0]
        outfile = output + '/nexus/' + lociname + '.nex'
        AlignIO.convert(file, type, outfile, 'nexus', alphabet=IUPAC.ambiguous_dna)
        aln = AlignIO.read(file, type)
        aln_length = aln.get_alignment_length()
        # One line per locus: "<name> = <first column>-<last column>;"
        partitions.write(lociname + " = " + str(x) + "-" +
                         str(x - 1 + aln_length) + ";")
        partitions.write("\n")
        x = x + aln_length

print(dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
      " ::: Concatenating NEXUS alignments :::")
# Locus names recovered from the NEXUS files that were just written.
loci = sorted([
    os.path.basename(x).split(".nex")[0]
    for x in glob.glob(output + "/nexus/*.nex")
])
nexi = [(locus, Nexus.Nexus(output + "/nexus/" + locus + ".nex"))
コード例 #23
0
#usage
#python fasta.to.stockholm.py <input.file.fasta> <output.file.sto>
#		0			1		2

import sys
from Bio import AlignIO

# Input (FASTA) and output (Stockholm) paths come from the command line.
# Opening both inside a with-block guarantees that the output is flushed
# and both files are closed, even if the conversion fails.
with open(sys.argv[1], "r") as filein, open(sys.argv[2], 'w') as fileout:
    AlignIO.convert(filein, "fasta", fileout, "stockholm")
コード例 #24
0
assert os.path.isfile(clustalw_exe), "Clustal W executable missing"
stdout, stderr = cline()

# %% convert the MUSCLE alignment file (FASTA, from Poseidon) to phylip-relaxed
# Both names start with "/" because they are appended to the working directory.
msaFile = "/muscleAlignmentCoralSeq.msa"
outFile = "/muscleAlignmentCoralSeq.phy"

import os
path = os.getcwd()
#path = "/Users/kgrabb/Documents/2018.05CoralLarvae/Genomes/Poseidon/blastResults/v2"
print(path)
print(path + msaFile)
inputFile = path + msaFile
outputFile = path + outFile
# Load the alignment file with pandas only to print its first rows.
viewFile = pd.read_csv(inputFile)
print(viewFile.head(5))

AlignIO.convert(inputFile, "fasta", outputFile, "phylip-relaxed")

# %% use PhyML. feed in phy alignment with the command line wrapper
from Bio.Phylo.Applications import PhymlCommandline
# Amino-acid data, WAG model, alpha="e", 100 bootstrap replicates.
cmdline = PhymlCommandline(input=outputFile,
                           datatype="aa",
                           model="WAG",
                           alpha="e",
                           bootstrap=100)
# Calling the wrapper runs PhyML; the two returned values are kept as logs.
out_log, err_log = cmdline()

# %%
コード例 #25
0
def add_pplaced(pfam_id):
    """Place the queued query domains of a Pfam family onto its tree.

    Only acts when pfam_id is a key of the module-level pplacer_queries.
    Queries whose pplacer id is already listed in the family's placement
    log are skipped. For the remaining ones the function builds an HMM and
    a reference package, extends the alignment with the query sequences,
    runs pplacer and guppy, replaces the stored alignment and tree
    (gzipped) and appends the placed ids to the log.
    """
    if pfam_id in pplacer_queries.keys():
        print "Running PPlacer for: %s"%pfam_id

        # Collect the ids recorded by earlier runs: each log line is
        # "<name>\t<comma-separated ids>"; other lines are ignored.
        pplace_log=pplacer_folder+'/%s_pplace_log.txt'%pfam_id
        Already_placed=[]
        if os.path.exists(pplace_log):
            with open(pplace_log, "r") as myfile:
                for line in myfile:
                    line=line.strip()
                    if not line:
                        continue
                    line=line.split('\t')
                    if not len(line)==2:
                        continue
                    Already_placed.extend(line[1].split(','))
        # Build a SeqRecord for every query that has not been placed yet.
        Sequnces=[]
        p_ids=[]
        for new_gene in pplacer_queries[pfam_id]:    
            p_id = new_gene['pplacer_id']
            if p_id in Already_placed:
                continue
            p_ids.append(p_id)
            # seq_start is used as 1-based here: it is shifted by -1 for slicing.
            p_seq = gene_seq[new_gene['id']][(new_gene['seq_start']-1):new_gene['seq_end']]
            Sequnces.append(SeqRecord(p_seq, id=p_id))
        if not p_ids:
            print "All %s domains for family %s have already been pplaced."%(len(Already_placed),pfam_id)
            return

		

        # Random numeric suffixes for the temporary decompressed copies.
        rand_id_1=random.randint(1000000,9999999)        
        rand_id_2=random.randint(1000000,9999999)        
        rand_id_3=random.randint(1000000,9999999)        

        # Decompress tree log, tree and alignment into the suffixed temp files.
        subprocess.check_call("gunzip -c %s/%s.log.gz > %s/%s.log.%d"%(tree_folder,pfam_id,tree_folder,pfam_id,rand_id_1),shell=True)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s/%s.nw.%d"%(tree_folder,pfam_id,tree_folder,pfam_id,rand_id_2),shell=True)
        subprocess.check_call("gunzip -c %s/%s.fasta.gz > %s/%s.fasta.%d"%(alignment_folder,pfam_id,alignment_folder,pfam_id,rand_id_3),shell=True)

        # The HMM is built from a Stockholm copy of the FASTA alignment.
        AlignIO.convert("%s/%s.fasta.%d"%(alignment_folder,pfam_id,rand_id_3),"fasta","%s/%s.sth.%d"%(alignment_folder,pfam_id,rand_id_3),"stockholm")

        hmm_build(hmmbuild_executable_loc=path_to_hmmbuild,
              sequence_file='%s/%s.sth.%d'%(alignment_folder,pfam_id,rand_id_3),
              output_file='%s/%s.hmm'%(pplacer_folder,pfam_id))


        taxit_create(taxit_executable_loc=path_to_taxit,
            aln_fasta='%s/%s.fasta.%d'%(alignment_folder,pfam_id,rand_id_3),
            hmm_file='%s/%s.hmm'%(pplacer_folder,pfam_id),
            tree_file='%s/%s.nw.%d'%(tree_folder,pfam_id,rand_id_2),
            tree_stats='%s/%s.log.%d'%(tree_folder,pfam_id,rand_id_1),
            pfam_acc=pfam_id,
            output_location='%s/%s_pplacer'%(pplacer_folder,pfam_id),
            aln_stockholm='%s/%s_pplacer/%s.sto.%d'%(pplacer_folder,pfam_id,pfam_id,rand_id_3),                        
            )


        # Remove the temporary decompressed copies again.
        if os.path.exists("%s/%s.log.%d"%(tree_folder,pfam_id,rand_id_1)):
            subprocess.check_call("rm  %s/%s.log.%d"%(tree_folder,pfam_id,rand_id_1),shell=True)
        if os.path.exists("%s/%s.nw.%d"%(tree_folder,pfam_id,rand_id_2)):
            subprocess.check_call("rm  %s/%s.nw.%d"%(tree_folder,pfam_id,rand_id_2),shell=True)
        if os.path.exists("%s/%s.fasta.%d"%(alignment_folder,pfam_id,rand_id_3)):
            subprocess.check_call("rm  %s/%s.fasta.%d"%(alignment_folder,pfam_id,rand_id_3),shell=True)
        if os.path.exists("%s/%s.sth.%d"%(alignment_folder,pfam_id,rand_id_3)):
            subprocess.check_call("rm  %s/%s.sth.%d"%(alignment_folder,pfam_id,rand_id_3),shell=True)

        # Paths of the placement outputs and of the files they will replace.
        output_prefix = '%s/%s_pplaced'%(pplacer_folder,pfam_id)
        updated_aln = output_prefix + '.sto'
        jplace_output_file = output_prefix + '.jplace'
        tree_output_file = output_prefix + '.tre'
        sequence_file='%s/%s.sth'%(alignment_folder,pfam_id)
        aln_fasta='%s/%s.fasta'%(alignment_folder,pfam_id)
        tree_file='%s/%s.nw'%(tree_folder,pfam_id)

        pplacer_pkg_dir ='%s/%s_pplacer'%(pplacer_folder,pfam_id)
        pplacer_pkg_hmm = '%s/%s.hmm'%(pplacer_pkg_dir,pfam_id)
        pplacer_pkg_aln = '%s/%s.sto.%d'%(pplacer_pkg_dir,pfam_id,rand_id_3)
        tmpf='%s/%s.tmpf'%(pplacer_pkg_dir,pfam_id)
        # Update alignment to include the query sequence for the hypothetical domain.
        aln_res = update_hmmer_alignment(Sequnces,
                                         orig_alignment=pplacer_pkg_aln,
                                         hmm=pplacer_pkg_hmm,tmpf=tmpf)
        aln_out = open(updated_aln,'w')
        AlignIO.write(aln_res[0], aln_out, 'stockholm')
        aln_out.close()

        # Call pplacer to generate placements onto the tree.
        pplaced = pplacer_call(pplacer_package=pplacer_pkg_dir,
                               aln_file=updated_aln,
                               jplace_output_file=jplace_output_file)

        # Use the "guppy" tool to generate the best-placement tree with query as a leaf.
        gt = guppy_tree(jplace_file=jplace_output_file,
                        tree_output_file=tree_output_file)
        #Phylo.convert(tree_output_file, 'newick', tree_output_file_xml, 'phyloxml')


        # Drop the intermediate package, HMM and jplace files, then move the
        # updated alignment into place and store it as gzipped FASTA.
        os.system('rm -rf %s'%(pplacer_pkg_dir))
        os.system('rm %s/%s.hmm'%(pplacer_folder,pfam_id))
        os.system('rm %s/%s_pplaced.jplace'%(pplacer_folder,pfam_id))
        os.system('mv %s %s'%(updated_aln,sequence_file))
        AlignIO.convert(sequence_file,"stockholm",aln_fasta,"fasta")
        if os.path.exists(aln_fasta+'.gz'):
            subprocess.check_call("rm  %s.gz"%(aln_fasta),shell=True)
        subprocess.check_call("gzip %s"%(aln_fasta),shell=True)

        
        
        # Move the new tree into place and gzip it.
        cmd='mv %s %s'%(tree_output_file,tree_file)
        os.system(cmd)
        if os.path.exists(tree_file+'.gz'):
            subprocess.check_call("rm  %s.gz"%(tree_file),shell=True)
        
        subprocess.check_call("gzip %s"%(tree_file),shell=True)
        # NOTE(review): my_sequence_file is not assigned anywhere in this
        # function; unless it is a module-level global this line raises
        # NameError - possibly sequence_file was intended. Confirm.
        with open(pplace_log, "a") as myfile:
            myfile.write("%s\t%s\n"%(my_sequence_file,','.join(p_ids)))
コード例 #26
0
# Concatenate the trimmed peptide alignments into one FASTA file, then
# build the tree inside the out_trees directory.
concatenate_seqs(out_trimed_peps, tree_input_fasta)
os.chdir(out_trees)
# Menu (shown to the user in Chinese): choose fasttree, raxml, or q to quit.
print("""
@@@@@@@@@@@@@@ 建树软件选择 @@@@@@@@@@@@@@
[fasttree] 使用fasttree 建树,参数为 '-lg -gamma'
[raxml] 使用raxmlHPC-PTHREADS-SSE3建树, 参数为 '-f a -n tre -m PROTGAMMALGX -x 1000 -# 1000 -p 1000 -T 40'
后续版本会提供更多参数选择
[q] 退出程序,自定义建树
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
""")
# Keep asking until one of the three accepted answers is entered.
tree_software = ''
while tree_software not in ['fasttree', 'raxml', 'q']:
    tree_software = input("请输入 fasttree/raxml/q 进行选择---> ")

if tree_software == 'fasttree':
    # The command is a single shell string (run with shell=True); the tree
    # is redirected to lg_gamma.tree.
    fasttreeCMD = [
        'fasttree -lg -gamma {} > lg_gamma.tree'.format(tree_input_fasta)
    ]
    sp.call(fasttreeCMD, shell=True)
elif tree_software == 'raxml':
    from Bio import AlignIO
    # RAxML is given a relaxed-PHYLIP copy of the alignment.
    # NOTE(review): the file name "tree_input.fasta" is hard-coded here while
    # the fasttree branch uses the tree_input_fasta variable - confirm that
    # both refer to the same file after the chdir above.
    AlignIO.convert("tree_input.fasta", "fasta", "tree_input.phy",
                    "phylip-relaxed")
    raxmlCMD = [
        'raxmlHPC-PTHREADS-SSE3 -f a -s tree_input.phy -n tre -m PROTGAMMALGX -x 1000 -# 1000 -p 1000 -T 40'
    ]
    sp.call(raxmlCMD, shell=True)
elif tree_software == 'q':
    print('运行结束,祝愉快!')
    sys.exit()
コード例 #27
0
from Bio import AlignIO
# FASTA -> Stockholm in a single convert() call.
count = AlignIO.convert("/home/koreanraichu/agrobacterium.fasta", "fasta", "/home/koreanraichu/agrobacterium.sth",
                        "stockholm")
print("Converted %i alignments" % count)

# FASTA -> ClustalW via parse() + write().
alignments = AlignIO.parse("/home/koreanraichu/enterobacter.fasta", "fasta")
count = AlignIO.write(alignments, "/home/koreanraichu/enterobacter.aln","clustal")
print("Converted %i alignments" % count)

# Stockholm -> ClustalW: read a single alignment, wrap it in a list and
# write it. The count is taken from this write, not from the one above.
alignment = AlignIO.read("/home/koreanraichu/PF00096_seed.txt", "stockholm")
count = AlignIO.write([alignment], "/home/koreanraichu/PF00096_seed.aln", "clustal")
print("Converted %i alignments" % count)

# Stockholm -> PHYLIP with convert(); report this call's own count.
count2 = AlignIO.convert("/home/koreanraichu/PF08449_seed.txt", "stockholm", "/home/koreanraichu/PF08449_seed.phy",
                        "phylip")
print("Converted %i alignments" % count2)

# Stockholm -> PHYLIP with the record ids replaced by "seq<N>".
# name_mapping keeps the original id for every index. The PF08449
# alignment read here is the one that is renamed and written.
alignment2 = AlignIO.read("/home/koreanraichu/PF08449_seed.txt", "stockholm")
name_mapping = {}
for i, record in enumerate(alignment2):
    name_mapping[i] = record.id
    record.id = "seq%i" % i
AlignIO.write([alignment2], "/home/koreanraichu/PF08449_seed_ID.phy", "phylip")
コード例 #28
0
def write_AlignIO_protein():
    """Convert hedgehog.aln to a phylip file.

    Raises AssertionError if the conversion does not report exactly one
    alignment.
    """
    # Run the conversion outside the assert statement: under ``python -O``
    # asserts are stripped, which would otherwise skip the conversion too.
    count = AlignIO.convert("Clustalw/hedgehog.aln", "clustal",
                            "Phylip/hedgehog.phy", "phylip")
    assert 1 == count, "expected 1 alignment, converted %r" % (count,)
コード例 #29
0
ファイル: msa_conv.py プロジェクト: PiscatorX/misc-scripts
#!/usr/bin/env  python

from Bio import AlignIO
import csv
import argparse
import pprint
import sys

# Command line: msa_file [-i informat] -o outfile [-v outformat].
# argparse.FileType opens both files while parsing the arguments.
parser = argparse.ArgumentParser(description="convert msa file")
parser.add_argument('msa_file',
                    help="multiple sequence alignment (MSA) file",
                    type=argparse.FileType('r'))
parser.add_argument('-i', '--informat', default="clustal", help="MSA format")
parser.add_argument('-o',
                    '--outfile',
                    type=argparse.FileType('w'),
                    help="MSA output file",
                    required=True)
parser.add_argument('-v',
                    '--outformat',
                    default="clustal",
                    help="MSA output format")
args = parser.parse_args()

# Close both handles when done (also on error) so the output is flushed.
with args.msa_file, args.outfile:
    AlignIO.convert(args.msa_file, args.informat, args.outfile, args.outformat)
コード例 #30
0
#usage
#python fasta.to.stockholm.py <input.file.fasta> <output.file.sto>
#		0			1		2

import sys
from Bio import AlignIO

# Input (Stockholm) and output (FASTA) paths come from the command line.
# Opening both inside a with-block guarantees that the output is flushed
# and both files are closed, even if the conversion fails.
with open(sys.argv[1], "r") as filein, open(sys.argv[2], 'w') as fileout:
    AlignIO.convert(filein, "stockholm", fileout, "fasta")

コード例 #31
0
def main(fasta, phylip):
    """Convert the FASTA alignment at fasta to sequential PHYLIP at phylip."""
    # Both paths are passed through "%s" formatting before use.
    source = "%s" % fasta
    target = "%s" % phylip
    AlignIO.convert(source, "fasta", target, "phylip-sequential")
コード例 #32
0
ファイル: steps.py プロジェクト: hdetering/discomark
def convertFastaToClustal(in_dir, out_dir):
    """Convert every ``*.fasta`` file in in_dir to Clustal format in out_dir.

    Each output file keeps the input's base name with the final extension
    replaced by ``.aln``.
    """
    for in_path in glob(os.path.join(in_dir, '*.fasta')):
        # Swap only the trailing extension; str.replace would also rewrite
        # a ".fasta" that appears earlier in the file name.
        stem = os.path.splitext(os.path.basename(in_path))[0]
        out_path = os.path.join(out_dir, stem + '.aln')
        AlignIO.convert(in_path, 'fasta', out_path, 'clustal')
コード例 #33
0
ファイル: mlt.py プロジェクト: iBiology/ProtParCon
def _phyml(exe, msa, model, cat, gamma, alpha, freq, invp, start_tree,
           constraint_tree, seed, outfile):
    """
    Infer ML phylogenetic tree using PhyML.

    The FASTA alignment ``msa`` is converted to PHYLIP inside a temporary
    directory created next to it, PhyML is run in that directory, and the
    resulting ``*_phyml_tree.txt`` file is copied to ``outfile`` (or to
    ``<basename(msa)>.PhyML.ML.newick`` when ``outfile`` is empty). The
    temporary directory is always removed.

    :param exe: path of the PhyML executable.
    :param msa: path of the alignment in FASTA format.
    :param model: object exposing ``type``, ``name``, ``gamma``,
        ``frequency`` and ``invp`` attributes.
    :param cat: value passed with ``--free_rates``; ``None``/``'None'``/
        ``'none'``/0 disables the option.
    :param gamma: value passed with ``-c``; falls back to ``model.gamma``.
    :param alpha: value passed with ``-a`` when truthy.
    :param freq: frequency mode; falls back to ``model.frequency``.
        ``'estimate'`` selects ``-f e``, anything else ``-f m``.
    :param invp: value passed with ``-v``; when empty and ``model.invp`` is
        set, ``-v e`` is used.
    :param start_tree: optional tree file passed with ``-u``.
    :param constraint_tree: optional constraint file (only used together
        with ``start_tree``).
    :param seed: value passed with ``--r_seed``.
    :param outfile: destination of the tree file, or empty for the default.
    :return: the value returned by ``shutil.copy`` for the saved tree.

    Exits the process with status 1 when PhyML cannot be started, fails, or
    the tree cannot be copied to its destination.
    """

    # cmd = 'exe -i seq -d aa -m JTT -f e|m -v invariable -c 4 -a gamma-alpha
    # --quiet --r_seed num -u user_tree_file'
    wd = tempfile.mkdtemp(dir=os.path.dirname(os.path.abspath(msa)))
    alignment = 'temporary.alignment.phylip'
    try:
        # Everything after mkdtemp lives inside try/finally so the temporary
        # directory is removed even when the conversion itself fails.
        AlignIO.convert(msa, 'fasta', os.path.join(wd, alignment), 'phylip')

        if model.type == 'builtin':
            m = ['-m', model.name] if model.name else ['-m', 'LG']
        else:
            m = ['-m', 'custom', '--aa_rate_file', model.name]

        info('Inferring ML tree for {} using PhyML.'.format(msa))
        args = [exe, '-i', alignment, '-d', 'aa', '--r_seed', str(seed),
                '--quiet']
        args.extend(m)
        cat = cat if cat not in ('None', 'none', None) else 0
        if cat:
            # Popen needs string arguments; cat may arrive as an int.
            args.extend(['--free_rates', str(cat)])
        gamma = gamma or model.gamma
        if gamma:
            args.extend(['-c', str(gamma)])
        if alpha:
            args.extend(['-a', str(alpha)])
        # Compare the requested mode directly. It used to be overwritten
        # with 'X'/'F' first, which made the '-f e' branch unreachable.
        frequency = freq or model.frequency
        if frequency == 'estimate':
            args.extend(['-f', 'e'])
        else:
            args.extend(['-f', 'm'])
        if start_tree:
            args.extend(['-u', start_tree])
            if constraint_tree:
                args.extend(['--constraint_file', constraint_tree])
        if model.invp:
            if invp:
                args.extend(['-v', str(invp)])
            else:
                args.extend(['-v', 'e'])
        else:
            if invp:
                args.extend(['-v', str(invp)])

        try:
            process = Popen(args, cwd=wd, stdout=PIPE, stderr=PIPE,
                            universal_newlines=True)
        except OSError:
            error('Tree inferring failed for {}, executable (exe) {} of PhyML '
                  'is invalid.'.format(msa, exe))
            sys.exit(1)

        # communicate() drains both pipes while waiting; wait() alone can
        # deadlock once the child fills a pipe buffer.
        stdout, stderr = process.communicate()
        if process.returncode:
            msg = indent(stderr or stdout, prefix='\t')
            error('Tree inferring failed for {}\n{}'.format(msa, msg))
            sys.exit(1)

        tree = outfile if outfile else '{}.PhyML.ML.newick'.format(
                basename(msa))
        try:
            out = '{}{}'.format(os.path.join(wd, alignment),
                                '_phyml_tree.txt')
            tree = shutil.copy(out, tree)
            info('Successfully save inferred ML tree to {}.'.format(
                    tree))
        except OSError:
            error('Path of outfile {} is not writeable, saving tree to '
                  'file failed.'.format(tree))
            sys.exit(1)
    finally:
        shutil.rmtree(wd)
    return tree
コード例 #34
0
# output will be added to the same directory
inFile = sys.argv[1]

# make sure the directory ends with a "/" so file names can be appended
if re.search("/$", inFile) is None:
    inFile += "/"

all_sites = inFile + "populations.all.phylip"
var_sites = inFile + "populations.var.phylip"

# both input alignments must exist before anything is converted
assert os.path.exists(all_sites) and os.path.exists(var_sites), \
    "Input files not present"

# One row per conversion, run in order:
# (progress message, source, source format, target, remaining convert args)
conversions = [
    ("Converting to all sites phylip to sequential phylip",
     all_sites, "phylip",
     inFile + "populations.all.seq.phylip", ("phylip-sequential",)),
    ("Converting to all sites phylip to fasta",
     all_sites, "phylip",
     inFile + "populations.all.fasta", ("fasta",)),
    ("Converting to all sites phylip to nexus",
     all_sites, "phylip",
     inFile + "populations.all.nexus", ("nexus", "DNA")),
    ("Converting to variable sites phylip to nexus",
     var_sites, "phylip-relaxed",
     inFile + "populations.var.nexus", ("nexus", "DNA")),
]

for message, source, source_format, target, tail in conversions:
    print(message)
    AlignIO.convert(source, source_format, target, *tail)
コード例 #35
0
def main():
    """Convert the alignment described by the options from ``get_para()``.

    Reads ``infile``, ``input_format``, ``outfile`` and ``output_format``
    from the returned object and passes them to ``AlignIO.convert``.
    """
    options = get_para()
    source, source_format = options.infile, options.input_format
    target, target_format = options.outfile, options.output_format
    AlignIO.convert(source, source_format, target, target_format)
コード例 #36
0
ファイル: find_a_gene.py プロジェクト: jrsacher/find-a-gene
def build_tree(msa_file, original, file_name="tree", bootstrap=10):
    """
    Infer a phylogenetic tree from a Clustal alignment with PhyML and draw it.

    The alignment is converted to relaxed PHYLIP (written to ``file_name``),
    PhyML is run on it, and the resulting Newick tree is coloured and saved
    as ``<file_name>.png`` and ``<file_name>_transparent.png``.

    msa_file  -- path of the Clustal alignment
    original  -- record whose ``id`` marks the originally searched gene
    file_name -- base name of every file produced
    bootstrap -- number of PhyML bootstrap replicates

    References:
        http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc217
        PhyML site: http://www.atgc-montpellier.fr/
        "New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies:
        Assessing the Performance of PhyML 3.0." Guindon S., Dufayard J.F.,
        Lefort V., Anisimova M., Hordijk W., Gascuel O. Systematic Biology,
        59(3):307-21, 2010.
    """
    # See https://biopython.org/wiki/Phylo for general code and settings
    tree_file = file_name  # shorter alias used for every derived file name

    # Step 1: Clustal -> relaxed PHYLIP
    AlignIO.convert(msa_file, "clustal", tree_file, "phylip-relaxed",
                    alphabet=IUPAC.protein)

    # Step 2: configure PhyML (the executable is expected at ./PhyML)
    phyml_cmd = PhymlCommandline("./PhyML")
    phyml_cmd.input = tree_file
    phyml_cmd.datatype = 'aa'  # amino-acid input
    phyml_cmd.model = 'LG'  # amino-acid substitution matrix
    phyml_cmd.alpha = 'e'
    # non-parametric bootstrap replicates; 100 is the point of diminishing
    # returns
    phyml_cmd.bootstrap = bootstrap

    # Step 3: run PhyML and echo whatever it printed
    print("Building distance tree from multiple sequence alignment...\n")
    stdout, stderr = phyml_cmd()
    print(stdout + stderr)
    newick_path = tree_file + "_phyml_tree.txt"
    print(f"Newick tree saved as {newick_path}")

    # Step 4: load the tree and convert to PhyloXML so clades can be coloured
    tree = Phylo.read(newick_path, "newick").as_phyloxml()

    # Colorblind-safe colors can be checked with ColorOracle:
    # http://colororacle.org/
    for node in tree.find_clades():
        node.width = 3  # bold lines
        label = node.name
        if str(label).startswith("gi|"):
            # red: known gene or false positive
            node.color = "#e4002b"
        elif label == original.id:
            # blue: originally searched gene
            node.color = "#006db6"
        elif label is not None and not node.color:
            # black: comparator nodes
            node.color = "#000000"
        elif not label:
            # gray: non-terminal nodes
            node.color = "#63666a"
        # green overrides everything else for novel genes
        if str(label).endswith("***"):
            node.color = "#00bf71"

    # Step 5: plot; the image size scales with the number of terminal nodes
    leaf_count = len(tree.get_terminals())
    plt.rc("font", size=18)  # bigger font for easier reading
    figure = plt.figure(figsize=(1.6 * leaf_count, leaf_count), dpi=300)
    canvas = figure.add_subplot(1, 1, 1)
    Phylo.draw(tree, axes=canvas, do_show=False)

    shared = dict(format='png', bbox_inches='tight', dpi=300)
    # white background image
    figure.savefig(f"{tree_file}.png", **shared)
    # transparent image
    figure.savefig(f"{tree_file}_transparent.png", transparent=True, **shared)
    print(f"Tree images saved as {tree_file + '.png'} "
          f"and {tree_file + '_transparent.png'} to {os.getcwd()}\n")
コード例 #37
0
ファイル: ORGANISM.py プロジェクト: lkrippahl/pycoevol
    def pairwiseDistance(self, id1, id2, method=None):
        """
        Calculates distance between each pair by different methods:
        ClustalW distance, p-distance, Jukes-Cantor and Alignment score,
        with BLOSUM62 or PAM250 matrix.
        (edit Parameters.py)

        For ``id1`` (with ``self.ord_sequences1``) and then ``id2`` (with
        ``self.ord_sequences2``) the best-scoring candidate of every entry
        is appended to ``./Data/<id>.fasta``; the two resulting distance
        lists are written side by side to ``./Data/matrix.txt``.
        """
        # NOTE: the ``method`` argument has always been replaced by the
        # module-level ``pairwise_distance`` setting; that behaviour is kept.
        method = pairwise_distance
        align_matrix = alignscore_matrix

        distances1 = self._bestCandidateDistances(
            id1, self.ord_sequences1, method, align_matrix)
        distances2 = self._bestCandidateDistances(
            id2, self.ord_sequences2, method, align_matrix)

        # One line per entry: "1", the 2-based entry number, and the two
        # distances, tab separated.
        with open("./Data/" + "matrix.txt", "w") as out_distance:
            for i in range(len(distances1)):
                out_distance.write("1" + "\t" + str(i + 2) + "\t" +
                                   str(distances1[i]) + "\t" +
                                   str(distances2[i]) + "\n")

    def _bestCandidateDistances(self, seq_id, ord_sequences, method,
                                align_matrix):
        """Return the lowest pairwise score of each entry against the query.

        The query is the first record of ``./Data/<seq_id>.fasta``. Each
        entry of ``ord_sequences`` is ``(description, sequences)`` where
        ``sequences`` holds ':'-separated candidates. Every candidate is
        aligned to the query with clustalw and scored with ``getDistance``;
        the candidate with the smallest score is appended to the FASTA file
        and its score is collected.
        """
        fasta_path = "./Data/" + seq_id + ".fasta"
        pair = "./Data/" + seq_id + ".pair"
        output_align = "./Data/" + seq_id + ".aln"
        output_tree = "./Data/" + seq_id + ".dnd"
        distance = "./Data/" + seq_id + ".distance"
        output_fasta = "./Data/" + seq_id + "_pair.fasta"

        # Only the first record of the FASTA file is used as the query.
        for record in SeqIO.parse(fasta_path, "fasta", IUPAC.protein):
            q_desc = str(record.description)
            q_seq = str(record.seq)
            break

        distances = []
        for entry in ord_sequences:
            p_desc = str(entry[0])
            candidates = str(entry[1]).rstrip(":").split(":")
            scored = []
            for candidate in candidates:
                # Two-record FASTA file: the query followed by the candidate.
                with open(pair, "w") as out_pair:
                    out_pair.write("\n" + ">" + q_desc + "\n" + q_seq + "\n" +
                                   "\n" + ">" + p_desc + "\n" + candidate +
                                   "\n")

                # clustalw output is redirected to the .distance file, which
                # getDistance receives below.
                system("clustalw " + pair + " > " + distance)
                AlignIO.convert(output_align, "clustal", output_fasta, "fasta")

                msa = [str(record.seq) for record in
                       SeqIO.parse(output_fasta, "fasta", IUPAC.protein)]
                pair_score = getDistance(msa[0], msa[1],
                                         method, align_matrix, distance)
                scored.append([pair_score, candidate])

            # First candidate with the smallest score (same pick as taking
            # element 0 of a stable sort).
            best_score, best_seq = min(scored, key=lambda item: item[0])
            distances.append(best_score)
            with open(fasta_path, "a") as out_fasta:
                out_fasta.write("\n" + ">" + p_desc + "\n" + best_seq + "\n")

        # Best-effort cleanup: each file is removed independently so a
        # missing one does not leave the others behind.
        for path in (pair, output_align, output_tree, output_fasta, distance):
            try:
                remove(path)
            except OSError:
                pass

        return distances
コード例 #38
0
# Based on https://www.biostars.org/p/327003/
from Bio import SeqIO
from Bio import AlignIO

# Convert the FASTA alignment to relaxed PHYLIP in a single call and report
# how many alignments were written.
source_path, target_path = "x.fa", "example.phy"
count = AlignIO.convert(source_path, "fasta", target_path, "phylip-relaxed")
print("Converted %i alignments" % count)
コード例 #39
0
from Bio import AlignIO
import glob
import os


##########################################################################################
##   Converts a directory of sequence alignment files into another file format.
##
##
##   Requirements:
##   1. Python 2.7
##   2. Biopython
##
##########################################################################################

outputDir = '' ## where output alignments should be located.
inputDir = '' ## where input alignments are located.
outputFormat = 'nexus' ## format of output alignments
inputFormat = 'fasta' ## format on input alignments
outputSuffix = '.nex' ## ending for each output file



alignmentFiles = glob.glob(inputDir + "*")
for f in alignmentFiles:
    # Parse the file once up front; AlignIO.read raises if it cannot be read
    # as a single alignment in the input format.
    align = AlignIO.read(f, inputFormat)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Converting file %s..." % f)
    # file_name was previously never defined (NameError on the first file).
    # Derive it from the input path: drop the directory and the extension.
    file_name = os.path.splitext(os.path.basename(f))[0]
    # The redundant alphabet=None keyword is dropped; newer Biopython
    # releases no longer accept it.
    AlignIO.convert(f, inputFormat, outputDir + file_name + outputSuffix, outputFormat)
コード例 #40
0
ファイル: Build_Tree.py プロジェクト: JinfengChen/Rice_pop
def fasta2phy(fa_align, phy_file):
    """Convert the FASTA alignment ``fa_align`` to relaxed PHYLIP.

    :param fa_align: path (or handle) of the FASTA alignment.
    :param phy_file: path of the PHYLIP file to create or overwrite.
    """
    # The context manager closes the output even when the conversion raises.
    with open(phy_file, 'w') as ofile:
        AlignIO.convert(fa_align, 'fasta', ofile, 'phylip-relaxed')
コード例 #41
0
ファイル: ALIGN.py プロジェクト: liubovch/pycoevol
    def computeAlignment(self, id, alignment):
        """Compute a multiple sequence alignment with the requested method.

        ``alignment`` selects the tool: ``"clustalw"``, ``"muscle"`` or, for
        any other value, MAFFT. Sequences are read from
        ``self.dirname + id + ".fasta"`` and the aligned FASTA is written to
        ``self.dirname + id + "_<tool>.fasta"``. Tool settings are looked
        up with ``LP`` in ``self.parameterfile``.
        """
        input_sequences = self.dirname + id + ".fasta"

        if alignment == "clustalw":
            gop = LP(self.parameterfile, "clustalw_gap_opening")
            gep = LP(self.parameterfile, "clustalw_gap_extension")
            d_matrix = LP(self.parameterfile, "clustalw_distance_matrix")

            output_align = self.dirname + id + ".aln"
            output_fasta = self.dirname + id + "_clustalw.fasta"
            output_tree = self.dirname + id + ".dnd"

            options = dict(infile=input_sequences,
                           outfile=output_align,
                           newtree=output_tree,
                           align="input",
                           seqnos="ON",
                           outorder="input",
                           type="PROTEIN",
                           pwmatrix=d_matrix,
                           gapopen=gop,
                           gapext=gep)
            executable = os.getcwd() + "/src/tools/clustalw/clustalw"
            try:
                # Try the bundled .exe first ...
                ClustalwCommandline(executable + ".exe", **options)()
            except Exception:
                # ... and fall back to the binary without the extension.
                ClustalwCommandline(executable, **options)()

            AlignIO.convert(output_align, "clustal", output_fasta, "fasta")
            # Best-effort cleanup; each file is removed independently.
            for path in (output_align, output_tree):
                try:
                    remove(path)
                except OSError:
                    pass

        elif alignment == "muscle":
            iteration = LP(self.parameterfile, "muscle_max_iteration")

            output_align = self.dirname + id + "_muscle.aln"
            output_fasta = self.dirname + id + "_muscle.fasta"

            muscle = MuscleCommandline(input=input_sequences,
                                       out=output_align,
                                       clwstrict=True,
                                       maxiters=iteration)
            muscle()
            AlignIO.convert(output_align, "clustal", output_fasta, "fasta")
            try:
                remove(output_align)
            except OSError:
                pass

            # Rewrite the aligned FASTA so its records follow the order of
            # the input file.
            organism_order = [
                record.description
                for record in SeqIO.parse(input_sequences, "fasta",
                                          IUPAC.protein)]
            rec = dict(
                (str(record.description), str(record.seq))
                for record in SeqIO.parse(output_fasta, "fasta",
                                          IUPAC.protein))

            with open(output_fasta, "w") as fasta:
                for org in organism_order:
                    fasta.write(">" + org + "\n" + rec[org] + "\n")

        else:
            configuration = LP(self.parameterfile, "mafft_configuration")
            threads = LP(self.parameterfile, "mafft_threading")
            output_fasta = self.dirname + id + "_mafft.fasta"

            # "fftnsi" selects --retree 2; every other value uses --localpair.
            if configuration == "fftnsi":
                strategy = "--retree 2"
            else:
                strategy = "--localpair"
            command = "mafft " + strategy + " --maxiterate 1000 --inputorder "

            # ``!=`` (not ``is not``) is deliberate: a setting of 0 compares
            # equal to False and therefore also disables threading.
            if threads != False:  # noqa: E712
                try:
                    command += "--threads %i " % int(threads)
                except (TypeError, ValueError):
                    # Unusable thread setting: run without --threads.
                    pass

            system(command + input_sequences + ">" + output_fasta)
コード例 #42
0
# Remaining settings come from the command line; when an argument is missing
# the user is prompted for it instead.
# NOTE(review): `file` and `trees` are used below but are not defined in this
# excerpt -- presumably they are read from sys.argv[1] / sys.argv[2] earlier
# in the script; confirm.
try:
    bootstrap = sys.argv[3]
except:
    bootstrap = raw_input("Number of bootstrap: ")

try:
    threads = sys.argv[4]
except:
    threads = raw_input("Number of threads: ")

try:
    name = sys.argv[5]
except:
    name = raw_input("Introduce_number: ")

# Write a relaxed-PHYLIP copy of the FASTA alignment next to the input file.
AlignIO.convert(file, "fasta", file+".phy", "phylip-relaxed")

file_phy = file + ".phy"

# First RAxML invocation (run name "run1"): the command is echoed, then run
# through the shell with `threads`, `trees` and the PHYLIP file filled in.
try:
    print "raxmlHPC-PTHREADS-AVX -T %s -m GTRCAT -p 12345 -# %s -s %s -n run1" % (threads, trees, file_phy)
    call("raxmlHPC-PTHREADS-AVX -T %s -m GTRCAT -p 12345 -# %s -s %s -n run1" % (threads, trees, file_phy), shell=True)
except:
    print "IT IS NOT GOOD. PLEASE, CHECK YOUR INPUT FILE(S)"
    sys.exit()

# Second RAxML invocation (run name "run2"): same input, with "-b 12345" and
# the `bootstrap` value passed to "-#".
call("raxmlHPC-PTHREADS-AVX -T %s -m GTRCAT -p 12345 -b 12345 -# %s -s %s -n run2" % (threads, bootstrap, file_phy), shell=True)

try:
	call("raxmlHPC -m GTRCAT -p 12345 -f b -t RAxML_bestTree.run1 -z RAxML_bootstrap.run2 -n %s.run3" % (name), shell=True)
	call("rm *.run1*", shell=True)
コード例 #43
0
ファイル: msa_functions.py プロジェクト: nomihadar/Thesis
def convert_fas_to_phylip(input_file, output_file):
    """Convert ``input_file`` (FASTA_FORMAT) into ``output_file`` (PHYLIP_FORMAT)."""
    source_format, target_format = FASTA_FORMAT, PHYLIP_FORMAT
    AlignIO.convert(input_file, source_format, output_file, target_format)
コード例 #44
0
import sys
from Bio import SeqIO, AlignIO, Phylo
from Bio.Alphabet import generic_protein, generic_dna

# Positional arguments, in order: kind of data ('seq', 'align' or 'tree'),
# input path, output path, input format, output format.
options = sys.argv[1:]
incheck, infile, outfile, intype, outtype = (
    options[0], options[1], options[2], options[3], options[4])

# Converter for each kind of data, with the extra positional arguments it
# receives after the four path/format arguments.
converters = {
    'seq': (SeqIO.convert, (generic_dna,)),
    'align': (AlignIO.convert, (generic_dna,)),
    'tree': (Phylo.convert, ()),
}

# Unknown kinds are ignored silently.
if incheck in converters:
    run_convert, extra_args = converters[incheck]
    run_convert(infile, intype, outfile, outtype, *extra_args)
コード例 #45
0
def convert(InfileName, OutfileName):
    '''Convert a NEXUS alignment file to PHYLIP with Biopython.

    Prints the number of converted alignments and the output file name.
    '''
    converted = AlignIO.convert(InfileName, "nexus", OutfileName, "phylip")
    report = (
        ("\nConverted %i alignments", converted),
        ("\nOutput saved as %s", OutfileName),
    )
    for template, value in report:
        print(template % value)
コード例 #46
0
def write_AlignIO_dna():
    """Convert opuntia.aln to a phylip file.

    Reads the Clustal alignment ``Clustalw/opuntia.aln``, writes it to
    ``Phylip/opuntia.phy`` in PHYLIP format and checks that exactly one
    alignment was converted.
    """
    # Run the conversion outside the assert statement so the file is still
    # written when assertions are stripped (``python -O``).
    count = AlignIO.convert("Clustalw/opuntia.aln", "clustal",
                            "Phylip/opuntia.phy", "phylip")
    assert count == 1, "expected 1 alignment, converted %r" % (count,)
コード例 #47
0
def _merge_chain_msas(pdb, header, bp_chid, chain_idx, bound_pro, seqs):
	"""Merge the Pfam MSAs and concatenate the sequences of several chains.

	For every header index in ``chain_idx`` the chain's Pfam family is
	looked up (through its UniProt reference, or through the chain sequence
	when that lookup raises IndexError), the family MSA is fetched and
	parsed, and the per-chain MSAs are merged in order. Each chain sequence
	is also appended to ``seqs``.

	Returns ``(total_msa, total_seq)``. Raises ValueError when
	``chain_idx`` is empty (previously this surfaced as a NameError for the
	first chain set, or silently reused the first set's results for the
	second one).
	"""
	if not chain_idx:
		raise ValueError("no chain of %s matches the PDB header" % bound_pro)

	total_msa = None
	total_seq = None
	for position, idx in enumerate(chain_idx):
		unip_raw = str(header[idx].dbrefs).split(" ")[1]
		try:
			unip = searchUniprotID(unip_raw)
			# list(...) keeps the "first key" lookup working on Python 3
			# and still raises IndexError for an empty result.
			pfamid = list(searchPfam(unip).keys())[0]
			seq = pdb.getHierView()[bp_chid[idx]].getSequence()
			good = True
		except IndexError:
			# Fall back to a sequence-based Pfam search.
			pdb = parsePDB(bound_pro)
			seq = pdb.getHierView()[bp_chid[idx]].getSequence()
			pfamid = list(searchPfam(seq).keys())[0]
			good = False

		fetchPfamMSA(pfamid)
		raw_msa = parseMSA(pfamid + '_full.sth')
		# The MSA is only refined by label when the UniProt lookup worked.
		refined_msa = refineMSA(raw_msa, label=unip) if good else raw_msa

		if position == 0:
			total_msa = refined_msa
			total_seq = seq
		else:
			total_msa = mergeMSA(total_msa, refined_msa)
			total_seq = total_seq + seq
		seqs.append(seq)
	return total_msa, total_seq


def coevol(line):
	"""Score coevolution predictions against structural contacts for one complex.

	``line`` is a tab-separated record whose first field looks like
	``<pdbid>_<chains1>:<chains2>`` and whose second and third fields look
	like ``<pdbid>_<chains>``. Pfam MSAs of both chain groups are merged and
	aligned with the complex sequence via clustalw, a pair matrix is built
	with ``buildPCMatrix`` and ranked, and for every threshold in the
	module-level ``thold`` the fraction of top-ranked pairs whose C-alpha
	distance is below 8 is computed. The fractions are saved to
	``<pdbid>.res``; several intermediate FASTA/alignment files are also
	written to the working directory.
	"""
	entry = line.split("\t")
	bound_pro = entry[0].split("_")[0]
	bound_pro_c1 = entry[0].split("_")[1].split(":")[0]
	bound_pro_c2 = entry[0].split("_")[1].split(":")[1]

	# The unbound partners are parsed but not used further; the parsing is
	# kept so that malformed lines still fail here.
	unbound_pro1 = entry[1].split("_")[0]
	unbound_pro1_c = entry[1].split("_")[1]

	unbound_pro2 = entry[2].split("_")[0]
	unbound_pro2_c = entry[2].split("_")[1][:-1]

	pdb = parsePDB(bound_pro)
	header = parsePDBHeader(bound_pro, 'polymers')
	bp_chid = [polymer.chid for polymer in header]

	# Header positions of the chains belonging to each side of the complex.
	bp1_chid_idx = [bp_chid.index(c) for c in bound_pro_c1 if c in bp_chid]
	bp2_chid_idx = [bp_chid.index(c) for c in bound_pro_c2 if c in bp_chid]

	seqs = []
	total_msa1, total_seq1 = _merge_chain_msas(
		pdb, header, bp_chid, bp1_chid_idx, bound_pro, seqs)
	total_msa2, total_seq2 = _merge_chain_msas(
		pdb, header, bp_chid, bp2_chid_idx, bound_pro, seqs)

	mergedMSA = specMergeMSA(total_msa1, total_msa2)
	finalMergedMSA = refineMSA(mergedMSA, colocc=0.8)
	writeMSA(bound_pro + '.fasta', finalMergedMSA)

	# Single-record FASTA holding the concatenated sequence of the complex.
	merged_seq = total_seq1 + total_seq2
	with open(bound_pro + '_one.fasta', 'w') as one_fasta:
		one_fasta.write(">" + bound_pro + "\n")
		one_fasta.write(merged_seq)

	call(["clustalw -profile1=" + bound_pro + '.fasta -sequences -profile2=' + bound_pro + '_one.fasta'], shell=True)
	AlignIO.convert(bound_pro + "_one.aln","clustal",bound_pro + "_final.fasta","fasta")
	finalMSA = parseMSA(bound_pro + "_final.fasta")
	finalRefinedMSA = refineMSA(finalMSA, colocc=0.95)

	idx_real = finalRefinedMSA.getIndex(bound_pro)
	seq_from_alignment = str(finalRefinedMSA[idx_real])

	# For every chain sequence, record the positions (in the chain and in
	# the aligned complex sequence) of the columns where both are ungapped.
	res_idx_str = []
	res_idx_ali = []
	for seq in seqs:
		alignment = pairwise2.align.globalms(seq, seq_from_alignment,5,-20,-5,-1)
		p1res = []
		p2res = []
		count = 0
		count1 = 0
		for i in range(0,len(alignment[0][0])):
			if alignment[0][0][i] == '-':
				count = count+1
			if alignment[0][1][i] == '-':
				count1 = count1 + 1
			if alignment[0][0][i] != '-' and alignment[0][1][i] != '-':
				p1res.append(i-count)
				p2res.append(i-count1)
		res_idx_str.append(p1res)
		res_idx_ali.append(p2res)

	# Keep only the groups returned by group_consecutives that are longer
	# than 10.
	res_idx_after_ali = []
	for i in range(len(res_idx_ali)):
		res_idx_after_ali.append([])
		groups=group_consecutives(np.array(res_idx_ali[i]))
		for g in groups:
			if len(g)>10:
				res_idx_after_ali[i].append(g)

	# NOTE(review): arr1 is rebuilt from res_idx_after_ali[i] on every pass
	# of the inner loop, so only the difference with the LAST j survives.
	# Also, `j` is read after the loops and is undefined when fewer than two
	# sequences exist. Behaviour is left unchanged; confirm the intent.
	res_idx_filtered_ali = []
	for i in range(len(res_idx_after_ali)-1):
		for j in range(i+1,len(res_idx_after_ali)):
			arr1 = np.array(res_idx_after_ali[i])
			arr2 = np.array(res_idx_after_ali[j])
			arr1 = np.setdiff1d(arr1, arr2)
		res_idx_filtered_ali.append(arr1)
	res_idx_filtered_ali.append(np.setdiff1d(np.array(res_idx_after_ali[j]),np.ones(1)*-1))

	# Translate the retained alignment positions back to chain positions.
	mapping_on_str = []
	for i in range(len(res_idx_filtered_ali)):
		mapping_on_str.append([])
		for j in range(len(res_idx_filtered_ali[i])):
			mapping_on_str[i].append(res_idx_ali[i].index(res_idx_filtered_ali[i][j]))

	str_idx = []
	for i in range(len(mapping_on_str)):
		str_idx.append([])
		for j in range(len(mapping_on_str[i])):
			str_idx[i].append(res_idx_str[i][mapping_on_str[i][j]])

	# Collect the C-alpha coordinates of the retained residues, chain by
	# chain; `gate` flips once the first non-empty coordinate set is found.
	count = 0
	total_length = 0
	gate = 0
	coords = np.array([])
	for i in range(len(bp1_chid_idx)):
		if gate == 0:
			if len(str_idx[count])!=0:
				coords = parsePDB(bound_pro, subset='ca', chain=bp_chid[bp1_chid_idx[i]]).getCoords()[np.array(str_idx[count]),:]
				total_length += len(coords)
			count = count + 1
			if len(coords) != 0:
				gate = 1
		else:
			if len(str_idx[count])!=0:
				new_coords = parsePDB(bound_pro, subset='ca',chain=bp_chid[bp1_chid_idx[i]]).getCoords()[np.array(str_idx[count]),:]
				total_length += len(new_coords)
			count = count + 1
			if len(coords)!=0 and len(new_coords)!=0:
				coords = np.concatenate((coords, new_coords), axis=0)

	for i in range(len(bp2_chid_idx)):
		if len(str_idx[count])!=0:
			new_coords = parsePDB(bound_pro, subset='ca',chain=bp_chid[bp2_chid_idx[i]]).getCoords()[np.array(str_idx[count]),:]
			total_length += len(new_coords)
		count = count + 1
		if len(coords)!=0 and len(new_coords)!=0:
			coords = np.concatenate((coords, new_coords), axis=0)

	# Contact map: pairs closer than 8 (presumably angstroms -- confirm).
	dist = buildDistMatrix(coords)
	dist_ini = dist < 8

	PC = buildPCMatrix(finalRefinedMSA)
	PC_ranked = calcRankorder(PC)

	# For each threshold, the fraction of the top-ranked pairs in contact.
	result = zeros(8)
	for z in range(len(thold)):
		num = int(len(PC_ranked[0])*thold[z])
		for i in range(num):
			result[z]+=dist_ini[PC_ranked[0][i],PC_ranked[1][i]]
		result[z]/=num

	np.savetxt(bound_pro + '.res', np.array(result))
コード例 #48
0
ファイル: trees.py プロジェクト: sarab609/scraps
def infer_tree(clwfile, phyfile):
    """Convert a Clustal alignment to relaxed PHYLIP and run PhyML on it.

    ``clwfile`` is the Clustal input; ``phyfile`` is the PHYLIP file that is
    written and then handed to PhyML as its input.
    """
    AlignIO.convert(clwfile, "clustal", phyfile, "phylip-relaxed")

    # Amino-acid settings. A nucleotide variant (datatype='nt',
    # model='HKY85') was previously kept here as commented-out code.
    phyml_exe = "tools/PhyML_3.0/PhyML_3.0_win32.exe"
    phyml_options = dict(input=phyfile, datatype='aa', model='WAG',
                         alpha='e', bootstrap=-1)
    run_phyml = PhymlCommandline(phyml_exe, **phyml_options)
    out_log, err_log = run_phyml()
コード例 #49
0
 def test_distances_from_AlignIO_DNA(self):
     """Check distances computed from a PHYLIP file written by AlignIO (DNA)."""
     phylip_path = "Phylip/opuntia.phy"
     converted = AlignIO.convert("Clustalw/opuntia.aln", "clustal",
                                 phylip_path, "phylip")
     # exactly one alignment must have been written
     self.assertEqual(converted, 1)
     self.distances_from_alignment(phylip_path)
コード例 #50
0
#!/usr/bin/python

from subprocess import call
from Bio import AlignIO
import os
import sys

# Take the FASTA file from the command line, or ask for it interactively
# when no argument was given.
try:
	fasta_file = sys.argv[1]
except IndexError:
	try:
		read_line = raw_input  # Python 2
	except NameError:
		read_line = input  # Python 3
	fasta_file = read_line("Introduce FASTA file: ")

# Resolve the input path *before* changing directory so that absolute paths
# and paths containing directories keep working, and name the output after
# the bare file name so it always lands inside the phyml/ directory.
fasta_path = os.path.abspath(fasta_file)
phylip_name = os.path.basename(fasta_file) + ".phy"

# Create the working directory; an already existing directory is fine.
try:
	os.mkdir("phyml")
except OSError:
	if not os.path.isdir("phyml"):
		raise
os.chdir("phyml")
AlignIO.convert(fasta_path, "fasta", phylip_name, "phylip-relaxed")

# Pass the arguments as a list (no shell) so unusual characters in the file
# name cannot be interpreted by a shell.
# NOTE(review): "-b" is given twice (1000 and -4), exactly as in the original
# command line -- confirm which bootstrap/aLRT setting is intended.
call(["nice", "phyml", "-i", phylip_name, "-d", "nt", "-b", "1000",
	"-b", "-4", "-m", "GTR", "-s", "BEST", "-v", "e", "-c", "4", "-a", "e"])
コード例 #51
0
 def test_distances_from_protein_AlignIO(self):
     """Build a distance matrix from a protein alignment converted by AlignIO.

     The hedgehog Clustal alignment is rewritten as PHYLIP and that file is
     handed to the distance-matrix check with DNA=False.
     """
     phylip_path = "Phylip/hedgehog.phy"
     converted = AlignIO.convert(
         "Clustalw/hedgehog.aln", "clustal", phylip_path, "phylip"
     )
     # Exactly one alignment is expected to have been converted.
     self.assertEqual(converted, 1)
     self.distances_from_alignment(phylip_path, DNA=False)
コード例 #52
0
ファイル: __init__.py プロジェクト: JAlvarezJarreta/MEvoLib
def get_phylogeny ( binary, infile, infile_format, args = 'default',
                    outfile = None, outfile_format = 'newick',
                    bootstraps = 0 ) :
    """
    Infer the phylogeny from the input alignment using the phylogenetic
    inference tool and arguments given. The resultant phylogeny is returned as a
    Bio.Phylo.BaseTree object and saved in the output file (if provided). If
    'infile' or 'outfile' contain a relative path, the current working directory
    will be used to get the absolute path. If the output file already exists,
    the old file will be overwritten without any warning.

    Arguments :
        binary  ( string )
            Name or path of the phylogenetic inference tool.
        infile  ( string )
            Sequence alignment file.
        infile_format  ( string )
            Input file format.
        args  ( Optional[string] )
            Keyword or arguments to use in the call of the phylogenetic
            inference tool, excluding infile and outfile arguments. By default,
            'default' arguments are used.
        outfile  ( Optional[string] )
            Phylogenetic tree output file.
        outfile_format  ( Optional[string] )
            Output file format. By default, NEWICK format.
        bootstraps  ( Optional[int] )
            Number of bootstraps to generate. By default, 0 (only use the input
            alignment).

    Returns :
        Bio.Phylo.BaseTree
            Resultant phylogenetic tree.
        float
            Log-likelihood score of the phylogeny.

    Raises :
        ValueError
            If the tool introduced isn't included in MEvoLib.
        IOError
            If the input path or the input file provided doesn't exist.
        RuntimeError
            If the call to the phylogenetic inference tool command raises an
            exception.

    * The input file format must be supported by Bio.AlignIO.
    * The output file format must be supported by Bio.Phylo.
    """
    # Get the variables associated with the given phylogenetic inference tool
    bin_name = os.path.split(binary)[1].lower()
    if ( bin_name not in _PHYLO_TOOL_TO_LIB ) :
        message = 'The phylogenetic inference tool "{}" isn\'t included in ' \
                  'MEvoLib.Inference'.format(bin_name)
        raise ValueError(message)
    tool_lib = _PHYLO_TOOL_TO_LIB[bin_name]
    sprt_infile_formats = tool_lib.SPRT_INFILE_FORMATS
    gen_args = tool_lib.gen_args
    get_results = tool_lib.get_results
    cleanup = tool_lib.cleanup
    # Get the command line to run in order to get the resultant phylogeny
    infile_path = get_abspath(infile)
    tmpfile_path = None
    try :
        # If the input file format is not supported by the phylogenetic
        # inference tool, convert it to a temporary supported file
        if ( infile_format.lower() not in sprt_infile_formats ) :
            # Only the unique name is needed: close the handle right away so
            # the file can be reopened by name on every platform, and delete
            # it explicitly once the tool has finished
            tmpfile = tempfile.NamedTemporaryFile(delete=False)
            tmpfile_path = tmpfile.name
            tmpfile.close()
            AlignIO.convert(infile_path, infile_format, tmpfile_path,
                            sprt_infile_formats[0])
            infile_path = tmpfile_path
        # Create full command line list
        command = [binary] + gen_args(args, infile_path, bootstraps)
        try :
            # Run the phylogenetic inference process handling any Runtime
            # exception
            try :
                output = subprocess.check_output(command, stderr=DEVNULL,
                                                 universal_newlines=True)
            except subprocess.CalledProcessError as e :
                message = 'Running "{}" raised an exception' \
                          .format(' '.join(e.cmd))
                raise RuntimeError(message)
            phylogeny, score = get_results(command, output)
            if ( outfile ) :
                # Save the resultant phylogeny in the given outfile and format
                outfile_path = get_abspath(outfile)
                Phylo.write(phylogeny, outfile_path, outfile_format)
        finally :
            # Always let the tool library clean up, also when parsing the
            # results or writing the output file fails
            cleanup(command)
    finally :
        if ( tmpfile_path is not None ) :
            try :
                os.remove(tmpfile_path)
            except OSError :
                # Best effort: the temporary file may already be gone
                pass
    # Return the resultant phylogeny as a Bio.Phylo.BaseTree object and its
    # log-likelihood score
    return ( phylogeny, score )
コード例 #53
0
 def test_parsimony_tree_from_AlignIO_DNA(self):
     """Build a parsimony tree from a DNA alignment converted by AlignIO.

     The Opuntia Clustal alignment is rewritten as PHYLIP and that file is
     handed to the parsimony-tree check.
     """
     phylip_path = "Phylip/opuntia.phy"
     converted = AlignIO.convert(
         "Clustalw/opuntia.aln", "clustal", phylip_path, "phylip"
     )
     # Exactly one alignment is expected to have been converted.
     self.assertEqual(converted, 1)
     self.parsimony_tree(phylip_path, "phylip")
コード例 #54
0
ファイル: ALIGN.py プロジェクト: lkrippahl/pycoevol
 def computeAlignment(self, id, alignment):
     """Compute a multiple sequence alignment of ./Data/<id>.fasta.

     id        -- base name of the input FASTA file inside ./Data/.
     alignment -- "clustalw" or "muscle"; any other value runs MAFFT.

     The aligned sequences are written in FASTA format to
     ./Data/<id>_clustalw.fasta, ./Data/<id>_muscle.fasta or
     ./Data/<id>_mafft.fasta respectively. Returns None.
     """
     input_sequences = "./Data/" + id + ".fasta"

     if alignment == "clustalw":
         output_align = "./Data/" + id + ".aln"
         output_fasta = "./Data/" + id + "_clustalw.fasta"
         output_tree = "./Data/" + id + ".dnd"
         clustalw = ClustalwCommandline(infile=input_sequences,
                                        outfile=output_align,
                                        newtree=output_tree,
                                        align="input",
                                        seqnos="ON",
                                        outorder="input",
                                        type="PROTEIN",
                                        pwmatrix=clustalw_distance_matrix,
                                        gapopen=clustalw_gap_opening,
                                        gapext=clustalw_gap_extension)
         clustalw()
         AlignIO.convert(output_align, "clustal", output_fasta, "fasta")
         # Best-effort removal of the intermediate files; each file is
         # handled on its own so one failure does not skip the other.
         for leftover in (output_align, output_tree):
             try:
                 remove(leftover)
             except OSError:
                 pass

     elif alignment == "muscle":
         output_align = "./Data/" + id + "_muscle.aln"
         output_fasta = "./Data/" + id + "_muscle.fasta"

         muscle = MuscleCommandline(input=input_sequences,
                                    out=output_align,
                                    clwstrict=True,
                                    maxiters=muscle_max_iteration)
         muscle()
         AlignIO.convert(output_align, "clustal", output_fasta, "fasta")
         try:
             remove(output_align)
         except OSError:
             pass

         # Rewrite the aligned FASTA so that its records follow the order
         # of the input file; records are matched by their description.
         organism_order = [
             record.description
             for record in SeqIO.parse(input_sequences, "fasta", IUPAC.protein)
         ]
         rec = {}
         for record in SeqIO.parse(output_fasta, "fasta", IUPAC.protein):
             rec[str(record.description)] = str(record.seq)

         # The aligned file has been read completely above, so it is safe
         # to truncate and rewrite it now.
         with open(output_fasta, "w") as fasta:
             for org in organism_order:
                 fasta.write(">" + org + "\n" + rec[org] + "\n")

     else:
         output_fasta = "./Data/" + id + "_mafft.fasta"

         if mafft_configuration == "fftnsi":
             strategy = "--retree 2"
         else:
             strategy = "--localpair"
         command = "mafft " + strategy + " --maxiterate 1000 --inputorder "

         # Add --threads only when a usable thread count is configured;
         # otherwise fall back to the single-threaded command line.
         threads = mafft_threading
         if threads != False:
             try:
                 command += "--threads %i " % int(threads)
             except (TypeError, ValueError, OverflowError):
                 pass

         system(command + input_sequences + ">" + output_fasta)
コード例 #55
0
 def test_parsimony_from_AlignIO_protein(self):
     """Make a parsimony tree from protein alignment written with AlignIO."""
     n = AlignIO.convert("Clustalw/hedgehog.aln", "clustal",
                         "Phylip/hedgehog.phy", "phylip")
     # The conversion is expected to write exactly one alignment; check it
     # instead of silently discarding the result.
     self.assertEqual(n, 1)
     # NOTE(review): the tree is built from Phylip/interlaced.phy rather than
     # the Phylip/hedgehog.phy file written above -- confirm which file this
     # test is meant to exercise.
     self.parsimony_tree("Phylip/interlaced.phy", "phylip", DNA=False)
コード例 #56
0
def write_AlignIO_dna():
    """Convert opuntia.aln to a phylip file.

    Raises AssertionError unless exactly one alignment was converted.
    """
    # Run the conversion outside of any ``assert`` statement: assertions are
    # stripped under ``python -O``, which would skip writing the file.
    count = AlignIO.convert("Clustalw/opuntia.aln", "clustal",
                            "Phylip/opuntia.phy", "phylip")
    if count != 1:
        raise AssertionError("Expected 1 alignment, converted %r" % (count,))
コード例 #57
0
 def test_bootstrap_AlignIO_DNA(self):
     """Pseudosample a PHYLIP DNA alignment produced by AlignIO.

     The Opuntia Clustal alignment is rewritten as PHYLIP and that file is
     handed to the bootstrap check.
     """
     phylip_path = "Phylip/opuntia.phy"
     converted = AlignIO.convert(
         "Clustalw/opuntia.aln", "clustal", phylip_path, "phylip"
     )
     # Exactly one alignment is expected to have been converted.
     self.assertEqual(converted, 1)
     self.check_bootstrap(phylip_path, "phylip")
コード例 #58
0
ファイル: BioMagick.py プロジェクト: LeeBergstrand/BioMagick
def direct_convert(settings, id_results, out_path, out_formats, alphabet):
	"""Convert identified input files with the matching BioPython converter.

	When out_path is None, only the first (path, format) pair of id_results
	is converted to out_formats[0]; the result goes through the scratch file
	./conv.tmp and is printed to stdout. Otherwise every input is converted
	to every requested output format and written to
	<out_path>/<input base name up to the first dot>.<extension>.

	Arguments:
		settings: mapping from format name to an object exposing
			``bioclass`` ("seq", "phylo" or "align") and ``extension``.
		id_results: mapping from input path to its identified format name.
		out_path: output directory, or None to print to stdout.
		out_formats: sequence of output format names.
		alphabet: passed through to SeqIO.convert for "seq" inputs.

	Raises:
		Exception: in stdout mode, if the input format is "unidentified".
		SystemExit: with status 1 on an unknown bioclass, or in stdout mode
			when the conversion raises ValueError. In directory mode a
			ValueError only skips the affected input.
	"""
	if out_path is None:
		out_file = "./conv.tmp"
		in_path, in_format = list(id_results.items())[0]
		out_format = out_formats[0]

		if in_format == "unidentified":
			raise Exception("Failed to identify the file")

		try:
			try:
				_convert_by_bioclass(settings[in_format].bioclass, in_path, in_format,
					out_file, out_format, alphabet)
			except ValueError as e:
				print("Error in conversion of " + in_path + " to " + out_format + ": " + str(e))
				sys.exit(1)

			with open(out_file, "r") as tmp_file:
				print(tmp_file.read())
		finally:
			# Never leave the scratch file behind, even when the conversion
			# fails part-way; it may not exist if nothing was written.
			try:
				os.remove(out_file)
			except OSError:
				pass
	else:
		for out_format in out_formats:
			for in_path, in_format in id_results.items():
				# os.path already follows the conventions of the running
				# platform, so no per-platform separator handling is needed.
				base_name = os.path.basename(in_path).split('.')[0]
				out_extension = settings[out_format].extension
				out_file = os.path.join(out_path, base_name) + "." + out_extension
				print("\nConverting %s file %s to %s file %s" % (in_format, in_path, out_format, out_file))

				try:
					_convert_by_bioclass(settings[in_format].bioclass, in_path, in_format,
						out_file, out_format, alphabet)
				except ValueError as e:
					print("\nError in conversion of " + in_path + " to " + out_format + ": " + str(e))
					print("Skipping " + in_path + " ...\n")
					continue


def _convert_by_bioclass(bioclass, in_path, in_format, out_file, out_format, alphabet):
	"""Run the SeqIO/Phylo/AlignIO converter selected by bioclass; exit with status 1 if it is unknown."""
	if bioclass == "seq":
		SeqIO.convert(in_path, in_format.lower(), out_file, out_format, alphabet)
	elif bioclass == "phylo":
		Phylo.convert(in_path, in_format.lower(), out_file, out_format)
	elif bioclass == "align":
		AlignIO.convert(in_path, in_format.lower(), out_file, out_format)
	else:
		print("Error: invalid BioPython conversion class: %s" % bioclass)
		sys.exit(1)
コード例 #59
0
def convert_alignment_format(input_msa_clustal, output_msa_phylip):
    """Rewrite a Clustal alignment file as sequential PHYLIP.

    input_msa_clustal -- path of the Clustal alignment to read.
    output_msa_phylip -- path of the PHYLIP file to write.

    Returns None.
    """
    source_format, target_format = "clustal", "phylip-sequential"
    AlignIO.convert(
        input_msa_clustal, source_format, output_msa_phylip, target_format
    )