def concat(mypath, same_taxa):
    """Combine the NEXUS matrices found in a directory into one matrix.

    Adapted from http://biopython.org/wiki/Concatenate_nexus, with an added
    filter that skips hidden entries and anything that is not a regular file.

    Args:
        mypath: Directory whose visible regular files are parsed as NEXUS.
        same_taxa: When true, ``check_taxa`` is called on the parsed matrices
            before combining; when false the matrices are combined directly.

    Returns:
        The result of ``Nexus.combine``. ``None`` is returned when
        ``same_taxa`` is true and ``check_taxa`` returns a truthy value.
    """
    nexi = []
    for item in os.listdir(mypath):
        full_path = os.path.join(mypath, item)
        if item.startswith('.') or not os.path.isfile(full_path):
            continue
        # Open through the full path: the bare name only resolves when the
        # current working directory happens to be ``mypath``.
        with open(full_path, 'r') as nex_open:
            nexi.append((item, Nexus.Nexus(nex_open)))

    # NOTE(review): check_taxa is defined elsewhere; presumably it raises on
    # a taxon mismatch and returns None otherwise -- confirm.
    if same_taxa and check_taxa(nexi):
        return None
    return Nexus.combine(nexi)
def main():
    """Concatenate a directory of NEXUS alignments and write the result.

    Reads every ``*.nex*`` file under ``args.alignments``, combines them and
    writes either a PHYLIP file (optionally with a separate charsets file)
    or a NEXUS file (with or without sets), depending on ``args.nexus`` and
    ``args.charsets``.
    """
    args = get_args()
    # setup logging
    log, my_name = setup_logging(args)

    # read alignments
    log.info("Reading input alignments in NEXUS format")
    data = []
    for fname in glob.glob(os.path.join(args.alignments, '*.nex*')):
        data.append((os.path.basename(fname), Nexus.Nexus(fname)))

    log.info("Concatenating files")
    concatenated = Nexus.combine(data)

    # All output files share the input directory's base name.
    base = os.path.basename(args.alignments)
    if args.nexus:
        concat_file = os.path.join(args.output, base + ".nexus")
        if args.charsets:
            log.info(
                "Writing concatenated alignment to NEXUS format (with charsets)")
            concatenated.write_nexus_data(concat_file)
        else:
            log.info(
                "Writing concatenated alignment to NEXUS format (without charsets)")
            concatenated.write_nexus_data(concat_file, append_sets=False)
    else:
        concat_file = os.path.join(args.output, base + ".phylip")
        if args.charsets:
            sets = concatenated.append_sets()
            charset_file = os.path.join(args.output, base + ".charsets")
            log.info("Writing charsets to {}".format(charset_file))
            with open(charset_file, 'w') as outf:
                outf.write(sets)
        log.info(
            "Writing concatenated PHYLIP alignment to {}".format(concat_file))
        concatenated.export_phylip(concat_file)

    # end
    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
def combine(arg):
    """Combine the alignments listed in ``arg.input`` into ``arg.output``.

    When ``get_format(arg)`` reports ``'fasta'``, ``arg`` is first replaced
    by the result of ``convert(arg)``. Each input is then parsed as NEXUS,
    labelled with ``clean_name(name)``, combined and written out.
    """
    if get_format(arg) == 'fasta':
        arg = convert(arg)

    name_data = []
    for name in arg.input:
        name_data.append((clean_name(name), Nexus.Nexus(name)))

    Nexus.combine(name_data).write_nexus_data(filename=arg.output)
def concatNexusAlignments(processes):
    """Convert pending FASTA alignments to NEXUS and concatenate them.

    Every file in ``<base_dir>/fasta`` whose stem (text before the first
    ``.``) has no counterpart in ``<base_dir>/nexus`` is passed to
    ``convertFastaToNexus`` on a thread pool. All NEXUS files are then
    combined into ``<base_dir>/WGS.nex``, unless that file already exists.

    Args:
        processes: Number of worker threads given to ``ThreadPool``.

    Returns:
        Path of the combined NEXUS file.
    """
    fasta_dir = '{}/fasta'.format(base_dir)
    nexus_dir = '{}/nexus'.format(base_dir)

    # A set gives O(1) membership tests in the filter below.
    already_done = {x.split('.')[0] for x in os.listdir(nexus_dir)}
    fastas = [
        '{}/{}'.format(fasta_dir, name) for name in os.listdir(fasta_dir)
        if name.split('.')[0] not in already_done
    ]

    pool = ThreadPool(processes)
    try:
        # list() drains the lazy imap so every conversion actually runs.
        list(
            tqdm(pool.imap(convertFastaToNexus, fastas),
                 total=len(fastas),
                 desc='Fastas to Nexus...'))
    finally:
        # Release the worker threads even if a conversion raised.
        pool.close()
        pool.join()

    combined_nexus = '{}/WGS.nex'.format(base_dir)
    if os.path.isfile(combined_nexus):
        return combined_nexus

    nexus = [(path, Nexus.Nexus(path)) for path in
             ('{}/{}'.format(nexus_dir, name)
              for name in os.listdir(nexus_dir))]
    combined = Nexus.combine(nexus)
    # The context manager guarantees the output is flushed and closed.
    with open(combined_nexus, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    return combined_nexus
def main():
    """Write ``args.samples`` random concatenations of NEXUS loci.

    For each replicate, ``args.sample_size`` files are drawn from the
    ``*.nex*`` files in ``args.nexus``, combined, and written to
    ``args.output`` together with a ``META-...txt`` file listing the
    selected inputs.

    Note: indices are drawn independently, so the same file can be selected
    more than once within a replicate (sampling with replacement).
    """
    args = get_args()
    # get filenames in directory and convert to array
    files = numpy.array(glob.glob(os.path.join(args.nexus, '*.nex*')))
    # make sure we have enough
    assert len(files) >= args.sample_size, \
        "Sample size must be <= number(files)"
    print("Running")
    for i in range(args.samples):
        sys.stdout.write('.')
        sys.stdout.flush()
        # randint's upper bound is exclusive, so this draws from
        # 0 .. len(files) - 1 exactly like the deprecated
        # random_integers(0, len(files) - 1, ...).
        sample = numpy.random.randint(0, len(files), args.sample_size)
        # reindex filenames by random selections
        random_files = sorted(files[sample].tolist())
        # concatenate and output
        files_to_combine = [(f, Nexus.Nexus(f)) for f in random_files]
        combined = Nexus.combine(files_to_combine)
        align_name = "random-sample-{}-{}-loci.nex".format(
            i, args.sample_size)
        meta_name = 'META-random-sample-{}-{}-loci.txt'.format(
            i, args.sample_size)
        # Context managers close both files even if a write fails.
        with open(os.path.join(args.output, meta_name), 'w') as meta:
            meta.write('{}'.format('\n'.join(random_files)))
        with open(os.path.join(args.output, align_name), 'w') as out_handle:
            combined.write_nexus_data(filename=out_handle)
    sys.stdout.write("Done")
def concat_alignment(files, output):
    """Combine the NEXUS files in ``files`` and write them to ``output``.

    Each matrix is labelled with its file name after removing the
    module-level ``alignments_dir`` prefix and every ``.`` and ``-``.

    Args:
        files: Iterable of NEXUS file paths.
        output: Path of the combined NEXUS file to write.
    """
    nexi = []
    for fname in files:
        label = fname.replace(alignments_dir, '')
        label = label.replace(".", "").replace("-", "")
        nexi.append((label, Nexus.Nexus(fname)))
    combined = Nexus.combine(nexi)
    # The context manager guarantees the output is flushed and closed.
    with open(output, 'w') as handle:
        combined.write_nexus_data(filename=handle)
def export_nexus(aln, charset_name):
    """Split ``aln`` by charset, recombine the parts and write the result.

    The NEXUS files named by ``split_nexus_by_charsets(aln, charset_name)``
    are parsed and combined. The output is written to ``<stem>_concat.nex``,
    where ``<stem>`` is ``aln`` without a trailing ``.nex``.

    Args:
        aln: Path of the source NEXUS alignment.
        charset_name: Passed through to ``split_nexus_by_charsets``.
    """
    nexus_list_names = split_nexus_by_charsets(aln, charset_name)
    nexus_tuples = [(name, Nexus.Nexus(name)) for name in nexus_list_names]
    concat = Nexus.combine(nexus_tuples)

    # str.rstrip(".nex") removes any trailing run of the characters
    # '.', 'n', 'e', 'x' (e.g. "gene.nex" -> "g"); strip the suffix instead.
    suffix = ".nex"
    stem = aln[:-len(suffix)] if aln.endswith(suffix) else aln
    concat.write_nexus_data('%s_concat.nex' % stem)
def main():
    """Concatenate the ``*.nex*`` files in ``args.input`` into a PHYLIP file.

    The combined alignment is written to ``args.output``.
    """
    args = get_args()

    print("Reading files...")
    data = []
    for fname in glob.glob(os.path.join(args.input, '*.nex*')):
        data.append((fname, Nexus.Nexus(fname)))

    print("Concatenating files...")
    concatenated = Nexus.combine(data)

    print("Writing to phylip...")
    concatenated.export_phylip(args.output)
def concat_nexus_alignment(path: str, output_name: str) -> None:
    """Concatenate every ``*.nex`` file in ``path`` into one NEXUS file.

    Args:
        path: Directory searched (non-recursively) for ``*.nex`` files.
        output_name: File name of the combined alignment, created in
            ``path``.

    NOTE(review): if ``output_name`` ends in ``.nex`` and already exists in
    ``path``, it will be picked up as an input on the next run.
    """
    wildcards = path + "/*.nex"
    outname = path + "/" + output_name
    fnames = glob(wildcards)
    nex_list = [(nex, Nexus.Nexus(nex)) for nex in fnames]
    concat = Nexus.combine(nex_list)
    # The context manager guarantees the output is flushed and closed.
    with open(outname, "w") as handle:
        concat.write_nexus_data(filename=handle)
    print(f"DONE! File is written as {outname}")
def _write_window(alignment, start, stop, label_stop, out_path, outformat):
    """Write alignment columns ``start:stop`` to ``out_path`` in ``outformat``."""
    with open(out_path, 'w') as output_handle:
        window = MultipleSeqAlignment(alignment[:, start:stop],
                                      alphabet=generic_dna)
        if outformat.lower() == 'nexus':
            label = 'site' + str(start) + 'to' + str(label_stop)
            # Re-parse the window as NEXUS text so it can be combined and
            # written through Bio.Nexus under the window label.
            nexus_text = window.format('nexus')
            combined = Nexus.combine([(label, Nexus.Nexus(nexus_text))])
            combined.write_nexus_data(output_handle)
        else:
            AlignIO.write(window, output_handle, outformat)


def alignment_slicer(input, informat, outformat, SNPs, slide):
    """Cut an alignment into sliding windows and write one file per window.

    Windows of ``SNPs`` columns are taken every ``slide`` columns for as
    long as a full window fits. A final file then holds the columns from the
    next window start to the end of the alignment. Files are named
    ``<input>_site<start>to<end>.<outformat>``.

    Args:
        input: Path of the alignment to read.
        informat: Format name passed to ``AlignIO.read``.
        outformat: Format name of the output files. ``nexus`` (any case) is
            written through ``Bio.Nexus``, anything else via
            ``AlignIO.write``.
        SNPs: Window width in columns.
        slide: Step between consecutive window starts.

    NOTE(review): when the windows tile the alignment exactly, the trailing
    window is empty; this matches the previous behaviour -- confirm whether
    that file is wanted.
    """
    alignment = AlignIO.read(input, informat, alphabet=generic_dna)
    length_alignment = len(alignment[0].seq)

    # Use the function's own parameters; the previous code read them from a
    # global ``args`` object and ignored the values passed in.
    start = 0
    end = start + SNPs
    while end <= length_alignment:
        out_path = (input + '_site' + str(start) + 'to' + str(end) + '.' +
                    outformat)
        _write_window(alignment, start, end, end, out_path, outformat)
        start += slide
        end += slide

    # Trailing partial window. The NEXUS label keeps the unclipped ``end``
    # while the file name uses the alignment length, as before.
    out_path = (input + '_site' + str(start) + 'to' + str(length_alignment) +
                '.' + outformat)
    _write_window(alignment, start, length_alignment, end, out_path,
                  outformat)
    print("\ndone\n")
def main():
    """Concatenate the ``*.nex*`` files in ``args.input`` into a PHYLIP file.

    The combined alignment is exported directly to ``args.output``; no
    intermediate NEXUS file is written.
    """
    args = get_args()

    print("Reading files...")
    pattern = os.path.join(args.input, '*.nex*')
    data = []
    for fname in glob.glob(pattern):
        data.append((fname, Nexus.Nexus(fname)))

    print("Concatenating files...")
    concatenated = Nexus.combine(data)

    print("Writing to phylip...")
    concatenated.export_phylip(args.output)
def concatNexAlns():
    """Combine every ``.nex`` file in ``<base_dir>/nexus/`` into one file.

    The matrices are combined as-is (no taxon consistency check is done
    here) and written to ``<base_dir>/concat_aln_species_tree.nex``.

    Returns:
        Path of the combined NEXUS file.
    """
    nexdir = '{}/nexus/'.format(base_dir)
    paths = [
        os.path.join(nexdir, fname) for fname in os.listdir(nexdir)
        if fname.endswith('.nex')
    ]
    nexi = [(path, Nexus.Nexus(path)) for path in paths]
    coutname = '{}/concat_aln_species_tree.nex'.format(base_dir)
    combined = Nexus.combine(nexi)
    # The context manager guarantees the output is flushed and closed.
    with open(coutname, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    return coutname
def fully_partition(metadata, aligns):
    """Concatenate all loci and record the column range of each locus.

    Args:
        metadata: Mapping of model -> mapping keyed by locus name. Each
            locus value is overwritten in place with a ``(start, end)``
            tuple of 1-based, inclusive column positions in the combined
            alignment.
        aligns: Directory containing one ``<locus>.nex`` file per locus.

    Returns:
        Tuple ``(combined, metadata)``: the combined Nexus object and the
        updated (same) metadata mapping.
    """
    to_combine = []
    start = 1
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close the input as soon as it has been parsed.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end = start + nex.nchar - 1
            # Re-assigning an existing key does not resize the mapping, so
            # it is safe while iterating over it.
            metadata[model][locus] = (start, end)
            to_combine.append((locus, nex))
            start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, metadata
def fully_partition(metadata, aligns):
    """Concatenate all loci and record the column range of each locus.

    Args:
        metadata: Mapping of model -> mapping keyed by locus name. Each
            locus value is overwritten in place with a ``(start, end)``
            tuple of 1-based, inclusive column positions in the combined
            alignment.
        aligns: Directory containing one ``<locus>.nex`` file per locus.

    Returns:
        Tuple ``(combined, metadata)``: the combined Nexus object and the
        updated (same) metadata mapping.
    """
    to_combine = []
    start = 1
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close the input as soon as it has been parsed.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end = start + nex.nchar - 1
            # Re-assigning an existing key does not resize the mapping, so
            # it is safe while iterating over it.
            metadata[model][locus] = (start, end)
            to_combine.append((locus, nex))
            start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, metadata
def concattophylip(directory, outdir):
    """Concatenate NEXUS files into a PHYLIP alignment plus a charsets file.

    All ``*.nex*`` files in ``directory`` are combined. The alignment is
    written to ``<outdir>/phylip/concatdata.phylip`` and the sets block to
    ``<outdir>/phylip/charsets.charsets``.

    Side effects: creates ``<outdir>/phylip`` (raising if it already
    exists) and leaves the process working directory set to ``directory``.

    Args:
        directory: Directory holding the input NEXUS files.
        outdir: Directory under which the ``phylip`` folder is created.
    """
    print("Making concat and charset files.")
    # Resolve the output location before chdir: a relative ``outdir`` would
    # otherwise be re-interpreted relative to ``directory`` further down.
    phylip_dir = os.path.join(os.path.abspath(outdir), 'phylip')
    os.makedirs(phylip_dir)

    # Working inside ``directory`` keeps the matrix labels as bare names.
    os.chdir(directory)
    file_list = glob.glob('*.nex*')
    nexi = [(fname, Nexus.Nexus(fname)) for fname in file_list]
    combined = Nexus.combine(nexi)
    sets = combined.append_sets()

    combined.export_phylip(os.path.join(phylip_dir, 'concatdata.phylip'))
    with open(os.path.join(phylip_dir, 'charsets.charsets'), 'w') as outf:
        outf.write(sets)
def concatNexAlns(nexDir, outname, same_taxa=True):
    """Combine every ``.nex`` file in ``nexDir`` into one partitioned file.

    Adapted from https://biopython.org/wiki/Concatenate_nexus.

    Args:
        nexDir: Directory searched (non-recursively) for ``.nex`` files.
        outname: Tag inserted into the output file name.
        same_taxa: Accepted for interface compatibility; this function does
            not read it and performs no taxon consistency check.

    Returns:
        Name of the written file, ``concat_stree_aln_<outname>.nex``,
        relative to the current working directory.
    """
    paths = [
        os.path.join(nexDir, fname) for fname in os.listdir(nexDir)
        if fname.endswith('.nex')
    ]
    nexi = [(path, Nexus.Nexus(path)) for path in paths]
    coutname = 'concat_stree_aln_{}.nex'.format(outname)
    combined = Nexus.combine(nexi)
    # The context manager guarantees the output is flushed and closed.
    with open(coutname, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    return coutname
def model_partition(metadata, aligns):
    """Concatenate all loci and record the column range of each model.

    Loci are concatenated model by model, so each model occupies one
    contiguous block of columns.

    Args:
        metadata: Mapping of model -> iterable of locus names.
        aligns: Directory containing one ``<locus>.nex`` file per locus.

    Returns:
        Tuple ``(combined, new_metadata)``: the combined Nexus object and
        an ``OrderedDict`` mapping each model to its ``(start, end)``
        1-based, inclusive column range.
    """
    to_combine = []
    new_metadata = OrderedDict()
    start = 1
    end = 0
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close the input as soon as it has been parsed.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def model_partition(metadata, aligns):
    """Concatenate all loci and record the column range of each model.

    Loci are concatenated model by model, so each model occupies one
    contiguous block of columns.

    Args:
        metadata: Mapping of model -> iterable of locus names.
        aligns: Directory containing one ``<locus>.nex`` file per locus.

    Returns:
        Tuple ``(combined, new_metadata)``: the combined Nexus object and
        an ``OrderedDict`` mapping each model to its ``(start, end)``
        1-based, inclusive column range.
    """
    to_combine = []
    new_metadata = OrderedDict()
    start = 1
    end = 0
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close the input as soon as it has been parsed.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def model_partition(metadata, aligns):
    """Concatenate all loci and record the column range of each model.

    Loci are concatenated model by model, so each model occupies one
    contiguous block of columns.

    Args:
        metadata: Mapping of model -> iterable of locus names.
        aligns: Directory containing one ``<locus>.nex`` file per locus.

    Returns:
        Tuple ``(combined, new_metadata)``: the combined Nexus object and
        an ``OrderedDict`` mapping each model to its ``(start, end)``
        1-based, inclusive column range.
    """
    to_combine = []
    new_metadata = OrderedDict()
    start = 1
    end = 0
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close the input as soon as it has been parsed.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def write_variant_sites(alignment, var_sites, outfile):
    """Write the selected alignment columns as a partitioned NEXUS file.

    ``ranges(var_sites)`` is expected to yield ``(first, last)`` pairs of
    column indices; each pair becomes one sub-alignment covering columns
    ``first`` through ``last`` inclusive. The sub-alignments are combined
    and written to ``outfile``.

    Args:
        alignment: Alignment object supporting ``alignment[:, a:b]``.
        var_sites: Column indices passed to ``ranges``.
        outfile: Path of the NEXUS file to write.
    """
    blocks = list(ranges(var_sites))  # tuples of positions

    nex_aligns = []  # (label, Bio.Nexus.Nexus.Nexus) pairs
    for block in blocks:
        first, last = block[0], block[1]
        sub_alignment = MultipleSeqAlignment(alignment[:, first:last + 1],
                                             alphabet=generic_dna)
        label = 'site {} to {}'.format(str(first), str(last + 1))
        nex_aligns.append((label, Nexus.Nexus(sub_alignment.format('nexus'))))

    combined = Nexus.combine(nex_aligns)
    with open(outfile, 'w') as out:
        combined.write_nexus_data(out)

    message = 'Converted {} informative sites without gaps into nexus alignment'
    print(message.format(str(len(blocks))))
def model_partition(metadata, aligns):
    """Concatenate all loci and record the column range of each model.

    Loci are concatenated model by model, so each model occupies one
    contiguous block of columns.

    Args:
        metadata: Mapping of model -> iterable of locus names.
        aligns: Directory containing one ``<locus>.nex`` file per locus.

    Returns:
        Tuple ``(combined, new_metadata)``: the combined Nexus object and
        an ``OrderedDict`` mapping each model to its ``(start, end)``
        1-based, inclusive column range.
    """
    to_combine = []
    new_metadata = OrderedDict()
    start = 1
    end = 0
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close the input as soon as it has been parsed.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def _write_BEST(dataset, filestem):
    """Write a concatenated NEXUS file and print taxset lines for BEST.

    Used by write_multispecies(). One partition per gene is written to
    ``<filestem>.nex``; the taxset lines grouping sequence numbers by
    species are printed to the screen for manual inclusion in the MrBayes
    block.
    """
    fname = filestem + ".nex"

    # write a nexus file with partitions for each gene
    nexi = [(g, _nexify(dataset.get_sequences(g)))
            for g in dataset.get_genes()]
    Nexus.combine(nexi).write_nexus_data(filename=fname)

    # then build a MrBayes block for BEST: species -> list of 1-based
    # sequence numbers (as strings)
    species_names = dataset.get_species()
    otu_numbers = [str(i) for i in xrange(1, len(dataset) + 1)]
    d = defaultdict(list)
    for sp, number in zip(species_names, otu_numbers):
        d[sp].append(number)

    contents = ["begin MyBayes;"]
    for species, OTUs in d.items():
        contents.append("taxset %s = % s" % (species, " ".join(OTUs)))

    print("Add the following to the MrBayes block in %s" % fname)
    print("\n".join(contents))
def read_collapse(file, informat, gapchar):
    """Drop invariant columns from an alignment and write the rest as NEXUS.

    A column is kept when at least two of the bases A, C, G and T occur in
    it; the gap character never counts towards variability. Runs of
    consecutive kept columns become one partition each in
    ``<file>_collapsed.nexus``.

    Args:
        file: Path of the alignment to read.
        informat: Format name passed to ``AlignIO.read``.
        gapchar: Gap symbol whose per-column count is included in the debug
            output, or ``None`` when gaps are not tracked.
    """
    with open(file, 'r') as input_handle:
        alignment = AlignIO.read(input_handle, informat, alphabet=generic_dna)
    summary_align = AlignInfo.SummaryInfo(alignment)
    first_seq = alignment[0].seq
    length_first_seq = len(first_seq)
    my_pssm = summary_align.pos_specific_score_matrix(first_seq)

    invariant_position_index = []
    for count, column in enumerate(my_pssm.pssm):
        counts = column[1]
        base_counts = [counts['A'], counts['C'], counts['G'], counts['T']]
        if gapchar is not None:
            print(gapchar)
            x = [counts[gapchar]] + base_counts
        else:
            x = base_counts
        print(x)
        # Count bases only. The previous ``y[1:]`` slice was meant to skip
        # the gap entry, but with gapchar=None it skipped the 'A' count.
        bases_present = sum(1 for n in base_counts if n > 0)
        if bases_present <= 1:
            invariant_position_index.append(count)

    invariant_lookup = set(invariant_position_index)
    alignment_indices_to_write = [
        i for i in range(length_first_seq) if i not in invariant_lookup
    ]

    def ranges(indices):
        # Consecutive indices share the same (value - position) difference,
        # so groupby on that difference yields maximal consecutive runs.
        for _, run in itertools.groupby(enumerate(indices),
                                        lambda pair: pair[1] - pair[0]):
            run = list(run)
            yield run[0][1], run[-1][1]

    blocks = list(ranges(alignment_indices_to_write))
    print('\nExcluding {} sites at positions: {} \n'.format(
        len(invariant_position_index), invariant_position_index))
    print('Including sites at positions: {} \n'.format(blocks))

    n_alignments = []
    for first, last in blocks:
        alignment_iteration = MultipleSeqAlignment(
            alignment[:, first:last + 1],
            alphabet=generic_dna).format('nexus')
        n_alignments.append(('site' + str(first) + 'to' + str(last + 1),
                             Nexus.Nexus(alignment_iteration)))

    # combine the alignments in n_alignments
    combined = Nexus.combine(n_alignments)
    with open(file + '_collapsed.nexus', 'w') as output_handle:
        print('Writing collapsed alignment to: ' + file + '_collapsed.nexus\n')
        combined.write_nexus_data(output_handle)
__author__ = 'anastasiiakorosteleva'
from Bio.Nexus import Nexus

# Nexus.combine takes a list of (name, Nexus instance) tuples; a list
# comprehension over the file names builds them.
file_list = [
    'apoa1.nex', 'apoe.nex', 'cyt450.nex', 'ace.nex', 'ABO.nex', "apoa5.nex",
    'apod.nex', 'cdk6.nex', 'CETP.nex', 'ETV6.nex', 'Gckr.nex', 'gdf5.nex',
    'LDLR.nex', 'lpl.nex', 'NAT2.nex', 'park2.nex', 'SLC22A5.nex',
    'UGT1A9.nex', 'HMGA2.nex', 'apoc1.nex',
]
nexuses = [(fname, Nexus.Nexus(fname)) for fname in file_list]

combined = Nexus.combine(nexuses)
# The context manager guarantees the output is flushed and closed.
with open('combo.nex', 'w') as out_handle:
    combined.write_nexus_data(filename=out_handle)
# A little script to concatenate all the nexus files in a folder
# and write a new one.
from Bio.Nexus import Nexus
import os

infile = "/Users/robertlanfear/Desktop/turtles-individual-nexus-files-for-loci"
outfile = os.path.join(infile, "alignment.nex")

# Files directly inside the folder (no recursion). The output of a previous
# run lives in the same folder and must not be fed back in as an input.
file_list = [
    x for x in next(os.walk(infile))[2]
    if x != os.path.basename(outfile)
]

# Label each matrix with its bare file name, but read it through the full
# path so the script works from any working directory.
nexi = [(fname, Nexus.Nexus(os.path.join(infile, fname)))
        for fname in file_list]

combined = Nexus.combine(nexi)
with open(outfile, 'w') as out_handle:
    combined.write_nexus_data(filename=out_handle)
def main(inDir, outName, fext='.gb'):
    """Extract CDS from sequence records, align them and concatenate.

    Steps:
      1. Collect nucleotide and protein CDS from every ``*<fext>`` file in
         ``inDir`` (via ``extract_collect_CDS``).
      2. Write the unaligned sequences per gene and align the proteins
         with MAFFT.
      3. Back-translate each protein alignment with the external
         ``align_back_trans.py`` script.
      4. Convert each nucleotide alignment to NEXUS and combine them all
         into one file in ``inDir``.

    Per-gene files are written to ``<inDir>/output_AlignmentFiles``. Genes
    that fail in step 3 or 4 are reported and skipped.

    Args:
        inDir: Directory holding the input records.
        outName: Prefix of the combined NEXUS file name.
        fext: File extension selecting the input records.
    """
    # MAKE OUTPUT FOLDER
    outDir = os.path.join(inDir, 'output_AlignmentFiles')
    if not os.path.exists(outDir):
        os.makedirs(outDir)

    # EXTRACT AND COLLECT CDS FROM RECORDS
    files = [f for f in os.listdir(inDir) if f.endswith(fext)]
    masterdict_nucl = collections.OrderedDict()
    masterdict_prot = collections.OrderedDict()
    for f in files:
        extract_collect_CDS(masterdict_nucl, masterdict_prot,
                            os.path.join(inDir, f))

    # REMOVE ALL DUPLICATE ENTRIES (result of CDS with multiple exons)
    # Note (original author): not all duplicates are removed on the first
    # pass, so the removal is run twice.
    for _ in range(2):
        remove_duplicates(masterdict_nucl)
        remove_duplicates(masterdict_prot)

    # WRITE UNALIGNED NUCLEOTIDE SEQUENCES
    if not masterdict_nucl:
        sys.exit('  ERROR: No items in nucleotide masterdictionary.')
    for k, v in masterdict_nucl.items():
        outFn_unalign_nucl = os.path.join(outDir,
                                          'nucl_' + k + '.unalign.fas')
        with open(outFn_unalign_nucl, 'w') as hndl:
            SeqIO.write(v, hndl, 'fasta')

    # WRITE AND ALIGN PROTEIN SEQUENCES
    if not masterdict_prot:
        sys.exit('  ERROR: No items in protein masterdictionary.')
    for k, v in masterdict_prot.items():
        outFn_unalign_prot = os.path.join(outDir,
                                          'prot_' + k + '.unalign.fas')
        outFn_aligned_prot = os.path.join(outDir,
                                          'prot_' + k + '.aligned.fas')
        with open(outFn_unalign_prot, 'w') as hndl:
            SeqIO.write(v, hndl, 'fasta')
        mafft_cline = MafftCommandline(input=outFn_unalign_prot)
        stdout, stderr = mafft_cline()
        with open(outFn_aligned_prot, 'w') as hndl:
            hndl.write(stdout)

    # BACK-TRANSLATION via Python script by Peter Cook
    # https://github.com/peterjc/pico_galaxy/tree/master/tools/align_back_trans
    for k in masterdict_prot:
        outFn_unalign_nucl = os.path.join(outDir,
                                          'nucl_' + k + '.unalign.fas')
        outFn_aligned_nucl = os.path.join(outDir,
                                          'nucl_' + k + '.aligned.fas')
        outFn_aligned_prot = os.path.join(outDir,
                                          'prot_' + k + '.aligned.fas')
        try:
            subprocess.check_output([
                'python2', 'align_back_trans.py', 'fasta',
                outFn_aligned_prot, outFn_unalign_nucl, outFn_aligned_nucl,
                '11'
            ], stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError) as err:
            # Best effort: report the gene and continue with the others.
            print('  ERROR: Error encountered during back-translation of ' +
                  str(k))
            print('         Reason: %s' % err)

    # IMPORT BACK-TRANSLATIONS AND CONCATENATE
    alignm_L = []
    for k in masterdict_prot.keys():
        aligned_nucl_fasta = os.path.join(outDir,
                                          'nucl_' + k + '.aligned.fas')
        # Convert from fasta to nexus
        try:
            alignm_fasta = AlignIO.read(aligned_nucl_fasta, 'fasta',
                                        alphabet=Alphabet.generic_dna)
            hndl = StringIO()
            AlignIO.write(alignm_fasta, hndl, 'nexus')
            nexus_string = hndl.getvalue()
            # IMPORTANT: Stripping the gene name from the sequence name
            nexus_string = nexus_string.replace('\n' + k + '_',
                                                '\ncombined_')
            alignm_nexus = Nexus.Nexus(nexus_string)
            # Function 'Nexus.combine' needs (name, matrix) tuples.
            alignm_L.append((k, alignm_nexus))
        except Exception as err:
            # Narrowed from a bare except so Ctrl-C and SystemExit still
            # propagate; the gene is reported and skipped.
            print('  ERROR: Cannot process alignment of ' + str(k))
            print('         Reason: %s' % err)

    # COMBINE THE NEXUS ALIGNMENTS (IN NO PARTICULAR ORDER)
    if not alignm_L:
        sys.exit('  ERROR: No alignments available to combine.')
    n_aligned_CDS = len(alignm_L)
    alignm_combined = Nexus.combine(alignm_L)
    outFn_nucl_combined_nexus = os.path.join(
        inDir,
        outName + '_nucl_' + str(n_aligned_CDS) + 'combined.aligned.nex')
    with open(outFn_nucl_combined_nexus, 'w') as hndl:
        alignm_combined.write_nexus_data(filename=hndl)
__author__ = 'anastasiiakorosteleva'
from Bio.Nexus import Nexus

# Nexus.combine takes a list of (name, Nexus instance) tuples; a list
# comprehension over the file names builds them.
file_list = [
    'apoa1.nex', 'apoe.nex', 'cyt450.nex', 'ace.nex', 'ABO.nex', "apoa5.nex",
    'apod.nex', 'cdk6.nex', 'CETP.nex', 'ETV6.nex', 'Gckr.nex', 'gdf5.nex',
    'LDLR.nex', 'lpl.nex', 'NAT2.nex', 'park2.nex', 'SLC22A5.nex',
    'UGT1A9.nex', 'HMGA2.nex', 'apoc1.nex',
]
nexuses = [(fname, Nexus.Nexus(fname)) for fname in file_list]

combined = Nexus.combine(nexuses)
# The context manager guarantees the output is flushed and closed.
with open('combo.nex', 'w') as out_handle:
    combined.write_nexus_data(filename=out_handle)
# newSeq = str(sequence.seq).ljust(maxLen, '-')
# sequence.seq = Seq.Seq(newSeq)
#
# for line in add_handle:
# input_handle.append(line)
#
#
# saveName = 'Test/'+geneName+'.nex'
# output_handle = open(saveName, 'w')
# AlignIO.write(input_handle, output_handle, "nexus")

# Combine the ingroup (lethrinid) and outgroup alignments separately; each
# set of NEXUS files becomes one concatenated file under OUT/.
lethrinids = glob.glob('NEX/COMBINED*Renamed_Simple.nex')
outgroups = glob.glob('Outgroups/*Simple.nex')

nexi = [(j, Nexus.Nexus(j)) for j in lethrinids]
lethrinidsCombined = Nexus.combine(nexi)
# Context managers guarantee the outputs are flushed and closed.
with open('OUT/Lethrinids_combined_all.nex', 'w') as out_handle:
    lethrinidsCombined.write_nexus_data(filename=out_handle)

nexi2 = [(j, Nexus.Nexus(j)) for j in outgroups]
outgroupsCombined = Nexus.combine(nexi2)
with open('OUT/Outgroups_combined_all.nex', 'w') as out_handle:
    outgroupsCombined.write_nexus_data(filename=out_handle)

# combined = glob.glob('Test/*Simple.nex')
# nexi = [(j, Nexus.Nexus(j)) for j in combined]
# combined = Nexus.combine(nexi)
# combined.write_nexus_data(filename=open('Test/COMBINED_All.nex', 'w'))
# combos = [f, fam]
# nexi = [(j, Nexus.Nexus(j)) for j in combos]
# combined = Nexus.combine(nexi)
#!/usr/bin/env python
"""Concatenate several single-gene NEXUS files into one NEXUS file."""
from Bio.Nexus import Nexus

# For each gene you want to combine, change the name of the Nexus file.
gene1 = 'gene1.nex'
gene2 = 'gene2.nex'
# Keep adding similar lines for the number of genes you want to combine.

allGenes = (gene1, gene2)  # Extend this tuple as needed

matrices = []
for genePath in allGenes:
    # Each input is closed as soon as it has been parsed; the matrix is
    # labelled with the file name, as before.
    with open(genePath, 'r') as geneHandle:
        matrices.append((geneHandle.name, Nexus.Nexus(geneHandle)))
concatenate = Nexus.combine(matrices)

concatOutFile = "outputFileName.nex"  # Change this to a meaningful output filename
concatenate.write_nexus_data(filename=concatOutFile)
def Concatenate(prefix):
    """Combine the ``*.nex`` files in the working directory.

    The combined alignment is written twice: as NEXUS to ``btCOMBINED.nex``
    and as PHYLIP to ``<prefix>.phy``.

    Args:
        prefix: Stem of the PHYLIP output file name.
    """
    combined_name = 'btCOMBINED.nex'
    # The NEXUS output of a previous run matches '*.nex'; leave it out so it
    # is not concatenated back into itself.
    file_list = [fname for fname in glob('*.nex') if fname != combined_name]
    nexi = [(fname, Nexus.Nexus(fname)) for fname in file_list]
    combined = Nexus.combine(nexi)
    # The context manager guarantees the output is flushed and closed.
    with open(combined_name, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    combined.export_phylip(prefix + '.phy')
def read_collapse(file, informat, gapchar):
    """Drop invariant columns from an alignment and write the rest as NEXUS.

    A column is kept when at least two of the bases A, C, G and T occur in
    it; the gap character never counts towards variability. Runs of
    consecutive kept columns become one partition each in
    ``<file>_collapsed.nexus``.

    Args:
        file: Path of the alignment to read.
        informat: Format name passed to ``AlignIO.read``.
        gapchar: Gap symbol whose per-column count is included in the debug
            output, or ``None`` when gaps are not tracked.
    """
    with open(file, 'r') as input_handle:
        alignment = AlignIO.read(input_handle, informat, alphabet=generic_dna)
    summary_align = AlignInfo.SummaryInfo(alignment)
    first_seq = alignment[0].seq
    length_first_seq = len(first_seq)
    my_pssm = summary_align.pos_specific_score_matrix(first_seq)

    invariant_position_index = []
    for count, column in enumerate(my_pssm.pssm):
        counts = column[1]
        base_counts = [counts['A'], counts['C'], counts['G'], counts['T']]
        if gapchar is not None:
            print(gapchar)
            x = [counts[gapchar]] + base_counts
        else:
            x = base_counts
        print(x)
        # Count bases only. The previous ``y[1:]`` slice was meant to skip
        # the gap entry, but with gapchar=None it skipped the 'A' count.
        bases_present = sum(1 for n in base_counts if n > 0)
        if bases_present <= 1:
            invariant_position_index.append(count)

    invariant_lookup = set(invariant_position_index)
    alignment_indices_to_write = [
        i for i in range(length_first_seq) if i not in invariant_lookup
    ]

    def ranges(indices):
        # Consecutive indices share the same (value - position) difference,
        # so groupby on that difference yields maximal consecutive runs.
        for _, run in itertools.groupby(enumerate(indices),
                                        lambda pair: pair[1] - pair[0]):
            run = list(run)
            yield run[0][1], run[-1][1]

    blocks = list(ranges(alignment_indices_to_write))
    print('\nExcluding {} sites at positions: {} \n'.format(
        len(invariant_position_index), invariant_position_index))
    print('Including sites at positions: {} \n'.format(blocks))

    n_alignments = []
    for first, last in blocks:
        alignment_iteration = MultipleSeqAlignment(
            alignment[:, first:last + 1],
            alphabet=generic_dna).format('nexus')
        n_alignments.append(('site' + str(first) + 'to' + str(last + 1),
                             Nexus.Nexus(alignment_iteration)))

    # combine the alignments in n_alignments
    combined = Nexus.combine(n_alignments)
    with open(file + '_collapsed.nexus', 'w') as output_handle:
        print('Writing collapsed alignment to: ' + file + '_collapsed.nexus\n')
        combined.write_nexus_data(output_handle)
import os
import sys

from Bio import Alphabet
from Bio.Nexus import Nexus
from Bio import SeqIO

# Expected arguments: (1) output file name, (2)-(N) input fasta files

print("Converting fasta files to nexus...")
print("")
nexusFiles = []
for fastaIn in sys.argv[2:]:
    print(fastaIn)
    # Swap only the final extension. str.replace(".fasta", ".nex") returned
    # the input path unchanged for names like "x.fa" (so the conversion
    # would target the input file itself) and also rewrote ".fasta" inside
    # directory names.
    nexusOut = os.path.splitext(fastaIn)[0] + ".nex"
    SeqIO.convert(fastaIn, "fasta", nexusOut, "nexus",
                  alphabet=Alphabet.IUPAC.IUPACAmbiguousDNA())
    nexusFiles.append(nexusOut)

print("Concatenating alignments...")
matrices = []
for nex in nexusFiles:
    # Each input is closed as soon as it has been parsed; the matrix is
    # labelled with the file name, as before.
    with open(nex, 'r') as nexHandle:
        matrices.append((nexHandle.name, Nexus.Nexus(nexHandle)))
concatenate = Nexus.combine(matrices)
concatenate.write_nexus_data(filename=sys.argv[1])
# For every "<gene>_sims" folder in the current directory, concatenate the
# three phyloSeq NEXUS files of each posterior predictive simulation into
# "<mainDir>/<gene>_<simNum>/<gene>_<simNum>.nex".
mainDir = os.getcwd()
for g in glob.glob('*_sims'):
    # pull out gene name
    gene = g.split("_")[0]
    # create path to gene folder
    geneDirPath = os.path.join(mainDir, g)
    # move into gene folder
    os.chdir(geneDirPath)
    for p in glob.glob('posterior_predictive_sim_*'):
        # fourth "_"-separated field of the folder name
        simNum = p.split("_")[3]
        # make name for concat nexus file
        concatNex = gene + "_" + simNum + ".nex"
        # make folder for sim seq
        mbRunDirPath = os.path.join(mainDir, gene + "_" + simNum)
        nexOutPath = os.path.join(mbRunDirPath, concatNex)
        if not os.path.exists(mbRunDirPath):
            os.mkdir(mbRunDirPath)
        # debug
        print(" ".join([simNum, concatNex, mbRunDirPath, nexOutPath]))
        # move into sim seq folder so the matrices are labelled with bare
        # file names
        os.chdir(p)
        seqList = ["phyloSeq[1].nex", "phyloSeq[2].nex", "phyloSeq[3].nex"]
        nexConvert = [(f, Nexus.Nexus(f)) for f in seqList]
        combine = Nexus.combine(nexConvert)
        # The context manager guarantees the output is flushed and closed.
        with open(nexOutPath, 'w') as outHandle:
            combine.write_nexus_data(filename=outHandle)
        os.chdir(geneDirPath)
    os.chdir(mainDir)
from Bio.Nexus import Nexus
import sys

# usage : python concat_nex.py filelist.txt

# Text file listing the NEXUS files to combine, one path per line.
filename = sys.argv[1]

with open(filename, 'r') as f:
    # Skip blank lines (including the empty entry that a trailing newline
    # produces) and strip stray whitespace such as '\r'; an empty entry
    # would otherwise be handed to Nexus.Nexus as a file name.
    file_list = [line.strip() for line in f if line.strip()]

for i in file_list:  # print the list of input files
    print(i)

nexi = [(fname, Nexus.Nexus(fname)) for fname in file_list]
combined = Nexus.combine(nexi)

# writes output file named concatenated.nex
with open('concatenated.nex', 'w') as out_handle:
    combined.write_nexus_data(filename=out_handle)
#!/usr/bin/env python
# Author: Gregory S Mendez

# This script will create a super matrix alignment file in nexus format from
# input alignments in nexus format.

# Named variables. Every run needs the following defined:
# 1) --in_dir - The directory containing the nexus alignments that need to
#               be merged.
# 2) --out - The full filepath and name you want for the output file.

from Bio.Nexus import Nexus
import argparse, glob

# Argument Parser
parser = argparse.ArgumentParser(description = 'This script will create a super matrix alignment file from input alignments')
parser.add_argument('--in_dir', required=True, help='The input directory containing alignment files.')
parser.add_argument('--out', required=True, help='The filepath and filename of the output file.')
args = parser.parse_args()

IN_DIR = args.in_dir
OUT = args.out

# Every *.nex file directly inside the input directory is combined.
FILE_LIST = glob.glob('%s/*.nex' % IN_DIR)
NEXI = [(FNAME, Nexus.Nexus(FNAME)) for FNAME in FILE_LIST]

COMBINED = Nexus.combine(NEXI)
# The context manager guarantees the output is flushed and closed.
with open('%s' % OUT, 'w') as OUT_HANDLE:
    COMBINED.write_nexus_data(filename=OUT_HANDLE)