Exemple #1
0
def concat(mypath, same_taxa):
    """Combine every NEXUS matrix found in ``mypath`` into one partitioned matrix.

    Adapted from http://biopython.org/wiki/Concatenate_nexus, with an extra
    filter that skips hidden files and anything that is not a regular file.

    Args:
        mypath: Directory holding the NEXUS files to concatenate.
        same_taxa: When true, ``check_taxa`` is run on the parsed matrices
            before combining; pass ``False`` to skip that check.

    Returns:
        The result of ``Nexus.combine``, or ``None`` when ``same_taxa`` is
        true and ``check_taxa`` returns a truthy value.
    """
    onlyfiles = [
        item for item in os.listdir(mypath)
        if not item.startswith('.')
        and os.path.isfile(os.path.join(mypath, item))
    ]

    nexi = []
    for nex in onlyfiles:
        # Open relative to ``mypath``: the bare file name only resolves when
        # the current working directory happens to be ``mypath``.
        with open(os.path.join(mypath, nex), 'r') as nex_open:
            nexi.append((nex, Nexus.Nexus(nex_open)))

    # NOTE(review): in the upstream recipe check_taxa raises on a mismatch
    # and returns None on success -- confirm for the local implementation.
    if same_taxa and check_taxa(nexi):
        return None
    return Nexus.combine(nexi)
def main():
    """Concatenate the NEXUS alignments in ``args.alignments`` and write them out.

    PHYLIP is written (plus an optional ``.charsets`` file) unless
    ``args.nexus`` is set, in which case a NEXUS file is written with or
    without its sets block depending on ``args.charsets``.
    """
    args = get_args()
    log, my_name = setup_logging(args)

    log.info("Reading input alignments in NEXUS format")
    data = []
    for fname in glob.glob(os.path.join(args.alignments, '*.nex*')):
        data.append((os.path.basename(fname), Nexus.Nexus(fname)))

    log.info("Concatenating files")
    concatenated = Nexus.combine(data)

    # Every output file shares the stem "<output>/<basename of alignments>".
    stem = os.path.basename(args.alignments)
    if args.nexus:
        concat_file = os.path.join(args.output, stem + ".nexus")
        if args.charsets:
            log.info("Writing concatenated alignment to NEXUS format (with charsets)")
            concatenated.write_nexus_data(concat_file)
        else:
            log.info("Writing concatenated alignment to NEXUS format (without charsets)")
            concatenated.write_nexus_data(concat_file, append_sets=False)
    else:
        concat_file = os.path.join(args.output, stem + ".phylip")
        if args.charsets:
            sets = concatenated.append_sets()
            charset_file = os.path.join(args.output, stem + ".charsets")
            log.info("Writing charsets to {}".format(charset_file))
            with open(charset_file, 'w') as outf:
                outf.write(sets)
        log.info("Writing concatenated PHYLIP alignment to {}".format(concat_file))
        concatenated.export_phylip(concat_file)

    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
Exemple #3
0
def combine(arg):
    """Merge the alignments in ``arg.input`` into one NEXUS file at ``arg.output``.

    When ``get_format`` reports ``'fasta'``, ``arg`` is first replaced by the
    result of ``convert``. Each matrix is labelled with ``clean_name`` of its
    entry in ``arg.input``.
    """
    if get_format(arg) == 'fasta':
        arg = convert(arg)
    name_data = []
    for name in arg.input:
        name_data.append((clean_name(name), Nexus.Nexus(name)))
    Nexus.combine(name_data).write_nexus_data(filename=arg.output)
Exemple #4
0
def concatNexusAlignments(processes):
    """Convert pending FASTA alignments to NEXUS and concatenate all of them.

    FASTA files under ``<base_dir>/fasta`` whose name (up to the first dot)
    has no counterpart in ``<base_dir>/nexus`` are passed to
    ``convertFastaToNexus`` on a thread pool. If the combined file already
    exists it is returned as is; otherwise every file in the nexus directory
    is combined and written.

    Args:
        processes: Number of worker threads handed to ``ThreadPool``.

    Returns:
        Path of the combined alignment, ``<base_dir>/WGS.nex``.
    """
    nexus_dir = '{}/nexus'.format(base_dir)
    fasta_dir = '{}/fasta'.format(base_dir)

    # A set keeps the "already converted?" check O(1) per FASTA file.
    already_done = {x.split('.')[0] for x in os.listdir(nexus_dir)}
    fastas = [
        '{}/{}'.format(fasta_dir, name)
        for name in os.listdir(fasta_dir)
        if name.split('.')[0] not in already_done
    ]

    pool = ThreadPool(processes)
    try:
        # list() drains the lazy imap iterator so every conversion runs.
        list(
            tqdm(pool.imap(convertFastaToNexus, fastas),
                 total=len(fastas),
                 desc='Fastas to Nexus...'))
    finally:
        # Release the worker threads whether or not a conversion failed.
        pool.close()
        pool.join()

    combined_nexus = '{}/WGS.nex'.format(base_dir)
    if os.path.isfile(combined_nexus):
        return combined_nexus

    nexus = []
    for name in os.listdir(nexus_dir):
        filename = '{}/{}'.format(nexus_dir, name)
        nexus.append((filename, Nexus.Nexus(filename)))
    combined = Nexus.combine(nexus)
    with open(combined_nexus, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    return combined_nexus
def main():
    """Write ``args.samples`` random concatenations of ``args.sample_size`` loci.

    For each sample, random indices into the ``*.nex*`` files of
    ``args.nexus`` are drawn, the chosen files are combined, and both the
    alignment and a META text file listing the chosen files are written to
    ``args.output``.
    """
    args = get_args()
    # numpy array so that the files can be indexed by an array of indices
    files = numpy.array(glob.glob(os.path.join(args.nexus, '*.nex*')))
    assert len(files) >= args.sample_size, "Sample size must be < number(files)"
    print("Running")
    highest_index = len(files) - 1
    for i in xrange(args.samples):
        # progress dot, flushed so that it shows up immediately
        sys.stdout.write('.')
        sys.stdout.flush()
        # NOTE(review): indices are drawn independently, so a file may be
        # picked more than once -- confirm this is intended.
        sample = numpy.random.random_integers(0, highest_index, args.sample_size)
        random_files = sorted(files[sample].tolist())
        combined = Nexus.combine([(f, Nexus.Nexus(f)) for f in random_files])
        align_name = "random-sample-{}-{}-loci.nex".format(i, args.sample_size)
        meta_name = 'META-random-sample-{}-{}-loci.txt'.format(i, args.sample_size)
        # record which files went into this sample
        with open(os.path.join(args.output, meta_name), 'w') as meta:
            meta.write('{}'.format('\n'.join(random_files)))
        align_path = os.path.join(args.output, align_name)
        combined.write_nexus_data(filename=open(align_path, 'w'))
    sys.stdout.write("Done")
def concat_alignment(files, output):
    """Concatenate the NEXUS alignments in ``files`` and write them to ``output``.

    Each partition is named after its file name with every occurrence of the
    module-level ``alignments_dir``, every dot and every dash removed.

    Args:
        files: Iterable of NEXUS file paths.
        output: Path of the combined NEXUS file to write.
    """
    nexi = []
    for fname in files:
        label = fname.replace(alignments_dir, '').replace(".", "").replace("-", "")
        nexi.append((label, Nexus.Nexus(fname)))
    combined = Nexus.combine(nexi)
    # The context manager closes the file even if writing fails part-way.
    with open(output, 'w') as handle:
        combined.write_nexus_data(filename=handle)
def export_nexus(aln, charset_name):
    """Split ``aln`` by charsets, recombine the pieces and write ``<stem>_concat.nex``.

    Args:
        aln: Path of the source NEXUS alignment; a trailing ``.nex`` is
            dropped to build the output name.
        charset_name: Passed through to ``split_nexus_by_charsets``.
    """
    nexus_list_names = split_nexus_by_charsets(aln, charset_name)
    nexus_tuples = [(name, Nexus.Nexus(name)) for name in nexus_list_names]
    concat = Nexus.combine(nexus_tuples)

    # str.rstrip(".nex") strips any trailing run of the characters '.', 'n',
    # 'e' and 'x' (e.g. "alien.nex" -> "ali"), so remove the literal suffix.
    suffix = ".nex"
    stem = aln[:-len(suffix)] if aln.endswith(suffix) else aln
    concat.write_nexus_data('%s_concat.nex' % stem)
def main():
    """Combine every ``*.nex*`` file in ``args.input`` and export PHYLIP to ``args.output``."""
    args = get_args()
    print("Reading files...")
    pattern = os.path.join(args.input, '*.nex*')
    data = []
    for fname in glob.glob(pattern):
        data.append((fname, Nexus.Nexus(fname)))
    print("Concatenating files...")
    concatenated = Nexus.combine(data)
    print("Writing to phylip...")
    concatenated.export_phylip(args.output)
Exemple #9
0
def concat_nexus_alignment(path: str, output_name: str):
    """Concatenate every ``*.nex`` file in ``path`` into ``path/output_name``.

    Args:
        path: Directory searched for ``*.nex`` files; also receives the output.
        output_name: File name of the combined NEXUS alignment.
    """
    wildcards = path + "/*.nex"
    outname = path + "/" + output_name
    # NOTE(review): if ``output_name`` ends in ".nex" and is left over from a
    # previous run, the glob below picks it up as an input as well.
    nex_list = [(nex, Nexus.Nexus(nex)) for nex in glob(wildcards)]

    concat = Nexus.combine(nex_list)
    # The context manager closes the file even if writing fails part-way.
    with open(outname, "w") as handle:
        concat.write_nexus_data(filename=handle)
    print(f"DONE! File is written as {outname}")
def main():
    """Read the NEXUS files in ``args.input``, concatenate them, write PHYLIP."""
    args = get_args()
    print("Reading files...")
    data = [
        (path, Nexus.Nexus(path))
        for path in glob.glob(os.path.join(args.input, '*.nex*'))
    ]
    print("Concatenating files...")
    merged = Nexus.combine(data)
    print("Writing to phylip...")
    merged.export_phylip(args.output)
Exemple #11
0
def _write_alignment_slice(alignment, start, stop, label, path, outformat):
    """Write columns ``start:stop`` of ``alignment`` to ``path`` in ``outformat``."""
    with open(path, 'w') as output_handle:
        alignment_iteration = MultipleSeqAlignment(alignment[:, start:stop], alphabet=generic_dna)
        if outformat.lower() == 'nexus':
            # Round-trip through Nexus.combine so the slice is written as a
            # single named partition.
            nexus_text = alignment_iteration.format('nexus')
            combined = Nexus.combine([(label, Nexus.Nexus(nexus_text))])
            combined.write_nexus_data(output_handle)
        else:
            AlignIO.write(alignment_iteration, output_handle, outformat)


def alignment_slicer(input, informat, outformat, SNPs, slide):
    """Cut an alignment into sliding windows and write one file per window.

    Windows of ``SNPs`` columns are written every ``slide`` columns while
    they fit completely; one final file then holds the columns from the last
    start position to the end of the alignment. Files are named
    ``<input>_site<start>to<end>.<outformat>``.

    Args:
        input: Path of the alignment to read.
        informat: Format name passed to ``AlignIO.read``.
        outformat: Output format; ``'nexus'`` (any case) is written through
            ``Nexus.combine``, anything else through ``AlignIO.write``.
        SNPs: Window width in alignment columns.
        slide: Step between consecutive window starts; must be positive.

    Raises:
        ValueError: If ``slide`` is not positive (the loop would not end).
    """
    if slide <= 0:
        raise ValueError("slide must be a positive number of columns")

    alignment = AlignIO.read(input, informat, alphabet=generic_dna)
    length_alignment = len(alignment[0].seq)

    # Use the function's own parameters; the original read the window width
    # and step from a global ``args`` and ignored ``SNPs`` and ``slide``.
    start = 0
    end = start + SNPs
    while end <= length_alignment:
        _write_alignment_slice(
            alignment, start, end,
            'site' + str(start) + 'to' + str(end),
            input + '_site' + str(start) + 'to' + str(end) + '.' + outformat,
            outformat)
        start += slide
        end += slide

    # Trailing partial window. As before, the partition label keeps the
    # unclipped ``end`` while the file name uses the alignment length.
    _write_alignment_slice(
        alignment, start, length_alignment,
        'site' + str(start) + 'to' + str(end),
        input + '_site' + str(start) + 'to' + str(length_alignment) + '.' + outformat,
        outformat)
    print("\ndone\n")
def main():
    """Concatenate the ``*.nex*`` alignments in ``args.input`` into one PHYLIP file."""
    args = get_args()
    print("Reading files...")
    nexus_files = glob.glob(os.path.join(args.input, '*.nex*'))
    data = []
    for fname in nexus_files:
        matrix = Nexus.Nexus(fname)
        data.append((fname, matrix))
    print("Concatenating files...")
    concatenated = Nexus.combine(data)
    # Disabled intermediate step, kept for reference:
    #print "Writing temp nexus..."
    #fd, temp = tempfile.mkstemp(suffix='.nexus')
    #concatenated.write_nexus_data(filename=os.fdopen(fd, 'w'))
    print("Writing to phylip...")
    concatenated.export_phylip(args.output)
Exemple #13
0
def concatNexAlns():
    """Combine the ``.nex`` files in ``<base_dir>/nexus/`` into one partitioned file.

    No taxon check is made; the matrices are passed straight to
    ``Nexus.combine``.

    Returns:
        Path of the written file, ``<base_dir>/concat_aln_species_tree.nex``.
    """
    nexdir = '{}/nexus/'.format(base_dir)
    nexi = []
    for fname in os.listdir(nexdir):
        if fname.endswith('.nex'):
            path = os.path.join(nexdir, fname)
            nexi.append((path, Nexus.Nexus(path)))
    coutname = '{}/concat_aln_species_tree.nex'.format(base_dir)
    combined = Nexus.combine(nexi)
    # The context manager closes the file even if writing fails part-way.
    with open(coutname, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    return coutname
def fully_partition(metadata, aligns):
    """Concatenate every locus and record the column span of each one.

    Args:
        metadata: Mapping ``model -> {locus: ...}``. It is updated in place:
            each ``metadata[model][locus]`` becomes the 1-based, inclusive
            ``(start, end)`` span of that locus in the combined matrix.
        aligns: Directory holding one ``<locus>.nex`` file per locus.

    Returns:
        ``(combined, metadata)``: the result of ``Nexus.combine`` and the
        updated mapping.
    """
    to_combine = []
    start = 1
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Nexus.Nexus reads the handle in its constructor, so the file
            # can be closed right away rather than left open.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end = start + nex.nchar - 1
            metadata[model][locus] = (start, end)
            to_combine.append((locus, nex))
            start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, metadata
def fully_partition(metadata, aligns):
    """Combine all loci into one matrix, one partition per locus.

    Args:
        metadata: Mapping ``model -> {locus: ...}``; modified in place so
            that ``metadata[model][locus]`` holds the 1-based, inclusive
            ``(start, end)`` columns of the locus in the combined matrix.
        aligns: Directory containing ``<locus>.nex`` for every locus.

    Returns:
        Tuple of the combined matrix from ``Nexus.combine`` and ``metadata``.
    """
    to_combine = []
    start = 1
    for model in metadata:
        for locus in metadata[model]:
            nexus_path = os.path.join(aligns, "{0}.nex".format(locus))
            # The constructor reads the whole handle; close it immediately
            # so that one descriptor per locus is not left open.
            with open(nexus_path) as nexus_handle:
                nex = Nexus.Nexus(nexus_handle)
            end = start + nex.nchar - 1
            metadata[model][locus] = (start, end)
            to_combine.append((locus, nex))
            start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, metadata
Exemple #16
0
def concattophylip(directory, outdir):
    """Concatenate the NEXUS files in ``directory`` into PHYLIP plus a charsets file.

    Writes ``<outdir>/phylip/concatdata.phylip`` and
    ``<outdir>/phylip/charsets.charsets``.

    Side effects: creates ``<outdir>/phylip`` (``os.makedirs`` fails if it
    already exists) and changes the working directory to ``directory``.

    Args:
        directory: Directory searched for ``*.nex*`` files.
        outdir: Parent directory for the new ``phylip`` folder.
    """
    print("Making concat and charset files.")
    # Resolve before chdir: a relative ``outdir`` would otherwise point
    # somewhere else once the working directory has changed.
    phylip_dir = os.path.join(os.path.abspath(outdir), 'phylip')
    os.makedirs(phylip_dir)
    os.chdir(directory)
    # Globbing relative to the new cwd keeps partition names free of paths.
    file_list = glob.glob('*.nex*')
    nexi = [(fname, Nexus.Nexus(fname)) for fname in file_list]
    combined = Nexus.combine(nexi)
    sets = combined.append_sets()
    concat_file = os.path.join(phylip_dir, 'concatdata.phylip')
    combined.export_phylip(concat_file)
    charset_file = os.path.join(phylip_dir, 'charsets.charsets')
    # The with-block closes the file; no explicit close() is needed.
    with open(charset_file, 'w') as outf:
        outf.write(sets)
def concatNexAlns(
        nexDir,
        outname,
        same_taxa=True):  #from https://biopython.org/wiki/Concatenate_nexus
    """Combine the ``.nex`` files in ``nexDir`` into one partitioned file.

    Args:
        nexDir: Directory searched for files ending in ``.nex``.
        outname: Tag used in the output name
            ``concat_stree_aln_<outname>.nex``, written relative to the
            current working directory.
        same_taxa: Accepted for compatibility with the upstream recipe but
            currently unused; no taxon check is performed.

    Returns:
        Name of the written file.
    """
    nexi = []
    for fname in os.listdir(nexDir):
        if fname.endswith('.nex'):
            path = os.path.join(nexDir, fname)
            nexi.append((path, Nexus.Nexus(path)))
    coutname = 'concat_stree_aln_{}.nex'.format(outname)
    combined = Nexus.combine(nexi)
    # The context manager closes the file even if writing fails part-way.
    with open(coutname, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    return coutname
def model_partition(metadata, aligns):
    """Concatenate loci grouped by model and record each model's column span.

    Args:
        metadata: Mapping ``model -> iterable of locus names``.
        aligns: Directory holding one ``<locus>.nex`` file per locus.

    Returns:
        ``(combined, new_metadata)``: the result of ``Nexus.combine`` and an
        ``OrderedDict`` mapping each model to its 1-based, inclusive
        ``(start, end)`` span in the combined matrix.
    """
    to_combine = []
    new_metadata = OrderedDict()
    start = 1
    end = 0
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Nexus.Nexus reads the handle in its constructor, so the file
            # can be closed right away rather than left open.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
Exemple #19
0
def model_partition(metadata, aligns):
    """Build one combined matrix in which each model spans a block of columns.

    Args:
        metadata: Mapping ``model -> iterable of locus names``.
        aligns: Directory containing ``<locus>.nex`` for every locus.

    Returns:
        Tuple ``(combined, new_metadata)`` where ``new_metadata`` is an
        ``OrderedDict`` of ``model -> (start, end)``, 1-based and inclusive.
    """
    to_combine = []
    start = 1
    end = 0
    new_metadata = OrderedDict()
    for model in metadata:
        for locus in metadata[model]:
            nexus_path = os.path.join(aligns, "{0}.nex".format(locus))
            # The constructor reads the whole handle; close it immediately
            # so that one descriptor per locus is not left open.
            with open(nexus_path) as nexus_handle:
                nex = Nexus.Nexus(nexus_handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        # all loci of this model are contiguous: columns start..end
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def model_partition(metadata, aligns):
    """Concatenate loci model by model and report the columns of each model.

    Args:
        metadata: Mapping ``model -> iterable of locus names``.
        aligns: Directory holding one ``<locus>.nex`` file per locus.

    Returns:
        ``(combined, new_metadata)``: the combined matrix and an
        ``OrderedDict`` of ``model -> (start, end)`` with 1-based, inclusive
        column positions.
    """
    to_combine = []
    new_metadata = OrderedDict()
    start = 1
    end = 0
    for model in metadata:
        for locus in metadata[model]:
            path = os.path.join(aligns, "{0}.nex".format(locus))
            # Close each input as soon as it is parsed; the original left
            # every handle open.
            with open(path) as handle:
                nex = Nexus.Nexus(handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
def write_variant_sites(alignment, var_sites, outfile):
    """Write the columns listed in ``var_sites`` as a partitioned NEXUS file.

    ``ranges`` groups ``var_sites`` into ``(first, last)`` blocks; each block
    becomes one partition named ``'site <first> to <last + 1>'``.

    Args:
        alignment: Alignment object sliced by column.
        var_sites: Column positions to keep.
        outfile: Path of the NEXUS file to write.
    """
    blocks = list(ranges(var_sites))  # (first, last) position tuples
    nex_aligns = []  # (name, Bio.Nexus.Nexus.Nexus) pairs
    for block in blocks:
        first, last = block[0], block[1]
        nexus_text = MultipleSeqAlignment(
            alignment[:, first:last + 1], alphabet=generic_dna).format('nexus')
        label = 'site {} to {}'.format(str(first), str(last + 1))
        nex_aligns.append((label, Nexus.Nexus(nexus_text)))

    combined = Nexus.combine(nex_aligns)
    with open(outfile, 'w') as out:
        combined.write_nexus_data(out)
    print('Converted {} informative sites without gaps into nexus alignment'.format(str(len(blocks))))
def model_partition(metadata, aligns):
    """Combine all loci, keeping the loci of each model in one column block.

    Args:
        metadata: Mapping ``model -> iterable of locus names``.
        aligns: Directory containing ``<locus>.nex`` for every locus.

    Returns:
        Tuple ``(combined, new_metadata)``; ``new_metadata`` is an
        ``OrderedDict`` mapping each model to its 1-based, inclusive
        ``(start, end)`` columns in ``combined``.
    """
    to_combine = []
    start = 1
    end = 0
    new_metadata = OrderedDict()
    for model in metadata:
        for locus in metadata[model]:
            nexus_path = os.path.join(aligns, "{0}.nex".format(locus))
            # Parse and close; the original never closed these handles.
            with open(nexus_path) as nexus_handle:
                nex = Nexus.Nexus(nexus_handle)
            end += nex.nchar
            to_combine.append((locus, nex))
        new_metadata[model] = (start, end)
        start = end + 1
    combined = Nexus.combine(to_combine)
    return combined, new_metadata
Exemple #23
0
def _write_BEST(dataset, filestem):
    """Write a concatenated NEXUS file and print taxset lines for BEST.

    Used by write_multispecies(). One partition per gene is written to
    ``<filestem>.nex``; the lines to add to the MrBayes block are printed to
    the screen rather than written to the file.
    """
    fname = filestem + ".nex"
    # one partition per gene
    nexi = [(g, _nexify(dataset.get_sequences(g))) for g in dataset.get_genes()]
    Nexus.combine(nexi).write_nexus_data(filename=fname)

    # group the 1-based sequence numbers by species
    d = defaultdict(list)
    numbers = [str(n) for n in xrange(1, len(dataset) + 1)]
    for sp, number in zip(dataset.get_species(), numbers):
        d[sp].append(number)

    contents = ["begin MyBayes;"]
    contents.extend(
        "taxset %s = % s" % (species, " ".join(OTUs))
        for species, OTUs in d.items())
    print("Add the following to the MrBayes block in %s" % fname)
    for line in contents:
        print(line)
def read_collapse(file, informat, gapchar):
    """Drop invariant columns from an alignment and write the rest as NEXUS.

    A column is kept when more than one of A, C, G and T occurs in it; every
    other column counts as invariant. Runs of kept columns become the
    partitions of ``<file>_collapsed.nexus``.

    Args:
        file: Path of the alignment; also the stem of the output name.
        informat: Format name passed to ``AlignIO.read``.
        gapchar: Gap character or ``None``. It only selects which diagnostic
            line is printed per column; gaps never count as a variant.
    """
    with open(file, 'r') as input_handle:
        alignment = AlignIO.read(input_handle, informat, alphabet=generic_dna)

    summary_align = AlignInfo.SummaryInfo(alignment)
    first_seq = alignment[0].seq
    length_first_seq = len(first_seq)
    my_pssm = summary_align.pos_specific_score_matrix(first_seq)

    invariant_position_index = []
    for position, column in enumerate(my_pssm.pssm):
        counts = column[1]
        base_counts = [counts['A'], counts['C'], counts['G'], counts['T']]
        # per-column diagnostic output, kept from the original
        if gapchar is not None:
            print(gapchar)
        else:
            print(base_counts)
        # Count all four bases. The original skipped the first list entry,
        # which is the gap count only when ``gapchar`` is given; with
        # ``gapchar=None`` it dropped the A count instead.
        bases_present = sum(1 for n in base_counts if n > 0)
        if bases_present <= 1:
            invariant_position_index.append(position)

    # set lookup keeps the filter linear in the alignment length
    invariant_lookup = set(invariant_position_index)
    alignment_indices_to_write = [
        i for i in range(length_first_seq) if i not in invariant_lookup
    ]

    def ranges(indices):
        """Yield (first, last) for each run of consecutive integers."""
        # consecutive values share the same (value - position) difference
        for _, run in itertools.groupby(enumerate(indices),
                                        lambda pair: pair[1] - pair[0]):
            run = list(run)
            yield run[0][1], run[-1][1]

    blocks = list(ranges(alignment_indices_to_write))
    print('\nExcluding {} sites at positions: {} \n'.format(
        len(invariant_position_index), invariant_position_index))
    print('Including sites at positions: {} \n'.format(blocks))

    n_alignments = []
    for first, last in blocks:
        nexus_text = MultipleSeqAlignment(
            alignment[:, first:last + 1], alphabet=generic_dna).format('nexus')
        n_alignments.append(('site' + str(first) + 'to' + str(last + 1),
                             Nexus.Nexus(nexus_text)))

    # combine the alignments in n_alignments
    combined = Nexus.combine(n_alignments)
    with open(file + '_collapsed.nexus', 'w') as output_handle:
        print('Writing collapsed alignment to: ' + file + '_collapsed.nexus\n')
        combined.write_nexus_data(output_handle)
Exemple #25
0
__author__ = 'anastasiiakorosteleva'
from Bio.Nexus import Nexus
# Nexus.combine takes a list of tuples [(name, nexus instance), ...]; given
# the file names in a list, a list comprehension builds these tuples.

file_list = ['apoa1.nex', 'apoe.nex', 'cyt450.nex', 'ace.nex', 'ABO.nex', "apoa5.nex", 'apod.nex', 'cdk6.nex', 'CETP.nex',
             'ETV6.nex', 'Gckr.nex', 'gdf5.nex','LDLR.nex', 'lpl.nex', 'NAT2.nex', 'park2.nex', 'SLC22A5.nex', 'UGT1A9.nex',
             'HMGA2.nex', 'apoc1.nex']
nexuses = [(fname, Nexus.Nexus(fname)) for fname in file_list]

combined = Nexus.combine(nexuses)
# The context manager closes the output even if writing fails part-way.
with open('combo.nex', 'w') as combo_handle:
    combined.write_nexus_data(filename=combo_handle)
Exemple #26
0
# A little script to concatenate all the NEXUS files in a folder and write
# the combined alignment back into that folder.
from Bio.Nexus import Nexus
import os

infile  = "/Users/robertlanfear/Desktop/turtles-individual-nexus-files-for-loci"
outfile = os.path.join(infile, "alignment.nex")

# Files directly inside ``infile`` (first os.walk entry), minus the output of
# a previous run so that it is not concatenated into itself.
file_list = [
    x for x in next(os.walk(infile))[2]
    if x != os.path.basename(outfile)
]
# os.walk yields bare names: open each file relative to ``infile``, not the
# current working directory. The bare name stays the partition label.
nexi =  [(fname, Nexus.Nexus(os.path.join(infile, fname))) for fname in file_list]
combined = Nexus.combine(nexi)
with open(outfile, 'w') as out_handle:
    combined.write_nexus_data(filename=out_handle)
def main(inDir, outName, fext='.gb'):
    """Extract CDS from the records in ``inDir``, align them and concatenate.

    Steps, in order: collect CDS per gene with ``extract_collect_CDS``,
    write unaligned nucleotide and protein FASTA files, align the proteins
    with MAFFT, back-translate with ``align_back_trans.py``, convert each
    back-translated alignment to NEXUS and combine them with
    ``Nexus.combine``.

    Args:
        inDir: Directory scanned for input files. Per-gene files go to
            ``<inDir>/output_AlignmentFiles``; the combined NEXUS file is
            written to ``inDir`` itself.
        outName: Prefix of the combined output file name.
        fext: Only files whose name ends with this suffix are read.

    Exits through ``sys.exit`` when either master dictionary is empty.
    """

    # MAKE OUTPUT FOLDER
    outDir = os.path.join(inDir, 'output_AlignmentFiles')
    if not os.path.exists(outDir):
        os.makedirs(outDir)

    # EXTRACT AND COLLECT CDS FROM RECORDS
    files = [f for f in os.listdir(inDir) if f.endswith(fext)]
    # Both dictionaries are filled in place by extract_collect_CDS; their
    # keys are used as gene names in the file names below.
    masterdict_nucl = collections.OrderedDict()
    masterdict_prot = collections.OrderedDict()
    for f in files:
        extract_collect_CDS(masterdict_nucl, masterdict_prot,
                            os.path.join(inDir, f))

    # REMOVE ALL DUPLICATE ENTRIES (result of CDS with multiple exons)
    remove_duplicates(masterdict_nucl)
    remove_duplicates(masterdict_prot)
    # Note: Not sure why I have to run this removal twice, but not all
    #       duplicates are removed first time around.
    remove_duplicates(masterdict_nucl)
    remove_duplicates(masterdict_prot)

    # ALIGN AND WRITE TO FILE
    if masterdict_nucl.items():
        for k, v in masterdict_nucl.iteritems():
            outFn_unalign_nucl = os.path.join(outDir,
                                              'nucl_' + k + '.unalign.fas')
            # Write unaligned nucleotide sequences
            with open(outFn_unalign_nucl, 'w') as hndl:
                SeqIO.write(v, hndl, 'fasta')
    if not masterdict_nucl.items():
        sys.exit('  ERROR: No items in nucleotide masterdictionary.')

    if masterdict_prot.items():
        for k, v in masterdict_prot.iteritems():
            outFn_unalign_prot = os.path.join(outDir,
                                              'prot_' + k + '.unalign.fas')
            outFn_aligned_prot = os.path.join(outDir,
                                              'prot_' + k + '.aligned.fas')
            # WRITE UNALIGNED PROTEIN SEQUENCES
            with open(outFn_unalign_prot, 'w') as hndl:
                SeqIO.write(v, hndl, 'fasta')
            # ALIGN SEQUENCES
            #import subprocess
            #subprocess.call(['mafft', '--auto', outFn_unalign_prot, '>', outFn_aligned_prot])
            # The wrapper returns MAFFT's stdout, which is saved as the
            # aligned protein file.
            mafft_cline = MafftCommandline(input=outFn_unalign_prot)
            stdout, stderr = mafft_cline()
            with open(outFn_aligned_prot, 'w') as hndl:
                hndl.write(stdout)
    if not masterdict_prot.items():
        sys.exit('  ERROR: No items in protein masterdictionary.')

    # BACK-TRANSLATION via Python script by Peter Cook
    # https://github.com/peterjc/pico_galaxy/tree/master/tools/align_back_trans
    for k, v in masterdict_prot.iteritems():
        outFn_unalign_nucl = os.path.join(outDir, 'nucl_' + k + '.unalign.fas')
        outFn_aligned_nucl = os.path.join(outDir, 'nucl_' + k + '.aligned.fas')
        outFn_aligned_prot = os.path.join(outDir, 'prot_' + k + '.aligned.fas')
        try:
            # NOTE(review): the trailing '11' is presumably the translation
            # table passed to align_back_trans.py -- confirm.
            log = subprocess.check_output([
                'python2', 'align_back_trans.py', 'fasta', outFn_aligned_prot,
                outFn_unalign_nucl, outFn_aligned_nucl, '11'
            ],
                                          stderr=subprocess.STDOUT)
        except:
            # NOTE(review): the bare except reports the gene and carries on;
            # the captured output in ``log`` is never shown.
            print '  ERROR: Error encountered during back-translation of', k
            #print log

    # IMPORT BACK-TRANSLATIONS AND CONCATENATE
    alignm_L = []
    for k in masterdict_prot.keys():
        aligned_nucl_fasta = os.path.join(outDir, 'nucl_' + k + '.aligned.fas')
        aligned_nucl_nexus = os.path.join(outDir, 'nucl_' + k + '.aligned.nex')
        # Convert from fasta to nexus
        try:
            alignm_fasta = AlignIO.read(aligned_nucl_fasta,
                                        'fasta',
                                        alphabet=Alphabet.generic_dna)
            # The conversion goes through an in-memory buffer, so no
            # per-gene NEXUS file is written here.
            hndl = StringIO()
            AlignIO.write(alignm_fasta, hndl, 'nexus')
            nexus_string = hndl.getvalue()
            nexus_string = nexus_string.replace(
                '\n' + k + '_', '\ncombined_'
            )  # IMPORTANT: Stripping the gene name from the sequence name
            alignm_nexus = Nexus.Nexus(nexus_string)
            alignm_L.append(
                (k, alignm_nexus))  # Function 'Nexus.combine' needs a tuple.
        except:
            # Genes whose alignment cannot be read or converted are skipped.
            print '  ERROR: Cannot process alignment of', k

    # COMBINE THE NEXUS ALIGNMENTS (IN NO PARTICULAR ORDER)
    n_aligned_CDS = len(alignm_L)
    alignm_combined = Nexus.combine(
        alignm_L)  # Function 'Nexus.combine' needs a tuple.
    # The number of successfully converted alignments is part of the name.
    outFn_nucl_combined_nexus = os.path.join(
        inDir,
        outName + '_nucl_' + str(n_aligned_CDS) + 'combined.aligned.nex')
    alignm_combined.write_nexus_data(
        filename=open(outFn_nucl_combined_nexus, 'w'))
Exemple #28
0
__author__ = 'anastasiiakorosteleva'
from Bio.Nexus import Nexus
# Nexus.combine expects a list of (name, nexus instance) tuples; a list
# comprehension over the file names builds exactly that.

file_list = [
    'apoa1.nex', 'apoe.nex', 'cyt450.nex', 'ace.nex', 'ABO.nex', "apoa5.nex",
    'apod.nex', 'cdk6.nex', 'CETP.nex', 'ETV6.nex', 'Gckr.nex', 'gdf5.nex',
    'LDLR.nex', 'lpl.nex', 'NAT2.nex', 'park2.nex', 'SLC22A5.nex',
    'UGT1A9.nex', 'HMGA2.nex', 'apoc1.nex'
]
nexuses = [(fname, Nexus.Nexus(fname)) for fname in file_list]

combined = Nexus.combine(nexuses)
# Close the output deterministically, including when the write fails.
with open('combo.nex', 'w') as out_handle:
    combined.write_nexus_data(filename=out_handle)
Exemple #29
0
#                     newSeq = str(sequence.seq).ljust(maxLen, '-')
#                     sequence.seq = Seq.Seq(newSeq)
#
#             for line in add_handle:
#                 input_handle.append(line)
#
#
#     saveName = 'Test/'+geneName+'.nex'
#     output_handle = open(saveName, 'w')
#     AlignIO.write(input_handle, output_handle, "nexus")

# Concatenate the ingroup (lethrinid) and outgroup alignments separately.
lethrinids = glob.glob('NEX/COMBINED*Renamed_Simple.nex')
outgroups = glob.glob('Outgroups/*Simple.nex')

nexi = [(j, Nexus.Nexus(j)) for j in lethrinids]
lethrinidsCombined = Nexus.combine(nexi)
# The context managers close each output even if writing fails part-way.
with open('OUT/Lethrinids_combined_all.nex', 'w') as lethrinidsOut:
    lethrinidsCombined.write_nexus_data(filename=lethrinidsOut)

nexi2 = [(j, Nexus.Nexus(j)) for j in outgroups]
outgroupsCombined = Nexus.combine(nexi2)
with open('OUT/Outgroups_combined_all.nex', 'w') as outgroupsOut:
    outgroupsCombined.write_nexus_data(filename=outgroupsOut)

# combined = glob.glob('Test/*Simple.nex')
# nexi = [(j, Nexus.Nexus(j)) for j in combined]
# combined = Nexus.combine(nexi)
# combined.write_nexus_data(filename=open('Test/COMBINED_All.nex', 'w'))
# combos = [f, fam]
# nexi = [(j, Nexus.Nexus(j)) for j in combos]
# combined = Nexus.combine(nexi)
#!/usr/bin/env python

from Bio.Nexus import Nexus

gene1 = open(
    'gene1.nex',
    'r')  # For each gene you want to combine, change name of Nexus file.
gene2 = open('gene2.nex', 'r')

# Keep adding similar lines for the number of genes you want to combine

allGenes = (gene1, gene2)  # Extend this tuple as needed

try:
    # Each partition is named after the file name its handle was opened with.
    concatenate = Nexus.combine([(i.name, Nexus.Nexus(i)) for i in allGenes])
finally:
    # The matrices are parsed (or parsing failed): release the input files.
    for geneHandle in allGenes:
        geneHandle.close()
concatOutFile = "outputFileName.nex"  # Change this to a meaningful output filename
concatenate.write_nexus_data(filename=concatOutFile)
Exemple #31
0
def Concatenate(prefix):
    """Concatenate the ``*.nex`` files of the working directory.

    Writes the combined matrix to ``btCOMBINED.nex`` and to
    ``<prefix>.phy`` in PHYLIP format.

    Args:
        prefix: Stem of the PHYLIP output file name.
    """
    combined_name = 'btCOMBINED.nex'
    # The output itself matches '*.nex'; skip it so that a rerun does not
    # concatenate the previous result with its own inputs.
    file_list = [fname for fname in glob('*.nex') if fname != combined_name]
    nexi = [(fname, Nexus.Nexus(fname)) for fname in file_list]
    combined = Nexus.combine(nexi)
    with open(combined_name, 'w') as handle:
        combined.write_nexus_data(filename=handle)
    combined.export_phylip(prefix + '.phy')
def read_collapse(file, informat, gapchar):
    """Remove invariant columns from an alignment and write a NEXUS file.

    A column is variable when more than one of A, C, G and T occurs in it;
    all other columns are excluded. Each run of consecutive variable columns
    becomes one partition of ``<file>_collapsed.nexus``.

    Args:
        file: Path of the input alignment; the output name is derived from it.
        informat: Format name passed to ``AlignIO.read``.
        gapchar: Gap character or ``None``. It only selects which diagnostic
            line is printed per column; gaps never count as a variant.
    """
    with open(file, 'r') as input_handle:
        alignment = AlignIO.read(input_handle, informat, alphabet=generic_dna)

    summary_align = AlignInfo.SummaryInfo(alignment)
    first_seq = alignment[0].seq
    length_first_seq = len(first_seq)
    my_pssm = summary_align.pos_specific_score_matrix(first_seq)

    invariant_position_index = []
    for position, column in enumerate(my_pssm.pssm):
        counts = column[1]
        base_counts = [counts['A'], counts['C'], counts['G'], counts['T']]
        # per-column diagnostic output, kept from the original
        if gapchar is not None:
            print(gapchar)
        else:
            print(base_counts)
        # All four bases are counted. The original sliced off the first list
        # entry, which is the gap count only when ``gapchar`` is set; with
        # ``gapchar=None`` that silently ignored A.
        bases_present = sum(1 for n in base_counts if n > 0)
        if bases_present <= 1:
            invariant_position_index.append(position)

    # set lookup keeps the filter linear in the alignment length
    invariant_lookup = set(invariant_position_index)
    alignment_indices_to_write = [
        i for i in range(length_first_seq) if i not in invariant_lookup
    ]

    def ranges(indices):
        """Yield (first, last) for each run of consecutive integers."""
        # consecutive values share the same (value - position) difference
        for _, run in itertools.groupby(enumerate(indices),
                                        lambda pair: pair[1] - pair[0]):
            run = list(run)
            yield run[0][1], run[-1][1]

    blocks = list(ranges(alignment_indices_to_write))
    print('\nExcluding {} sites at positions: {} \n'.format(
        len(invariant_position_index), invariant_position_index))
    print('Including sites at positions: {} \n'.format(blocks))

    n_alignments = []
    for first, last in blocks:
        nexus_text = MultipleSeqAlignment(
            alignment[:, first:last + 1], alphabet=generic_dna).format('nexus')
        n_alignments.append(('site' + str(first) + 'to' + str(last + 1),
                             Nexus.Nexus(nexus_text)))

    # combine the alignments in n_alignments
    combined = Nexus.combine(n_alignments)
    with open(file + '_collapsed.nexus', 'w') as output_handle:
        print('Writing collapsed alignment to: ' + file + '_collapsed.nexus\n')
        combined.write_nexus_data(output_handle)
from Bio import Alphabet
from Bio.Nexus import Nexus
from Bio import SeqIO
import sys

# Expected arguments: (1) output file name, (2)-(N) input fasta files

print("Converting fasta files to nexus...")
print("")

nexusFiles = []

for fastaIn in sys.argv[2:]:
    print(fastaIn)
    nexusOut = fastaIn.replace(".fasta", ".nex")
    if nexusOut == fastaIn:
        # With no ".fasta" in the name the output path equals the input
        # path, so the conversion would be written over the source file.
        sys.exit("ERROR: input file name must contain '.fasta': " + fastaIn)
    SeqIO.convert(fastaIn, "fasta", nexusOut, "nexus", alphabet=Alphabet.IUPAC.IUPACAmbiguousDNA())
    nexusFiles.append(nexusOut)

print("Concatenating alignments...")

# Parse each converted file and close it right away instead of keeping one
# open handle per alignment. Partitions are named after the NEXUS file name.
matrices = []
for nex in nexusFiles:
    with open(nex, 'r') as nexusHandle:
        matrices.append((nexusHandle.name, Nexus.Nexus(nexusHandle)))

concatenate = Nexus.combine(matrices)
concatenate.write_nexus_data(filename=sys.argv[1])
	
def concat_alignment(files, output):
    """Write the concatenation of the NEXUS alignments in ``files`` to ``output``.

    Partition names are the file names with every occurrence of the
    module-level ``alignments_dir``, every "." and every "-" removed.

    Args:
        files: Iterable of NEXUS file paths.
        output: Path of the combined NEXUS file.
    """
    nexi = [
        (fname.replace(alignments_dir, '').replace(".", "").replace("-", ""),
         Nexus.Nexus(fname))
        for fname in files
    ]
    combined = Nexus.combine(nexi)
    # Close the output deterministically, including when the write fails.
    with open(output, 'w') as out_handle:
        combined.write_nexus_data(filename=out_handle)
Exemple #35
0
# For every "<gene>_sims" folder in the starting directory, combine the three
# simulated NEXUS files of each "posterior_predictive_sim_*" sub-folder into
# "<mainDir>/<gene>_<simNum>/<gene>_<simNum>.nex".
# All paths are built from mainDir, but the files are read relative to the
# current working directory, hence the chdir calls below.
mainDir = os.getcwd()

for g in glob.glob('*_sims'):
	# pull out gene name
	gene = g.split("_")[0]
	# create path to gene folder
	geneDirPath = os.path.join(mainDir,g)
	# move into gene folder
	os.chdir(geneDirPath)
	for p in glob.glob('posterior_predictive_sim_*'):
		# fourth underscore-separated field of the folder name
		simNum = p.split("_")[3]
		# make name for concat nexus file
		concatNex = gene + "_" + simNum + ".nex"
		# make folder for sim seq
		mbRunDirPath = os.path.join(mainDir, gene + "_" + simNum)
		nexOutPath = os.path.join(mbRunDirPath,concatNex)
		if not os.path.exists(mbRunDirPath):
			os.mkdir(mbRunDirPath)
		#debug
		print simNum, concatNex, mbRunDirPath, nexOutPath

		# move into sim seq folder
		os.chdir(p)
		# the three input files are expected under these fixed names
		seqList =["phyloSeq[1].nex", "phyloSeq[2].nex", "phyloSeq[3].nex"]
		nexConvert =  [(f, Nexus.Nexus(f)) for f in seqList]
		combine = Nexus.combine(nexConvert)
		combine.write_nexus_data(filename=open(nexOutPath, 'w'))
		# back to the gene folder so the next relative chdir(p) resolves
		os.chdir(geneDirPath)
	# back to the start so the next gene folder is found
	os.chdir(mainDir)

from Bio.Nexus import Nexus
import sys

#usage : python concat_nex.py filelist.txt

# provide a list of files in nexus format as text file in command line argument
filename = sys.argv[1]
with open(filename, 'r') as f:
    # One path per line. Blank lines (including the empty entry that a
    # trailing newline leaves behind) and surrounding whitespace are dropped
    # so that no empty name reaches Nexus.Nexus.
    file_list = [line.strip() for line in f if line.strip()]
for i in file_list:  # print the list of input files
    print(i)

nexi = [(fname, Nexus.Nexus(fname)) for fname in file_list]
combined = Nexus.combine(nexi)
# writes output file named as concatenated.nex
with open('concatenated.nex', 'w') as out_handle:
    combined.write_nexus_data(filename=out_handle)
Exemple #37
0
#!/usr/bin/env python

# Author: Gregory S Mendez

# This script will create a super matrix alignment file in nexus format from input alignments in nexus format

# Named variables. Every run needs the following defined:
# 1) --in_dir - The directory containing the nexus alignments that need to be merged.
# 2) --out - The full filepath and name you want for the output file.

from Bio.Nexus import Nexus
import argparse, glob

# Argument Parser
parser = argparse.ArgumentParser(description = 'This script will create a super matrix alignment file from input alignments')
parser.add_argument('--in_dir', required=True, help='The input directory containing alignment files.')
parser.add_argument('--out', required=True, help='The filepath and filename of the output file.')
args = parser.parse_args()

IN_DIR = args.in_dir
OUT = args.out
# Every *.nex file directly inside IN_DIR becomes one partition, named after
# its path as returned by glob.
FILE_LIST = glob.glob('%s/*.nex' % IN_DIR)
NEXI =  [(FNAME, Nexus.Nexus(FNAME)) for FNAME in FILE_LIST]
COMBINED = Nexus.combine(NEXI)
# The context manager closes the output even if writing fails part-way.
with open('%s' % OUT, 'w') as OUT_HANDLE:
    COMBINED.write_nexus_data(filename=OUT_HANDLE)