def get_files(input_dir, input_format):
    """Return the de-duplicated paths in ``input_dir`` matching ``input_format``.

    For every extension reported by ``get_file_extensions(input_format)``,
    files whose name contains that extension (glob ``*<ext>*``) are collected
    from ``input_dir`` after ``~`` expansion.  The result is passed through a
    ``set``, so its order is arbitrary.
    """
    found = [
        path
        for ext in get_file_extensions(input_format)
        for path in glob.glob(
            os.path.join(os.path.expanduser(input_dir), '*{}*'.format(ext))
        )
    ]
    # a file can match more than one extension pattern; keep each path once
    return list(set(found))
def main():
    """Collect the input alignments, trim them, and write the results.

    Reads its configuration from ``get_args()``, logs progress through the
    logger returned by ``setup_logging()``, hands every alignment file to
    ``get_and_trim_alignments`` (in a worker pool when ``args.cores > 1``),
    and passes the results to ``write_alignments_to_outdir``.
    """
    args = get_args()
    # setup logging
    log, my_name = setup_logging(args.verbosity, args.log_path)
    text = " Starting {} ".format(my_name)
    log.info(text.center(65, "="))
    alignments = []
    log.info("Getting aligned sequences for trimming")
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(glob.glob(os.path.join(args.input, "*{}".format(ftype))))
    # package up needed arguments for map(); every file gets the same settings
    package = [
        args.input_format,
        args.window,
        args.threshold,
        args.proportion,
        args.max_divergence,
        args.min_length,
    ]
    params = zip([package] * len(alignments), alignments)
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # with more than one core, use Pool.map(); otherwise process serially.
    # can also extend to MPI map, but not really needed on multicore
    # machine
    if args.cores > 1:
        # NOTE: assert is stripped under ``python -O``, so this check is advisory
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        pool = multiprocessing.Pool(args.cores - 1)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        except BaseException:
            # stop the workers immediately so join() below cannot hang
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # always reap the worker processes instead of leaking them
            pool.join()
    else:
        # build the list eagerly so the work happens here on every Python version
        alignments = [get_and_trim_alignments(param) for param in params]
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
def get_files(input_dir, input_format):
    """Collect the unique files in ``input_dir`` for the given format.

    Each extension from ``get_file_extensions(input_format)`` is turned into
    the glob pattern ``*<ext>*`` under the user-expanded ``input_dir``.
    Duplicates are removed through a ``set``, so the returned list has no
    guaranteed order.
    """
    matches = []
    for suffix in get_file_extensions(input_format):
        directory = os.path.expanduser(input_dir)
        pattern = os.path.join(directory, '*{}*'.format(suffix))
        matches += glob.glob(pattern)
    # ensure we collapse duplicate filenames
    unique = set(matches)
    return list(unique)
def write_alignments_to_outdir(outdir, alignments, format):
    """Write each trimmed alignment to its own file inside ``outdir``.

    ``alignments`` is an iterable of ``(locus, aln)`` pairs.  A pair whose
    ``aln.trimmed_alignment`` is ``None`` is reported on stdout and skipped.
    Every other alignment is written to ``<outdir>/<locus><ext>``, where
    ``<ext>`` is the first entry of ``get_file_extensions(format)``.
    """
    print('\nWriting output files...')
    for locus, aln in alignments:
        if aln.trimmed_alignment is None:
            print("\tSkipped writing {0}, there was no record".format(locus))
            continue
        outname = "{}{}".format(
            os.path.join(outdir, locus),
            get_file_extensions(format)[0],
        )
        # the context manager closes the handle even if formatting or
        # writing raises part-way through
        with open(outname, 'w') as outf:
            outf.write(aln.trimmed_alignment.format(format))
def write_alignments_to_outdir(outdir, alignments, format):
    """Write each available alignment to its own file inside ``outdir``.

    ``alignments`` is an iterable of ``(locus, aln)`` pairs.  A pair whose
    ``aln`` is falsy is reported on stdout and skipped.  Every other entry
    has ``aln.trimmed_alignment`` written to ``<outdir>/<locus><ext>``, where
    ``<ext>`` is the first entry of ``get_file_extensions(format)``.
    """
    print('\nWriting output files...')
    for locus, aln in alignments:
        if not aln:
            print("\tSkipped writing {0}, there was no record".format(locus))
            continue
        outname = "{}{}".format(
            os.path.join(outdir, locus),
            get_file_extensions(format)[0],
        )
        # the context manager closes the handle even if formatting or
        # writing raises part-way through
        with open(outname, 'w') as outf:
            outf.write(aln.trimmed_alignment.format(format))
def main():
    """Write the ungapped sequence of one taxon from every alignment.

    Alignment files are found in ``args.alignments`` using the extensions for
    ``args.input_format``.  For each record whose id equals ``args.taxon``,
    gap characters (``-``) are removed and the sequence is written to
    ``args.output`` as a FASTA entry named after the alignment file (base
    name without extension).  When nothing is left after removing gaps, the
    locus name is printed instead.
    """
    args = get_args()
    paths = []
    for ext in get_file_extensions(args.input_format):
        pattern = os.path.join(args.alignments, "*{}".format(ext))
        paths += glob.glob(pattern)
    for path in paths:
        for record in AlignIO.read(path, args.input_format):
            if record.id == args.taxon:
                ungapped = str(record.seq).replace('-', '')
                locus = os.path.splitext(os.path.basename(path))[0]
                if ungapped:
                    args.output.write(">{0}\n{1}\n".format(locus, ungapped))
                else:
                    # the taxon was present but consisted only of gaps
                    print(locus)
    args.output.close()
def main():
    """Collect the input alignments, trim them, and write the results.

    Reads its configuration from ``get_args()``, hands every alignment file
    to ``get_and_trim_alignments`` (in a worker pool when
    ``args.multiprocessing`` is set), and passes the results to
    ``write_alignments_to_outdir``.
    """
    args = get_args()
    alignments = []
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(glob.glob(os.path.join(args.input, "*{}".format(ftype))))
    # package up needed arguments for map(); every file gets the same settings
    package = [args.input_format, args.window, args.threshold, args.proportion]
    params = zip([package] * len(alignments), alignments)
    # print some output for user
    sys.stdout.write('Trimming')
    sys.stdout.flush()
    # with --multiprocessing, use Pool.map(); otherwise process serially.
    # can also extend to MPI map, but not really needed on multicore
    # machine
    if args.multiprocessing:
        # leave one CPU free, but never ask for zero workers: Pool(0) raises
        # ValueError on a single-CPU machine
        workers = max(1, multiprocessing.cpu_count() - 1)
        pool = multiprocessing.Pool(workers)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        except BaseException:
            # stop the workers immediately so join() below cannot hang
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # always reap the worker processes instead of leaking them
            pool.join()
    else:
        # build the list eagerly so the work happens here on every Python version
        alignments = [get_and_trim_alignments(param) for param in params]
    write_alignments_to_outdir(args.output, alignments, args.output_format)
def main():
    """Collect the input alignments, trim them, and write the results.

    Reads its configuration from ``get_args()``, hands every alignment file
    to ``get_and_trim_alignments`` (in a worker pool of ``args.cores - 1``
    processes when ``args.cores > 1``), and passes the results to
    ``write_alignments_to_outdir``.
    """
    args = get_args()
    alignments = []
    for ftype in get_file_extensions(args.input_format):
        alignments.extend(
            glob.glob(os.path.join(args.input, "*{}".format(ftype))))
    # package up needed arguments for map(); every file gets the same settings
    package = [args.input_format, args.window, args.threshold, args.proportion]
    params = zip([package] * len(alignments), alignments)
    # print some output for user
    sys.stdout.write('Trimming')
    sys.stdout.flush()
    # with more than one core, use Pool.map(); otherwise process serially.
    # can also extend to MPI map, but not really needed on multicore
    # machine
    if args.cores > 1:
        pool = multiprocessing.Pool(args.cores - 1)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        except BaseException:
            # stop the workers immediately so join() below cannot hang
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # always reap the worker processes instead of leaking them
            pool.join()
    else:
        # build the list eagerly so the work happens here on every Python version
        alignments = [get_and_trim_alignments(param) for param in params]
    write_alignments_to_outdir(args.output, alignments, args.output_format)
def get_files(input_dir, input_format):
    """Return the files in ``input_dir`` that end with a known extension.

    The extensions come from ``get_file_extensions(input_format)``; each one
    is globbed as ``*<ext>`` inside ``input_dir``.  Matches are returned in
    extension order, without de-duplication.
    """
    return [
        path
        for ftype in get_file_extensions(input_format)
        for path in glob.glob(os.path.join(input_dir, "*{}".format(ftype)))
    ]
def get_files(input_dir, input_format):
    """List the alignment files found in ``input_dir`` for ``input_format``.

    Every extension given by ``get_file_extensions(input_format)`` is matched
    with the glob pattern ``*<ext>`` in ``input_dir``; the matches are
    concatenated in extension order and returned as one list.
    """
    found = []
    for suffix in get_file_extensions(input_format):
        pattern = os.path.join(input_dir, "*{}".format(suffix))
        found += glob.glob(pattern)
    return found