def main():
    """Trim the alignments found in the input directory and write them out.

    Reads the command-line arguments via ``get_args()``, collects every file
    in ``args.input`` whose name ends in one of the extensions returned by
    ``get_file_extensions(args.input_format)``, runs
    ``get_and_trim_alignments`` on each of them (in a worker pool when
    ``args.cores > 1``) and passes the results to
    ``write_alignments_to_outdir``.

    Raises:
        AssertionError: if ``args.cores`` exceeds the number of CPUs reported
            by ``multiprocessing.cpu_count()``.
    """
    args = get_args()
    # setup logging
    log, my_name = setup_logging(args.verbosity, args.log_path)
    text = " Starting {} ".format(my_name)
    log.info(text.center(65, "="))
    alignments = []
    log.info("Getting aligned sequences for trimming")
    for ftype in get_file_extensions(args.input_format):
        pattern = os.path.join(args.input, "*{}".format(ftype))
        alignments.extend(glob.glob(pattern))
    # Package up the trimming options once; every alignment file is paired
    # with the same option list so each work item is a single argument.
    package = [
        args.input_format,
        args.window,
        args.threshold,
        args.proportion,
        args.max_divergence,
        args.min_length,
    ]
    params = zip([package] * len(alignments), alignments)
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # With more than one core use Pool.map(), otherwise the builtin map().
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        # The pool is sized to one worker fewer than the requested cores.
        pool = multiprocessing.Pool(args.cores - 1)
        try:
            alignments = pool.map(get_and_trim_alignments, params)
        finally:
            # Always release the worker processes, even if a task failed.
            pool.close()
            pool.join()
    else:
        # map() is lazy on Python 3; build the list now so the work happens
        # here, before "Alignment ends" is logged.
        alignments = list(map(get_and_trim_alignments, params))
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
def main(args):
    """Align every locus from the FASTA dictionary and write the alignments.

    Args:
        args: parsed command-line options. This function reads ``aligner``,
            ``window``, ``threshold``, ``no_trim``, ``proportion``,
            ``max_divergence``, ``min_length``, ``cores``, ``output`` and
            ``output_format``; ``args`` is also handed as a whole to
            ``setup_logging`` and ``get_fasta_dict``.

    Raises:
        AssertionError: if ``args.cores`` exceeds the number of CPUs reported
            by ``multiprocessing.cpu_count()``.
    """
    # setup logging
    log, my_name = setup_logging(args)
    # create the fasta dictionary
    loci = get_fasta_dict(log, args)
    log.info("Aligning with {}".format(str(args.aligner).upper()))
    # One independent option list per locus, so each work item is a single
    # (locus item, options) argument.
    opts = [
        [
            args.window,
            args.threshold,
            args.no_trim,
            args.proportion,
            args.max_divergence,
            args.min_length,
        ]
        for _ in loci
    ]
    # combine loci and options
    params = zip(loci.items(), opts)
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # During alignment, drop into sys.stdout for progress indicator
    # because logging in multiprocessing is more painful than what
    # we really need. Return to logging when alignment completes.
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        pool = multiprocessing.Pool(args.cores)
        try:
            alignments = pool.map(align, params)
        finally:
            # Always release the worker processes, even if a task failed.
            pool.close()
            pool.join()
    else:
        # map() is lazy on Python 3; build the list now so the work happens
        # here, before "Alignment ends" is logged.
        alignments = list(map(align, params))
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed {} ".format(my_name)
    log.info(text.center(65, "="))
def main(args):
    """Align every locus with the selected aligner and write the alignments.

    The aligner class is imported lazily from ``phyluce.muscle`` or
    ``phyluce.mafft`` depending on ``args.aligner`` and handed to ``align``
    as the last element of each option list. After alignment, the id of
    every trimmed sequence is shortened before the results are written.

    NOTE(review): ``log`` is not defined in this function; it is assumed to
    be a module-level logger -- confirm.

    Args:
        args: parsed command-line options. This function reads ``aligner``,
            ``window``, ``threshold``, ``no_trim``, ``proportion``,
            ``max_divergence``, ``min_length``, ``cores``, ``output`` and
            ``output_format``; ``args`` is also handed as a whole to
            ``get_fasta_dict``.

    Raises:
        ValueError: if ``args.aligner`` is neither ``"muscle"`` nor
            ``"mafft"``.
        AssertionError: if ``args.cores`` exceeds the number of CPUs reported
            by ``multiprocessing.cpu_count()``.
    """
    if args.aligner == "muscle":
        from phyluce.muscle import Align as align_class
    elif args.aligner == "mafft":
        from phyluce.mafft import Align as align_class
    else:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError("Unsupported aligner: {!r}".format(args.aligner))
    # create the fasta dictionary
    loci = get_fasta_dict(log, args)
    log.info("Aligning with {}".format(str(args.aligner).upper()))
    # One independent option list per locus, so each work item is a single
    # (locus item, options) argument.
    opts = [
        [
            args.window,
            args.threshold,
            args.no_trim,
            args.proportion,
            args.max_divergence,
            args.min_length,
            align_class,
        ]
        for _ in loci
    ]
    # combine loci and options
    params = zip(loci.items(), opts)
    log.info("Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)")
    # During alignment, drop into sys.stdout for progress indicator
    # because logging in multiprocessing is more painful than what
    # we really need. Return to logging when alignment completes.
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(), "You've specified more cores than you have"
        pool = multiprocessing.Pool(args.cores)
        try:
            alignments = pool.map(align, params)
        finally:
            # Always release the worker processes, even if a task failed.
            pool.close()
            pool.join()
    else:
        # The results are iterated twice below (header fix, then writing).
        # map() is a one-shot iterator on Python 3, so keep them in a list.
        alignments = list(map(align, params))
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # Shorten the sequence ids before writing: keep only what follows the
    # third underscore and blank the description.
    for name, alignment in alignments:
        if alignment.trimmed:
            for t in alignment.trimmed:
                t.id = t.id.split('_', 3)[3]
                t.name = t.id
                t.description = ''
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments, args.output_format)
    # end
    text = " Completed! "
    log.info(text.center(65, "="))
def main(args):
    """Align every locus, clean the sequence headers and write the results.

    The aligner class is imported lazily from ``phyluce.muscle`` or
    ``phyluce.mafft`` depending on ``args.aligner`` and handed to ``align``
    as the last element of each option list. After the alignments are
    written, ``args.sequences`` is pickled to
    ``<args.output>/.secapr_files/sequence_origin.pickle`` on a best-effort
    basis: a failure there is reported on stdout and does not abort the run.

    NOTE(review): ``log`` is not defined in this function; it is assumed to
    be a module-level logger -- confirm.

    Args:
        args: parsed command-line options. This function reads ``aligner``,
            ``window``, ``threshold``, ``no_trim``, ``proportion``,
            ``max_divergence``, ``min_length``, ``gap_opening_penalty``,
            ``gap_extension_penalty``, ``cores``, ``output``,
            ``output_format`` and ``sequences``; ``args`` is also handed as
            a whole to ``get_fasta_dict``.

    Raises:
        ValueError: if ``args.aligner`` is neither ``"muscle"`` nor
            ``"mafft"``.
        AssertionError: if ``args.cores`` exceeds the number of CPUs reported
            by ``multiprocessing.cpu_count()``.
    """
    if args.aligner == "muscle":
        from phyluce.muscle import Align as align_class
    elif args.aligner == "mafft":
        from phyluce.mafft import Align as align_class
    else:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError("Unsupported aligner: {!r}".format(args.aligner))
    # create the fasta dictionary
    loci = get_fasta_dict(log, args)
    log.info("Aligning with {}".format(str(args.aligner).upper()))
    # One independent option list per locus, so each work item is a single
    # (locus item, options) argument.
    opts = [
        [
            args.window,
            args.threshold,
            args.no_trim,
            args.proportion,
            args.max_divergence,
            args.min_length,
            args.gap_opening_penalty,
            args.gap_extension_penalty,
            align_class,
        ]
        for _ in loci
    ]
    # combine loci and options
    params = zip(loci.items(), opts)
    log.info(
        "Alignment begins. 'X' indicates dropped alignments (these are reported after alignment)"
    )
    # During alignment, drop into sys.stdout for progress indicator
    # because logging in multiprocessing is more painful than what
    # we really need. Return to logging when alignment completes.
    if args.cores > 1:
        assert args.cores <= multiprocessing.cpu_count(
        ), "You've specified more cores than you have"
        pool = multiprocessing.Pool(args.cores)
        try:
            alignments = pool.map(align, params)
        finally:
            # Always release the worker processes, even if a task failed.
            pool.close()
            pool.join()
    else:
        # The results are iterated twice below (header fix, then writing).
        # map() is a one-shot iterator on Python 3, so keep them in a list.
        alignments = list(map(align, params))
    # kick the stdout down one line since we were using sys.stdout
    print("")
    # drop back into logging
    log.info("Alignment ends")
    # Fix the fasta headers before writing.
    for name, alignment in alignments:
        if alignment.trimmed:
            for t in alignment.trimmed:
                # The locus name is the last '|'-separated description field.
                locus_name = t.description.split('|')[-1]
                string_to_replace = '%s_' % str(locus_name)
                # Remove the first "<locus>_" occurrence as literal text; the
                # locus name must not be interpreted as a regex pattern.
                tmp = t.id.replace(string_to_replace, '', 1)
                # also remove the occasional _R_ resulting from reverse contigs
                t.id = tmp.replace('_R_', '', 1)
                t.name = t.id
                t.description = ''
    # write the output files
    write_alignments_to_outdir(log, args.output, alignments,
                               args.output_format)
    # Record where the sequences came from. The path is computed outside the
    # try block so the failure message below can always refer to it.
    pickle_path = os.path.join(args.output, '.secapr_files')
    try:
        if not os.path.exists(pickle_path):
            os.makedirs(pickle_path)
        with open(os.path.join(pickle_path, 'sequence_origin.pickle'),
                  'wb') as handle:
            pickle.dump(args.sequences,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
    except Exception:
        # Best effort only: the alignments are already on disk at this point.
        print('Could not pass origin of sequences to %s' % pickle_path)
    # end
    text = " Completed! "
    log.info(text.center(65, "="))