def fast_denoiser(sff_fps, fasta_fp, tmp_outdir, num_cpus, primer,
                  verbose=True, titanium=False):
    """Wrapper function calling methods from the Denoiser package.

    sff_fps: flowgram file path(s), forwarded to denoise_seqs
    fasta_fp: fasta file path of the input sequences
    tmp_outdir: directory in which denoise_seqs writes its result files
    num_cpus: number of CPUs; a value > 1 selects the clustered/parallel path
    primer: primer sequence, forwarded to denoise_seqs
    verbose: verbosity flag, forwarded to denoise_seqs
    titanium: enable Titanium-specific settings in denoise_seqs

    Returns (seqs, mapping): seqs is a lazy iterator over the (label, seq)
    pairs of centroids followed by singletons; mapping maps a running
    cluster index to [cluster_id, member, ...].
    """
    if num_cpus > 1:
        # Parallel run: clustering must be enabled for multi-CPU denoising.
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer,
                     cluster=True, num_cpus=num_cpus, verbose=verbose,
                     titanium=titanium)
    else:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer,
                     verbose=verbose, titanium=titanium)

    # Read centroids and singletons lazily; the chained iterator keeps these
    # two file handles open until the caller fully consumes it, so they
    # intentionally are not closed here.
    centroids = parse_fasta(open(tmp_outdir + "/centroids.fasta"))
    singletons = parse_fasta(open(tmp_outdir + "/singletons.fasta"))
    seqs = chain(centroids, singletons)

    # Read the denoiser mapping. Fix: the mapping file is consumed eagerly,
    # so close it deterministically instead of leaking the handle.
    mapping = {}
    with open(tmp_outdir + "/denoiser_mapping.txt") as cluster_mapping:
        for i, line in enumerate(cluster_mapping):
            # Each line looks like "<cluster_id>: <member> <member> ..."
            cluster, members = line.split(":")
            mapping[i] = [cluster] + members.split()

    return seqs, mapping
def main(commandline_args=None):
    # Command-line entry point: validate the parsed options, set up the
    # output directory, then dispatch to the per-sample or whole-run
    # denoiser. The order of the parser.error() checks below is the
    # user-visible validation order — do not reorder.
    parser, opts, args = parse_command_line_parameters(**script_info)

    # -i (flowgram file path) is mandatory and must point to existing files.
    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.' % opts.sff_fp)

    # Optional checkpoint file to resume from must exist if given.
    if(opts.checkpoint_fp):
        bp_fp = opts.checkpoint_fp
        if not exists(bp_fp):
            parser.error('Specified checkpoint file does not exist: %s' % bp_fp)

    #peek into sff.txt files to make sure they are parseable
    #cat_sff_fles is lazy and only reads header
    flowgrams, header = cat_sff_files(map(open, opts.sff_fp.split(',')))

    # --split and --preprocess_fp are mutually exclusive modes.
    if(opts.split and opts.preprocess_fp):
        parser.error('Options --split and --preprocess_fp are exclusive')

    # A preprocess directory, if given, must exist and contain the two
    # files produced by an earlier preprocessing run.
    if(opts.preprocess_fp):
        pp_fp = opts.preprocess_fp
        if not exists(opts.preprocess_fp):
            parser.error('Specified preprocess directory does not exist: %s' % opts.preprocess_fp)
        if not files_exist('%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta' % (pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain expected files: ' +
                         'prefix_mapping.txt and prefix_dereplicated.fasta')

    # Titanium mode overrides the error profile and the cluster cutoffs
    # with Titanium-specific defaults.
    if opts.titanium:
        opts.error_profile = DENOISER_DATA_DIR + 'Titanium_error_profile.dat'
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist' % opts.error_profile)

    if opts.output_dir:
        #make sure it always ends on /
        tmpoutdir = opts.output_dir + "/"
    else:
        #make random dir in current dir
        tmpoutdir = get_tmp_filename(tmp_dir="", prefix="denoiser_", suffix="/")

    # Second argument: fail if the directory already exists, unless --force.
    create_dir(tmpoutdir, not opts.force)

    log_fp = 'denoiser.log'

    # Dispatch: --split denoises each sample separately; otherwise the
    # whole input is denoised in one run (which also honors --checkpoint_fp).
    if opts.split:
        denoise_per_sample(
            opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.cluster,
            opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail,
            opts.primer, opts.low_cutoff, opts.high_cutoff, log_fp,
            opts.low_memory, opts.verbose, opts.error_profile,
            opts.max_num_iter, opts.titanium)
    else:
        denoise_seqs(
            opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.preprocess_fp,
            opts.cluster, opts.num_cpus, opts.squeeze, opts.percent_id,
            opts.bail, opts.primer, opts.low_cutoff, opts.high_cutoff,
            log_fp, opts.low_memory, opts.verbose, opts.error_profile,
            opts.max_num_iter, opts.titanium, opts.checkpoint_fp)
def fast_denoiser(
        sff_fps, fasta_fp, tmp_outdir, num_cpus, primer, verbose=True,
        titanium=False):
    """wrapper function calling methods from the Denoiser package."""
    # Choose the clustered, multi-CPU run when more than one CPU is
    # requested; otherwise denoise single-threaded.
    if num_cpus > 1:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer,
                     cluster=True, num_cpus=num_cpus, verbose=verbose,
                     titanium=titanium)
    else:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer,
                     verbose=verbose, titanium=titanium)

    # Lazily yield all centroid records, then all singleton records.
    seqs = chain(
        MinimalFastaParser(open(tmp_outdir + "/centroids.fasta")),
        MinimalFastaParser(open(tmp_outdir + "/singletons.fasta")))

    # Build the cluster mapping: running index -> [cluster_id, *members].
    mapping = {}
    cluster_mapping = open(tmp_outdir + "/denoiser_mapping.txt")
    for idx, line in enumerate(cluster_mapping):
        cluster_id, member_str = line.split(':')
        mapping[idx] = [cluster_id] + member_str.split()

    return seqs, mapping