def main(commandline_args=None):
    """Validate the denoiser command-line options and start denoising.

    Steps, in order:
      1. Parse the options described by ``script_info``.
      2. Check that the flowgram file(s) (-i), the optional checkpoint
         file, the optional preprocess directory and the error profile
         exist; any violation is reported through ``parser.error``.
      3. Peek into the sff.txt files to make sure they are parseable.
      4. Create the output directory and dispatch to
         ``denoise_per_sample`` (when --split is set) or ``denoise_seqs``.

    commandline_args: accepted for interface compatibility; it is not
                      used inside this function.
    """
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    if opts.checkpoint_fp and not exists(opts.checkpoint_fp):
        parser.error('Specified checkpoint file does not exist: %s'
                     % opts.checkpoint_fp)

    # Peek into the sff.txt files to make sure they are parseable.
    # cat_sff_files is lazy and only reads the header, so its result is
    # discarded here. The handles are opened explicitly (instead of
    # map(open, ...)) so that every one of them is closed again, even when
    # opening a later file or parsing a header fails.
    sff_handles = []
    try:
        for sff_path in opts.sff_fp.split(','):
            sff_handles.append(open(sff_path))
        cat_sff_files(sff_handles)
    finally:
        for sff_handle in sff_handles:
            sff_handle.close()

    if opts.split and opts.preprocess_fp:
        parser.error('Options --split and --preprocess_fp are exclusive')

    if opts.preprocess_fp:
        pp_fp = opts.preprocess_fp
        if not exists(pp_fp):
            parser.error('Specified preprocess directory does not exist: %s'
                         % pp_fp)
        # files_exist takes a comma-separated list of paths
        if not files_exist('%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta'
                           % (pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain expected files: ' +
                         'prefix_mapping.txt and prefix_dereplicated.fasta')

    if opts.titanium:
        # --titanium overrides the error profile and both cut-offs
        opts.error_profile = DENOISER_DATA_DIR + 'Titanium_error_profile.dat'
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist'
                     % opts.error_profile)

    if opts.output_dir:
        # make sure it always ends on /
        tmpoutdir = opts.output_dir + "/"
    else:
        # make random dir in current dir
        tmpoutdir = get_tmp_filename(tmp_dir="", prefix="denoiser_",
                                     suffix="/")

    create_dir(tmpoutdir, not opts.force)

    log_fp = 'denoiser.log'

    if opts.split:
        denoise_per_sample(opts.sff_fp, opts.fasta_fp, tmpoutdir,
                           opts.cluster, opts.num_cpus, opts.squeeze,
                           opts.percent_id, opts.bail, opts.primer,
                           opts.low_cutoff, opts.high_cutoff, log_fp,
                           opts.low_memory, opts.verbose,
                           opts.error_profile, opts.max_num_iter,
                           opts.titanium)
    else:
        denoise_seqs(opts.sff_fp, opts.fasta_fp, tmpoutdir,
                     opts.preprocess_fp, opts.cluster, opts.num_cpus,
                     opts.squeeze, opts.percent_id, opts.bail, opts.primer,
                     opts.low_cutoff, opts.high_cutoff, log_fp,
                     opts.low_memory, opts.verbose, opts.error_profile,
                     opts.max_num_iter, opts.titanium, opts.checkpoint_fp)
def main(commandline_args=None):
    """Run the denoiser preprocessing step and publish its result files.

    Parses the options described by ``script_info``, checks that the
    flowgram file(s) given via -i exist, runs ``preprocess`` with a
    temporary directory as its output location and finally moves
    prefix_dereplicated.sff.txt, prefix_dereplicated.fasta and
    prefix_mapping.txt from the temporary directory into the output
    directory.

    commandline_args: accepted for interface compatibility; it is not
                      used inside this function.

    Invalid options are reported through ``parser.error``; a directory
    that cannot be created terminates the script through ``exit``.
    """
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error(
            'Flowgram file path does not exist:\n %s \n Pass a valid one via -i.' %
            opts.sff_fp)

    # make tmp and output dir
    tmp_dir = get_tmp_filename(tmp_dir=opts.output_dir + "/", suffix="/")

    # Create the output directory before the temporary one. The temporary
    # directory is requested inside the output directory, so creating it
    # first with makedirs would presumably create the output directory as a
    # side effect and report every failure as a temporary-directory failure.
    if not exists(opts.output_dir):
        try:
            makedirs(opts.output_dir)
        except OSError:
            exit("Creating output directory failed")
    try:
        makedirs(tmp_dir)
    except OSError:
        exit("Creating temporary directory failed")

    # open logger
    log_fh = None
    if opts.verbose:
        # append to the log file of the master process
        # NOTE(review): buffering=0 on a text-mode file is only accepted by
        # Python 2 - confirm the interpreter this script targets.
        log_fh = open(opts.output_dir + "/" + opts.log_fp, "a", 0)

    try:
        if log_fh:
            log_fh.write("SFF file: %s\n" % opts.sff_fp)
            log_fh.write("Fasta file: %s\n" % opts.fasta_fp)
            log_fh.write("Output dir: %s\n" % opts.output_dir)
            log_fh.write("Squeeze Seqs: %s\n" % opts.squeeze)
            log_fh.write("Primer sequence: %s\n" % opts.primer)

        # The return value is not needed here; only the files written to
        # tmp_dir are used below.
        preprocess(opts.sff_fp, log_fh, fasta_fp=opts.fasta_fp,
                   out_fp=tmp_dir, verbose=opts.verbose,
                   squeeze=opts.squeeze, primer=opts.primer)
    finally:
        # explicitly close log file, as this file can be shared with the
        # master. Closing it here assures that all preprocess writes happen
        # before the master writes - and, through the finally clause, that
        # the handle is released even when preprocess fails.
        if log_fh:
            log_fh.close()

    # move files to output dir
    for result_name in ("prefix_dereplicated.sff.txt",
                        "prefix_dereplicated.fasta",
                        "prefix_mapping.txt"):
        rename(tmp_dir + "/" + result_name,
               opts.output_dir + "/" + result_name)
    rmdir(tmp_dir)
def main(commandline_args=None):
    """Run the denoiser preprocessing step and publish its result files.

    Parses the options described by ``script_info``, checks that the
    flowgram file(s) given via -i exist, runs ``preprocess`` with a
    temporary directory as its output location and finally moves
    prefix_dereplicated.sff.txt, prefix_dereplicated.fasta and
    prefix_mapping.txt from the temporary directory into the output
    directory.

    commandline_args: accepted for interface compatibility; it is not
                      used inside this function.

    Invalid options are reported through ``parser.error``; a directory
    that cannot be created terminates the script through ``exit``.
    """
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    # make tmp and output dir
    tmp_dir = get_tmp_filename(tmp_dir=opts.output_dir + "/", suffix="/")

    # Create the output directory before the temporary one. The temporary
    # directory is requested inside the output directory, so creating it
    # first with makedirs would presumably create the output directory as a
    # side effect and report every failure as a temporary-directory failure.
    if not exists(opts.output_dir):
        try:
            makedirs(opts.output_dir)
        except OSError:
            exit("Creating output directory failed")
    try:
        makedirs(tmp_dir)
    except OSError:
        exit("Creating temporary directory failed")

    # open logger
    log_fh = None
    if opts.verbose:
        # append to the log file of the master process
        # NOTE(review): buffering=0 on a text-mode file is only accepted by
        # Python 2 - confirm the interpreter this script targets.
        log_fh = open(opts.output_dir + "/" + opts.log_fp, "a", 0)

    try:
        if log_fh:
            log_fh.write("SFF file: %s\n" % opts.sff_fp)
            log_fh.write("Fasta file: %s\n" % opts.fasta_fp)
            log_fh.write("Output dir: %s\n" % opts.output_dir)
            log_fh.write("Squeeze Seqs: %s\n" % opts.squeeze)
            log_fh.write("Primer sequence: %s\n" % opts.primer)

        # The return value is not needed here; only the files written to
        # tmp_dir are used below.
        preprocess(opts.sff_fp, log_fh, fasta_fp=opts.fasta_fp,
                   out_fp=tmp_dir, verbose=opts.verbose,
                   squeeze=opts.squeeze, primer=opts.primer)
    finally:
        # explicitly close log file, as this file can be shared with the
        # master. Closing it here assures that all preprocess writes happen
        # before the master writes - and, through the finally clause, that
        # the handle is released even when preprocess fails.
        if log_fh:
            log_fh.close()

    # move files to output dir
    for result_name in ("prefix_dereplicated.sff.txt",
                        "prefix_dereplicated.fasta",
                        "prefix_mapping.txt"):
        rename(tmp_dir + "/" + result_name,
               opts.output_dir + "/" + result_name)
    rmdir(tmp_dir)