Example #1
0
def main(commandline_args=None):
    """Validate the command line options and start a denoiser run.

    The command line is parsed with
    ``parse_command_line_parameters(**script_info)``.  Every input path
    referenced by the options is checked for existence, the output
    directory is created, and the work is handed to ``denoise_per_sample``
    (when ``opts.split`` is set) or to ``denoise_seqs`` (otherwise).

    commandline_args: accepted for interface compatibility; it is not read
        by this function.

    Failed validations are reported through ``parser.error``.
    """
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    if opts.checkpoint_fp and not exists(opts.checkpoint_fp):
        parser.error('Specified checkpoint file does not exist: %s'
                     % opts.checkpoint_fp)

    # Peek into the sff.txt files to make sure they are parseable.
    # The result of cat_sff_files is not needed afterwards, so the handles
    # opened for this check are closed again instead of being leaked for
    # the whole (potentially long) denoiser run.
    sff_handles = []
    try:
        for sff_path in opts.sff_fp.split(','):
            sff_handles.append(open(sff_path))
        cat_sff_files(sff_handles)
    finally:
        for sff_handle in sff_handles:
            sff_handle.close()

    if opts.split and opts.preprocess_fp:
        parser.error('Options --split and --preprocess_fp are exclusive')

    if opts.preprocess_fp:
        pp_fp = opts.preprocess_fp
        if not exists(pp_fp):
            parser.error('Specified preprocess directory does not exist: %s' % pp_fp)
        # files_exist takes a comma separated list of paths (same format
        # as used for opts.sff_fp above).
        if not files_exist('%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta' % (pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain expected files: ' +
                         'prefix_mapping.txt and prefix_dereplicated.fasta')

    if opts.titanium:
        # --titanium overrides any user supplied error profile and cut-offs
        opts.error_profile = DENOISER_DATA_DIR + 'Titanium_error_profile.dat'
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist' % opts.error_profile)

    if opts.output_dir:
        # make sure it always ends on /
        tmpoutdir = opts.output_dir + "/"
    else:
        # make random dir in current dir
        tmpoutdir = get_tmp_filename(tmp_dir="", prefix="denoiser_", suffix="/")

    create_dir(tmpoutdir, not opts.force)

    log_fp = 'denoiser.log'

    if opts.split:
        denoise_per_sample(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.cluster,
                           opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail,
                           opts.primer, opts.low_cutoff, opts.high_cutoff, log_fp,
                           opts.low_memory, opts.verbose, opts.error_profile, opts.max_num_iter,
                           opts.titanium)
    else:
        denoise_seqs(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.preprocess_fp, opts.cluster,
                     opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail, opts.primer,
                     opts.low_cutoff, opts.high_cutoff, log_fp, opts.low_memory,
                     opts.verbose, opts.error_profile, opts.max_num_iter, opts.titanium,
                     opts.checkpoint_fp)
Example #2
0
def main(commandline_args=None):
    """Validate the command line options and start a denoiser run.

    The command line is parsed with
    ``parse_command_line_parameters(**script_info)``.  Every input path
    referenced by the options is checked for existence, the output
    directory is created, and the work is handed to ``denoise_per_sample``
    (when ``opts.split`` is set) or to ``denoise_seqs`` (otherwise).

    commandline_args: accepted for interface compatibility; it is not read
        by this function.

    Failed validations are reported through ``parser.error``.
    """
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    if opts.checkpoint_fp and not exists(opts.checkpoint_fp):
        parser.error('Specified checkpoint file does not exist: %s'
                     % opts.checkpoint_fp)

    # Peek into the sff.txt files to make sure they are parseable.
    # The result of cat_sff_files is not needed afterwards, so the handles
    # opened for this check are closed again instead of being leaked for
    # the whole (potentially long) denoiser run.
    sff_handles = []
    try:
        for sff_path in opts.sff_fp.split(','):
            sff_handles.append(open(sff_path))
        cat_sff_files(sff_handles)
    finally:
        for sff_handle in sff_handles:
            sff_handle.close()

    if opts.split and opts.preprocess_fp:
        parser.error('Options --split and --preprocess_fp are exclusive')

    if opts.preprocess_fp:
        pp_fp = opts.preprocess_fp
        if not exists(pp_fp):
            parser.error('Specified preprocess directory does not exist: %s' % pp_fp)
        # files_exist takes a comma separated list of paths (same format
        # as used for opts.sff_fp above).
        if not files_exist('%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta' % (pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain expected files: ' +
                         'prefix_mapping.txt and prefix_dereplicated.fasta')

    if opts.titanium:
        # --titanium overrides any user supplied error profile and cut-offs
        opts.error_profile = DENOISER_DATA_DIR + 'Titanium_error_profile.dat'
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist' % opts.error_profile)

    if opts.output_dir:
        # make sure it always ends on /
        tmpoutdir = opts.output_dir + "/"
    else:
        # make random dir in current dir
        tmpoutdir = get_tmp_filename(tmp_dir="", prefix="denoiser_", suffix="/")

    create_dir(tmpoutdir, not opts.force)

    log_fp = 'denoiser.log'

    if opts.split:
        denoise_per_sample(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.cluster,
                           opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail,
                           opts.primer, opts.low_cutoff, opts.high_cutoff, log_fp,
                           opts.low_memory, opts.verbose, opts.error_profile, opts.max_num_iter,
                           opts.titanium)
    else:
        denoise_seqs(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.preprocess_fp, opts.cluster,
                     opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail, opts.primer,
                     opts.low_cutoff, opts.high_cutoff, log_fp, opts.low_memory,
                     opts.verbose, opts.error_profile, opts.max_num_iter, opts.titanium,
                     opts.checkpoint_fp)
Example #3
0
def main(commandline_args=None):
    """Run the preprocessing step and move its results to the output dir.

    The command line is parsed with
    ``parse_command_line_parameters(**script_info)``.  ``preprocess`` is
    run with a temporary directory below ``opts.output_dir`` as its
    ``out_fp``; afterwards the three ``prefix_*`` result files are renamed
    into ``opts.output_dir`` and the temporary directory is removed.

    commandline_args: accepted for interface compatibility; it is not read
        by this function.

    A missing or non-existing flowgram path is reported through
    ``parser.error``; failure to create a directory terminates via
    ``exit`` with a message.
    """
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error(
            'Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
            % opts.sff_fp)

    # make tmp and output dir
    tmp_dir = get_tmp_filename(tmp_dir=opts.output_dir + "/", suffix="/")
    try:
        makedirs(tmp_dir)
    except OSError:
        exit("Creating temporary directory failed")
    if not exists(opts.output_dir):
        try:
            makedirs(opts.output_dir)
        except OSError:
            exit("Creating output directory failed")

    # open logger
    log_fh = None
    if opts.verbose:
        # append to the log file of the master process
        # NOTE(review): buffering=0 together with text mode "a" is only
        # accepted by Python 2's open() -- confirm the target interpreter.
        log_fh = open(opts.output_dir + "/" + opts.log_fp, "a", 0)

    # Explicitly close the log file, as this file can be shared with the
    # master.  Closing it here assures that all preprocess writes happen
    # before the master writes.  The try/finally guarantees the close also
    # happens when a write or preprocess itself raises.
    try:
        if log_fh:
            log_fh.write("SFF file: %s\n" % opts.sff_fp)
            log_fh.write("Fasta file: %s\n" % opts.fasta_fp)
            log_fh.write("Output dir: %s\n" % opts.output_dir)
            log_fh.write("Squeeze Seqs: %s\n" % opts.squeeze)
            log_fh.write("Primer sequence: %s\n" % opts.primer)

        # The returned values are not used here; only the files written
        # below tmp_dir matter to this script.
        (deprefixed_sff_fp, l, mapping, seqs) = \
            preprocess(opts.sff_fp, log_fh, fasta_fp=opts.fasta_fp,
                       out_fp=tmp_dir,
                       verbose=opts.verbose, squeeze=opts.squeeze,
                       primer=opts.primer)
    finally:
        if log_fh:
            log_fh.close()

    # move files to output dir
    for result_name in ("prefix_dereplicated.sff.txt",
                        "prefix_dereplicated.fasta",
                        "prefix_mapping.txt"):
        rename(tmp_dir + "/" + result_name,
               opts.output_dir + "/" + result_name)
    rmdir(tmp_dir)
def main(commandline_args=None):
    """Run the preprocessing step and move its results to the output dir.

    The command line is parsed with
    ``parse_command_line_parameters(**script_info)``.  ``preprocess`` is
    run with a temporary directory below ``opts.output_dir`` as its
    ``out_fp``; afterwards the three ``prefix_*`` result files are renamed
    into ``opts.output_dir`` and the temporary directory is removed.

    commandline_args: accepted for interface compatibility; it is not read
        by this function.

    A missing or non-existing flowgram path is reported through
    ``parser.error``; failure to create a directory terminates via
    ``exit`` with a message.
    """
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    # make tmp and output dir
    tmp_dir = get_tmp_filename(tmp_dir=opts.output_dir + "/", suffix="/")
    try:
        makedirs(tmp_dir)
    except OSError:
        exit("Creating temporary directory failed")
    if not exists(opts.output_dir):
        try:
            makedirs(opts.output_dir)
        except OSError:
            exit("Creating output directory failed")

    # open logger
    log_fh = None
    if opts.verbose:
        # append to the log file of the master process
        # NOTE(review): buffering=0 together with text mode "a" is only
        # accepted by Python 2's open() -- confirm the target interpreter.
        log_fh = open(opts.output_dir + "/" + opts.log_fp, "a", 0)

    # Explicitly close the log file, as this file can be shared with the
    # master.  Closing it here assures that all preprocess writes happen
    # before the master writes.  The try/finally guarantees the close also
    # happens when a write or preprocess itself raises.
    try:
        if log_fh:
            log_fh.write("SFF file: %s\n" % opts.sff_fp)
            log_fh.write("Fasta file: %s\n" % opts.fasta_fp)
            log_fh.write("Output dir: %s\n" % opts.output_dir)
            log_fh.write("Squeeze Seqs: %s\n" % opts.squeeze)
            log_fh.write("Primer sequence: %s\n" % opts.primer)

        # The returned values are not used here; only the files written
        # below tmp_dir matter to this script.
        (deprefixed_sff_fp, l, mapping, seqs) = \
            preprocess(opts.sff_fp, log_fh, fasta_fp=opts.fasta_fp,
                       out_fp=tmp_dir,
                       verbose=opts.verbose, squeeze=opts.squeeze,
                       primer=opts.primer)
    finally:
        if log_fh:
            log_fh.close()

    # move files to output dir
    for result_name in ("prefix_dereplicated.sff.txt",
                        "prefix_dereplicated.fasta",
                        "prefix_mapping.txt"):
        rename(tmp_dir + "/" + result_name,
               opts.output_dir + "/" + result_name)
    rmdir(tmp_dir)