Ejemplo n.º 1
0
 def validate_settings(self):
     """Check the input files and the distance stored on this object.

     ``listFile`` and ``ccsFile`` are each passed to ``validate_input``
     together with the file extensions accepted for them, and the attribute
     is overwritten with whatever ``validate_input`` returns.  ``distance``
     is then handed to ``validate_float`` with bounds of 0.001 and 0.5.
     """
     # (attribute name, accepted extensions), listed in the order checked
     expected_inputs = (
         ('listFile', ['.list']),
         ('ccsFile', ['.fq', '.fastq']),
     )
     for attr_name, extensions in expected_inputs:
         checked = validate_input(getattr(self, attr_name), extensions)
         setattr(self, attr_name, checked)
     # The distance is only checked; its return value (if any) is not stored
     validate_float('Distance', self.distance, minimum=0.001, maximum=0.5)
Ejemplo n.º 2
0
 def validate_settings(self):
     """Validate the supplied input files and the clustering distance.

     Each file attribute is replaced by the result of ``validate_input``
     for its accepted extensions; ``distance`` is passed to
     ``validate_float`` with a minimum of 0.001 and a maximum of 0.5.
     """
     # Extensions accepted for each of the two input files
     list_extensions = ['.list']
     ccs_extensions = ['.fq', '.fastq']
     self.listFile = validate_input(self.listFile, list_extensions)
     self.ccsFile = validate_input(self.ccsFile, ccs_extensions)
     # Bounds forwarded to validate_float as keyword arguments
     distance_bounds = {'minimum': 0.001, 'maximum': 0.5}
     validate_float('Distance', self.distance, **distance_bounds)
Ejemplo n.º 3
0
 def validate_settings(self):
     """Run the input-file and distance checks for this object's settings.

     ``listFile`` must pass ``validate_input`` for ``.list`` and ``ccsFile``
     for ``.fq``/``.fastq``; both attributes are reassigned to the value
     ``validate_input`` returns.  ``distance`` is checked by
     ``validate_float`` against the range 0.001 to 0.5.
     """
     # Input files: read the current path, then store the validated result
     list_path = self.listFile
     self.listFile = validate_input(list_path, [".list"])
     ccs_path = self.ccsFile
     self.ccsFile = validate_input(ccs_path, [".fq", ".fastq"])
     # Distance: bounds named for readability
     lower, upper = 0.001, 0.5
     validate_float("Distance", self.distance, minimum=lower, maximum=upper)
Ejemplo n.º 4
0
def parse_args():
    """
    Parse and validate the command-line options for the rRNA amplicon pipeline.

    The parsed values are written onto the existing ``args`` namespace object
    (passed to ``parse_args(namespace=args)``); ``args`` is not defined in
    this function, so it is presumably a module-level object -- no return
    statement is visible in this section.

    After parsing, the logger is initialised and the options are checked:

    * a missing alignment reference is logged and raises ``ValueError``;
    * a missing chimera reference falls back to the alignment reference;
    * consensus is disabled if ``gcon.py`` cannot be found via ``which``;
    * ``nproc`` and ``distance`` are passed to ``validate_int`` /
      ``validate_float`` (how those report failures is not visible here).

    Raises:
        ValueError: if ``validate_file`` returns None for the alignment
            reference.
        SystemExit: if ``--version`` is given.
    """
    desc = 'A pipeline tool for analyzing PacBio sequenced rRNA amplicons'
    parser = argparse.ArgumentParser(description=desc)

    add = parser.add_argument
    add('input_file',
        metavar='FILE',
        help="File of rRNA sequencing data to use")
    add('-r',
        '--raw_data',
        metavar='FILE',
        help='BasH5, BaxH5 or FOFN of raw H5-format sequence data')
    add('-a',
        '--min_accuracy',
        type=float,
        metavar='FLOAT',
        default=MIN_ACCURACY,
        help='Minimum predicted sequence accuracy to allow (%s)' %
        MIN_ACCURACY)
    add('-l',
        '--min_length',
        type=int,
        metavar='INT',
        default=MIN_LENGTH,
        help='Minimum length sequence to allow (%s)' % MIN_LENGTH)
    add('-s',
        '--min_snr',
        type=float,
        metavar='FLOAT',
        default=MIN_SNR,
        help='Minimum Signal-to-Noise ratio to allow (%s)' % MIN_SNR)
    add('-d',
        '--distance',
        type=float,
        metavar='FLOAT',
        default=DIST,
        help='Distance at which to cluster sequences (%s)' % DIST)
    add('-t',
        '--step',
        type=float,
        metavar='FLOAT',
        default=STEP,
        help="Step-size to use when clustering iteratively")
    add('-n',
        '--num_processes',
        type=int,
        metavar='INT',
        default=NPROC,
        dest='nproc',
        help='Number of processors to use (%s)' % NPROC)
    add('-f',
        '--fraction',
        type=float,
        metavar='FLOAT',
        default=FRACTION,
        help='Fraction of full-length to require of each read (%s)' % FRACTION)
    add('-o',
        '--output',
        dest='output_dir',
        metavar='DIR',
        default='rna_pipeline_run',
        help="Specify the output folder")
    add('-q',
        '--min_qv',
        type=int,
        metavar='INT',
        default=MIN_QV,
        help='Minimum QV to allow after sequence masking (%s)' % MIN_QV)
    add('-c',
        '--min_cluster_size',
        type=int,
        metavar='INT',
        default=MIN_CLUSTER_SIZE,
        help='Minimum cluster to generate consensus sequences (%s)' %
        MIN_CLUSTER_SIZE)
    add('--clustering_method',
        metavar='METHOD',
        dest='clusteringMethod',
        default=DEFAULT_METHOD,
        choices=CLUSTER_METHODS,
        help="Distance algorithm to use in clustering (%s)" % DEFAULT_METHOD)
    add('--precluster_diffs',
        type=int,
        metavar='INT',
        default=PRECLUSTER_DIFFS,
        help='Maximum number of differences to allow in pre-clustering (%s)' %
        PRECLUSTER_DIFFS)
    add('-A',
        '--alignment_reference',
        metavar='REF',
        default='silva.both.align',
        help="Reference MSA for aligning query sequences")
    add('-C',
        '--chimera_reference',
        metavar='REF',
        default='silva.gold.align',
        help="Reference MSA for Chimera detection")
    add('--sub_cluster',
        action="store_true",
        help="Subcluster each OTU to separate individual rDNA alleles")
    # The two --disable_* flags are stored inverted: the attribute defaults to
    # True and the flag switches it to False.
    add('--disable_iteration',
        action='store_false',
        dest='enable_iteration',
        help="Turn off the iterative Clustering and Resequencing steps")
    add('--disable_consensus',
        action='store_false',
        dest='enable_consensus',
        help="Turn off the generation of cluster consensus sequences")
    add('--blasr',
        metavar='BLASR_PATH',
        help="Specify the path to the Blasr executable")
    add('--mothur',
        metavar='MOTHUR_PATH',
        default='mothur',
        help="Specify the path to the Mothur executable")
    add('--debug', action='store_true', help="Turn on DEBUG message logging")
    add('--test_mode',
        action='store_true',
        help="Turn on current modifications being tested")

    class PrintVersionAction(argparse.Action):
        """Print the version string and exit as soon as the flag is seen."""

        def __call__(self, parser, namespace, values, option_string=None):
            # Single-argument parenthesised form prints identically whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print("\tHLA Analysis Pipeline version: %s" % __VERSION__)
            raise SystemExit

    # nargs=0 so that --version consumes no value from the command line
    add("--version", nargs=0, action=PrintVersionAction)

    # Populate the pre-existing ``args`` namespace rather than a fresh one
    parser.parse_args(namespace=args)
    initialize_logger(log, stream=sys.stdout, debug=args.debug)

    # Validate any input files
    if validate_file(args.alignment_reference) is None:
        msg = 'No alignment reference specified and default 16S Alignment reference not detected!'
        log.error(msg)
        raise ValueError(msg)

    # A missing chimera reference is not fatal: reuse the alignment reference
    if validate_file(args.chimera_reference) is None:
        msg = 'Default Chimera Reference not detected in PATH, falling back to Alignment Reference'
        log.warning(msg)
        args.chimera_reference = args.alignment_reference

    # Consensus needs gcon.py; without it the step is switched off, not failed
    if args.enable_consensus and which('gcon.py') is None:
        msg = 'No copies of pbdagcon/gcon.py detected in PATH, disabling consensus'
        log.warning(msg)
        args.enable_consensus = False

    # Validate numerical parameters
    validate_int('NumProc', args.nproc, minimum=0)
    validate_float('Distance',
                   args.distance,
                   minimum=MIN_DIST,
                   maximum=MAX_DIST)