def _prepare_samplesheet(self):
    """Call the samplesheet build method here. This will build the
    dictionary that will hold all samplesheet data, and then write it into
    the output directory."""
    self._run_checks()
    is_pe = self.sheet.compile(self.real_out, self.real_work)
    # We want to throw an error if there is a mix of PE and SE samples.
    if len(set(is_pe)) > 1:
        # If we get here, then we should separate the list of samples into
        # those that are SE and those that are PE
        se = []
        pe = []
        for samp in sorted(self.sheet.final_sheet):
            # Check for entries in the R1 and R2 slots
            r1 = self.sheet.final_sheet[samp]['FastqR1files']
            r2 = self.sheet.final_sheet[samp]['FastqR2files']
            grp = self.sheet.final_sheet[samp]['Group']
            sname = samp + ' (Group: ' + grp + ')'
            if r1 and not r2:
                se.append(sname)
            elif r1 and r2:
                pe.append(sname)
        DieGracefully.die_gracefully(DieGracefully.PE_SE_MIX, pe, se)
    ss_path = self.sheet.write_sheet(self.real_out, self.pipe_name, '|')
    return ss_path
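# A minimal sketch (not part of the pipeline) of why len(set(is_pe)) > 1
# detects a PE/SE mix, assuming compile() returns one truth value per
# sample. The values below are hypothetical.
is_pe = [True, True, False]   # two PE samples, one SE sample
print(len(set(is_pe)) > 1)    # True -> mixed library types, an error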
def _prepare_output(self):
    """Make sure the parent directory of the output directory exists and
    can be written into. Make directories that we have permission to
    make."""
    # Get the dirname of the output file to make sure that the directory
    # exists and can be written to
    par = os.path.dirname(self.dest)
    self.group_logger.info('Checking directories.')
    # We only need to check the parent of the output destination here
    self.group_logger.debug('Checking output directory %s', par)
    # First, check that it exists
    if dir_funcs.dir_exists(par, self.group_logger):
        # And lastly, is it writeable?
        if dir_funcs.dir_writeable(par, self.group_logger):
            # All good!
            self.group_logger.debug('Output dir %s is valid', par)
        else:
            self.group_logger.error(
                'Output dir %s cannot be written to!', par)
            DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    else:
        self.group_logger.warning(
            'Output dir %s does not exist, making it', par)
        s = dir_funcs.make_dir(par, self.group_logger)
        if not s:
            DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
def _validate(self, a):
    """Validate the arguments. We want to make sure that the FASTQ
    directory is not empty, the columns do not collide with each other,
    and that the names do not have any commas in them."""
    self._validate_fastq_folder(a['fq_folder'])
    # Drop a warning that specifying extra columns means that there will
    # be some more specialized statistical analysis required
    if a['extra_column']:
        self.group_logger.warning(
            'Specifying additional columns for experimental conditions '
            'is an advanced feature, and will require you to write custom '
            'scripts for statistical analysis. gopher-pipelines will do '
            'tests on the "Group" column (present by default), but will '
            'not account for additional experimental details in your '
            'design. This is not an error message.')
    # Check the experimental columns - first make sure that the names are
    # not duplicated
    tot_col = self.columns + a['extra_column']
    if len(tot_col) != len(set(tot_col)):
        self.group_logger.warning(
            'Duplicate columns specified. This will not cause an error '
            'in the Python script, but it may cause an error in any '
            'downstream statistical analysis.')
    # Check the supplied columns for bad values
    for e in a['extra_column']:
        if ',' in e:
            self.group_logger.error('Column names cannot contain commas.')
            DieGracefully.die_gracefully(DieGracefully.GROUP_BAD_COL)
    # Turn relative paths into absolute paths
    a['fq_folder'] = os.path.realpath(os.path.expanduser(a['fq_folder']))
    return a
def _validate_hisat_idx(self, i):
    """Raise an error if the provided HISAT2 index is not complete -
    all of the [1-8].ht2l? files should be present."""
    # Build glob patterns for the normal and long indices
    norm_idx = i + '.[1-8].ht2'
    long_idx = i + '.[1-8].ht2l'
    # Do the search
    self.pipe_logger.debug('Searching for %s', norm_idx)
    norm_idx_files = glob.glob(norm_idx)
    self.pipe_logger.debug('Found %i idx files', len(norm_idx_files))
    # There should be 8 total
    if len(norm_idx_files) == 8:
        return
    self.pipe_logger.debug('Normal idx not found. Searching for long idx.')
    long_idx_files = glob.glob(long_idx)
    self.pipe_logger.debug('Found %i long idx files', len(long_idx_files))
    if len(long_idx_files) == 8:
        return
    self.pipe_logger.error('Could not find HISAT2 idx files!')
    DieGracefully.die_gracefully(DieGracefully.BAD_HISAT)
    return
def _set_groups(self, groups):
    """If the groups argument is NoneType, then there were no experimental
    groups passed to the pipeline script, and we fill in 'NULL' for each
    group. If it was passed, then we parse it for group memberships. We
    set any overlapping sample groups to be the same value. We report any
    non-overlapping samples as warnings, and complete non-overlap as an
    error."""
    if not groups:
        self.sheet_logger.debug(
            'No groups file passed. All samples are NULL group.')
        for s in self.samples:
            self.samples[s]['Group'] = 'NULL'
        return
    self.sheet_logger.debug('Parsing %s for groups.', groups)
    csv_gps = {}
    with open(groups, 'r') as f:
        for index, line in enumerate(f):
            if index == 0:
                # Skip the header line
                continue
            elif line.strip() == '':
                self.sheet_logger.warning(
                    'Line %d in groups csv is empty, skipping.', index)
            elif len(line.strip().split(',')) < 2:
                self.sheet_logger.warning(
                    'Line %d in groups csv has fewer than 2 fields, '
                    'skipping.', index)
            else:
                tmp = line.strip().split(',')
                csv_gps[tmp[0]] = tmp[1]
    self.sheet_logger.debug(
        'CSV experimental groups:\n%s', pprint.pformat(csv_gps))
    # Calculate the overlaps
    fq_samples = set(self.samples)
    csv_samples = set(csv_gps)
    fq_only = fq_samples - csv_samples
    csv_only = csv_samples - fq_samples
    overlap = fq_samples & csv_samples
    # Throw an error if there are no overlapping samples
    if len(overlap) == 0:
        self.sheet_logger.error(
            'The FASTQ directory and CSV file appear to mismatch.')
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_NO_SAMP_GPS)
    # Drop warnings if there are exclusive samples
    if len(fq_only) > 0:
        self.sheet_logger.warning(
            'Samples found that do not have CSV entries, setting to '
            'NULL group:\n%s', '\n'.join(list(fq_only)))
    if len(csv_only) > 0:
        self.sheet_logger.warning(
            'Ignoring samples with groups but no reads in the FASTQ '
            'directory:\n%s', '\n'.join(list(csv_only)))
    # Iterate through the sample dictionary and set groups
    for s in self.samples:
        gp = csv_gps.get(s, 'NULL')
        self.sheet_logger.debug('Sample %s gets group %s.', s, gp)
        self.samples[s]['Group'] = gp
    return
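# The overlap handling above is plain set arithmetic. A small
# illustration with made-up sample names:
fq_samples = {'sampA', 'sampB', 'sampC'}   # parsed from FASTQ filenames
csv_samples = {'sampB', 'sampC', 'sampD'}  # parsed from the groups CSV
print(fq_samples - csv_samples)  # {'sampA'}: warned, set to NULL group
print(csv_samples - fq_samples)  # {'sampD'}: warned, ignored
print(fq_samples & csv_samples)  # {'sampB', 'sampC'}: must be non-empty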
def brnaseq_group(args):
    """Sub-function for calling the bulk RNAseq group template."""
    from CHURPipelines.ExperimentGroup import BulkRNAseqGroup
    eg = BulkRNAseqGroup.BulkRNAseqGroup(args)
    eg.setup(args)
    eg.write_sheet()
    DieGracefully.die_gracefully(DieGracefully.BRNASEQ_GROUP_SUCCESS, eg.dest)
    return
def _check_scheduler(self):
    """Check that the scheduler resource requests make sense. ppn should
    be between 1 and 24; mem should be between 1 and 62000; walltime
    should be between 1h and 96h; and the queue should be one of the
    valid queues on Mesabi or Mangi."""
    try:
        assert self.ppn >= 1 and self.ppn <= 24
    except AssertionError:
        self.logger.error(
            'PPN value of %i is invalid! Please specify between 1 and 24.',
            self.ppn)
        DieGracefully.die_gracefully(DieGracefully.BAD_RESOURCES)
    try:
        assert self.mem >= 1 and self.mem <= 62000
    except AssertionError:
        self.logger.error(
            'Mem value of %i is invalid! Specify between 1 and 62000.',
            self.mem)
        DieGracefully.die_gracefully(DieGracefully.BAD_RESOURCES)
    try:
        assert self.walltime >= 1 and self.walltime <= 96
    except AssertionError:
        self.logger.error(
            'Walltime value of %i is invalid! Specify between 1 and 96.',
            self.walltime)
        DieGracefully.die_gracefully(DieGracefully.BAD_RESOURCES)
    try:
        assert self.msi_queue in CHURPipelines.QUEUES
    except AssertionError:
        self.logger.error(
            'Queue %s is not in the allowed list of queues.',
            self.msi_queue)
        DieGracefully.die_gracefully(DieGracefully.BAD_QUEUE)
    return
def check_for_bad(a):
    """Check for bad characters in the args and throw an error if it
    detects any of them. These are characters that let users terminate the
    current command and start another, e.g., a file called
    'sample_01; rm -rf ~'"""
    v = vars(a)
    bad_chars = [';', '#', '|', '$(', '<', '>', '`']
    for option in v:
        if not v[option]:
            continue
        for bc in bad_chars:
            if bc in str(v[option]):
                DieGracefully.die_gracefully(
                    DieGracefully.NEFARIOUS_CHAR, str(v[option]))
    return
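# A hedged sketch of exercising check_for_bad() with a hand-built
# argparse namespace; the option name here is hypothetical and the
# namespace setup is not part of the pipeline itself.
import argparse

ns = argparse.Namespace(fq_folder='sample_01; rm -rf ~')
check_for_bad(ns)  # ';' is detected; exits via DieGracefully.NEFARIOUS_CHAR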
def _check_dirs(self):
    """Check that the directories exist and are readable and writeable.
    This will raise an error if we cannot find the fastq directory, or the
    output directory cannot be written to."""
    self.logger.info('Checking directories.')
    # We need to check the output directory and the working directory
    self.logger.debug('Checking output directory %s', self.outdir)
    # First, check that it exists
    if dir_funcs.dir_exists(self.outdir, self.logger):
        # Is it empty?
        if dir_funcs.dir_empty(self.outdir, self.logger):
            # And lastly, is it writeable?
            if dir_funcs.dir_writeable(self.outdir, self.logger):
                # All good!
                self.logger.debug('Output dir %s is valid', self.outdir)
            else:
                self.logger.error(
                    'Output dir %s cannot be written to!', self.outdir)
                DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
        else:
            self.logger.warning(
                'Output dir %s is not empty! Results may be clobbered.',
                self.outdir)
    else:
        self.logger.warning(
            'Output dir %s does not exist, making it', self.outdir)
        s = dir_funcs.make_dir(self.outdir, self.logger)
        if not s:
            DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    # And do the same for the work directory
    self.logger.debug('Checking working directory %s', self.workdir)
    # First, check that it exists
    if dir_funcs.dir_exists(self.workdir, self.logger):
        # Is it empty?
        if dir_funcs.dir_empty(self.workdir, self.logger):
            # And lastly, is it writeable?
            if dir_funcs.dir_writeable(self.workdir, self.logger):
                # All good!
                self.logger.debug('Working dir %s is valid', self.workdir)
            else:
                self.logger.error(
                    'Working dir %s cannot be written to!', self.workdir)
                DieGracefully.die_gracefully(DieGracefully.BAD_WORKDIR)
        else:
            self.logger.warning(
                'Working dir %s is not empty!', self.workdir)
    else:
        self.logger.warning(
            'Working dir %s does not exist, making it', self.workdir)
        s = dir_funcs.make_dir(self.workdir, self.logger)
        if not s:
            DieGracefully.die_gracefully(DieGracefully.BAD_WORKDIR)
    return
def _validate_fastq_folder(self, d):
    """Raise an error if the FASTQ directory does not exist or does not
    have any FASTQ files."""
    try:
        contents = os.listdir(d)
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_FASTQ)
    # Check if there is at least one file ending in a standard fastq
    # suffix
    fq_pat = re.compile(r'^.+((\.fq(\.gz)?$)|(\.fastq(\.gz)?$))')
    has_fastq = False
    for f in contents:
        if re.match(fq_pat, f):
            has_fastq = True
            break
    if has_fastq:
        return
    DieGracefully.die_gracefully(DieGracefully.EMPTY_FASTQ)
    return
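# A quick sanity check of the suffix pattern against a few made-up
# filenames:
import re

fq_pat = re.compile(r'^.+((\.fq(\.gz)?$)|(\.fastq(\.gz)?$))')
for name in ('samp_R1_001.fastq.gz', 'samp_R1_001.fq', 'notes.txt'):
    print(name, bool(re.match(fq_pat, name)))
# samp_R1_001.fastq.gz True
# samp_R1_001.fq True
# notes.txt False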
def brnaseq(args):
    """This function loads the bulk RNAseq pipeline module, and runs
    through the steps for bulk RNAseq analysis."""
    from CHURPipelines.Pipelines import BulkRNAseq
    p = BulkRNAseq.BulkRNAseqPipeline(args)
    p.setup(args)
    pipeline_fname, samplesheet_fname, key_name, qsub_dat = p.qsub()
    if not qsub_dat:
        DieGracefully.die_gracefully(
            DieGracefully.BRNASEQ_SUCCESS,
            pipeline_fname, samplesheet_fname, key_name)
    elif qsub_dat[2].returncode != 0:
        DieGracefully.die_gracefully(
            DieGracefully.BRNASEQ_SUBMIT_FAIL, qsub_dat)
    else:
        DieGracefully.die_gracefully(
            DieGracefully.BRNASEQ_SUBMIT_OK,
            pipeline_fname, samplesheet_fname, key_name, qsub_dat)
    return
def _get_fq_paths(self, d):
    """Read through the contents of a FASTQ directory and try to build a
    list of samples from it."""
    # Write a regular expression that will match the parts of the filename
    # that come after the sample name
    samp_re = re.compile(
        r'(_S[0-9]+)?'
        r'(_[ATCG]{4,})?'
        r'(_L00[1-8])?'
        r'(_R(1|2))?_001\.((fq(\.gz)?$)|(fastq(\.gz)?$))')
    # Get all files that look like not-R2 fastq files; make the matching
    # case-insensitive. The character class before _001 rejects '2'
    # (among other characters), so '..._R2_001' files do not match.
    fq_re = re.compile(
        r'^.+[^_R2]_001\.((fq(\.gz)?$)|(fastq(\.gz)?$))', flags=re.I)
    cont = os.listdir(d)
    # From the Illumina BaseSpace online documentation, this is what the
    # standard filenames will look like:
    #   SampleName_SX_L00Y_R1_001.fastq.gz
    #   X: Sample number in samplesheet
    #   Y: Lane number
    #   R1/2: Fwd/reverse
    #   001: Always 001.
    # This is similar to the UMGC filenames, which are split across lanes
    # and then concatenated.
    # We will also define a regex for finding SRA-like samples. This
    # should work alongside the default regex. This will just look for
    # files that are named '*_1.fastq.gz' or similar. It is not great, but
    # the SRA does not have a very specific format.
    sra_re = re.compile(r'^.+_1\.((fq(\.gz)?$)|(fastq(\.gz)?$))')
    sra_samp_re = re.compile(r'_(1|2)\.((fq(\.gz)?$)|(fastq(\.gz)?$))')
    # We will iterate through all files in the directory. If they look
    # like R1 fastq files, we will extract the sample name from them. We
    # will then build the R2 filename and ask if that exists in the
    # directory.
    for f in cont:
        if re.match(fq_re, f):
            # Extract the sample name from the fastq name
            sn = re.sub(samp_re, '', f)
            # Tack it onto the samples dictionary
            self.samples[sn] = {}
            self.samples[sn]['R1'] = os.path.join(d, f)
            # Look for the R2. This is really dumb-looking but:
            #   Reverse the R1 filename ([::-1])
            #   Replace 1R with 2R, with at most 1 replacement
            #   Reverse it again
            r2 = f[::-1].replace('1R', '2R', 1)[::-1]
            # Extract the sample name from the hypothetical R2 path. If it
            # is different from the R1 sample name, then we have messed up
            # the part of the filename that we shouldn't have - the R2
            # does not exist for this sample, and it is single-end
            r2_sn = re.sub(samp_re, '', r2)
            if r2_sn != sn:
                self.samples[sn]['R2'] = ''
            elif r2 not in cont or r2 == f:
                self.samples[sn]['R2'] = ''
            elif r2 in cont and r2 != f:
                self.samples[sn]['R2'] = os.path.join(d, r2)
            else:
                self.samples[sn]['R2'] = ''
        elif re.match(sra_re, f):
            sn = re.sub(sra_samp_re, '', f)
            self.samples[sn] = {}
            self.samples[sn]['R1'] = os.path.join(d, f)
            r2 = f[::-1].replace('1_', '2_', 1)[::-1]
            r2_sn = re.sub(sra_samp_re, '', r2)
            if r2_sn != sn:
                self.samples[sn]['R2'] = ''
            elif r2 not in cont or r2 == f:
                self.samples[sn]['R2'] = ''
            elif r2 in cont and r2 != f:
                self.samples[sn]['R2'] = os.path.join(d, r2)
            else:
                self.samples[sn]['R2'] = ''
    self.sheet_logger.debug(
        'Found samples:\n%s', pprint.pformat(self.samples))
    # Simple check - if there are no samples, then throw an error here
    if len(self.samples) == 0:
        self.sheet_logger.error(
            'No valid FASTQ files were found in the directory.')
        DieGracefully.die_gracefully(DieGracefully.EMPTY_FASTQ)
    return
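# Why the reverse-replace-reverse trick above works: str.replace()
# substitutes the leftmost occurrence, so reversing first makes it hit
# the *last* 'R1' in the name, protecting sample names that themselves
# contain 'R1'. A small demonstration on a hypothetical filename:
f = 'R1_mutant_S1_L001_R1_001.fastq.gz'
naive = f.replace('R1', 'R2', 1)           # clobbers the sample name
r2 = f[::-1].replace('1R', '2R', 1)[::-1]  # only the read token changes
print(naive)  # R2_mutant_S1_L001_R1_001.fastq.gz (wrong)
print(r2)     # R1_mutant_S1_L001_R2_001.fastq.gz (right)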
def _group_help(self, args):
    """Do a simple check to see if the pipe_group argument is set. If it
    isn't, print a nice help message."""
    if not args['pipe_group']:
        DieGracefully.die_gracefully(DieGracefully.GROUP_NO_PIPE)
    return
def _validate_args(self, a):
    """Validate arguments for the BulkRNAseqPipeline object. We define it
    in this file because it only really needs to be accessible to this
    subclass. Argument dependencies are pipeline-specific. For the bulk
    RNAseq analysis pipeline:
        - FASTQ dir and UMGC sheet are mutually exclusive
        - Organism and HISAT2 index + GTF are mutually exclusive
        - Check that the HISAT2 index is complete
    Further, sanitize the paths of the output dir, working dir, and
    hisat2 index.
    """
    # Check the completeness of the argument dictionary. -f must be
    # specified, and (-x and -g) or -r must be specified. After checking
    # the FASTQ folder, check the helper commands
    if not a['fq_folder']:
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_INC_ARGS)
    elif not ((a['hisat2_idx'] and a['gtf']) or a['organism']):
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_INC_ARGS)
    elif (a['hisat2_idx'] and a['gtf']) and a['organism']:
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_CONFLICT)
    # Convert all of the paths into absolute paths
    a['fq_folder'] = os.path.realpath(
        os.path.expanduser(str(a['fq_folder'])))
    a['hisat2_idx'] = os.path.realpath(
        os.path.expanduser(str(a['hisat2_idx'])))
    a['gtf'] = os.path.realpath(os.path.expanduser(str(a['gtf'])))
    a['outdir'] = os.path.realpath(os.path.expanduser(str(a['outdir'])))
    a['workdir'] = os.path.realpath(os.path.expanduser(str(a['workdir'])))
    if a['expr_groups']:
        a['expr_groups'] = os.path.realpath(
            os.path.expanduser(str(a['expr_groups'])))
    try:
        assert a['headcrop'] >= 0
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--headcrop')
    try:
        assert a['mincpm'] >= 0
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--min-cpm')
    try:
        assert a['rrna_screen'] >= 0
    except AssertionError:
        DieGracefully.die_gracefully(
            DieGracefully.BAD_NUMBER, '--rrna_screen')
    try:
        assert a['subsample'] >= 0
        if a['subsample'] > 0:
            assert a['subsample'] >= a['rrna_screen']
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--subsample')
    try:
        assert a['mem'] >= 12000
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--mem')
    try:
        assert a['walltime'] >= 2
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--walltime')
    self.pipe_logger.debug('GTF: %s', a['gtf'])
    self.pipe_logger.debug('Adapters: %s', a['adapters'])
    self.pipe_logger.debug('FASTQ Folder: %s', a['fq_folder'])
    self.pipe_logger.debug('Output Dir: %s', a['outdir'])
    self.pipe_logger.debug('Working Dir: %s', a['workdir'])
    self.pipe_logger.debug('HISAT2 Idx: %s', a['hisat2_idx'])
    self.pipe_logger.debug('Expr Groups: %s', a['expr_groups'])
    self.pipe_logger.debug('Strandness: %s', a['strand'])
    # Check that the adapters and GTF file exist
    try:
        handle = open(a['gtf'], 'r')
        handle.close()
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_GTF)
    if not a['adapters']:
        a['adapters'] = '$TRIMMOMATIC/adapters/all_illumina_adapters.fa'
    else:
        try:
            a['adapters'] = os.path.realpath(
                os.path.expanduser(str(a['adapters'])))
            handle = open(a['adapters'], 'r')
            handle.close()
        except OSError:
            DieGracefully.die_gracefully(DieGracefully.BAD_ADAPT)
    if a['expr_groups']:
        try:
            handle = open(a['expr_groups'], 'r')
            handle.close()
        except OSError:
            DieGracefully.die_gracefully(DieGracefully.BRNASEQ_BAD_GPS)
    # Validate the FASTQ folder
    self._validate_fastq_folder(a['fq_folder'])
    # Validate the hisat2 index
    self._validate_hisat_idx(a['hisat2_idx'])
    # Sanitize the hisat2 index path
    a['hisat2_idx'] = dir_funcs.sanitize_path(
        a['hisat2_idx'], self.pipe_logger)
    return a
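# The dependency rules at the top of _validate_args(), restated on a toy
# argument dictionary (keys abbreviated, values hypothetical):
a = {'fq_folder': '/fq', 'hisat2_idx': None, 'gtf': None,
     'organism': 'mouse'}
complete = bool(a['fq_folder']) and bool(
    (a['hisat2_idx'] and a['gtf']) or a['organism'])
conflict = bool(a['hisat2_idx'] and a['gtf'] and a['organism'])
print(complete, conflict)  # True False: -r alone satisfies the idx/GTF rule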
def qsub(self):
    """Write the qsub command. We will need the path to the samplesheet,
    the number of samples in the samplesheet, and the scheduler options
    that were passed as arguments. This is defined in the subclass rather
    than in the main Pipeline class because the exact form of the qsub
    command depends on which pipeline we are running."""
    ss = self._prepare_samplesheet()
    # Make the qsub array key
    keyname = default_files.default_array_key(self.pipe_name)
    keyname = os.path.join(self.real_out, keyname)
    if os.path.isfile(keyname):
        self.pipe_logger.warning(
            'Sbatch key file %s exists. Overwriting!', keyname)
    try:
        handle = open(keyname, 'w')
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    # The sheet is sorted in this way before it is written to disk, so it
    # should be safe to sort it this way
    handle.write('Sbatch.Index\tSampleName\n')
    for index, samplename in enumerate(sorted(self.sheet.final_sheet)):
        handle.write(str(index + 1) + '\t' + samplename + '\n')
    handle.flush()
    handle.close()
    # Make the script filename
    pname = default_files.default_pipeline(self.pipe_name)
    pname = os.path.join(self.real_out, pname)
    if os.path.isfile(pname):
        self.pipe_logger.warning(
            'Submission script %s already exists. Overwriting!', pname)
    try:
        handle = open(pname, 'w')
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    # Write the header of the script
    handle.write('#!/bin/bash\n')
    # Write some meta-data lines
    handle.write('# Generated by CHURP version ' +
                 CHURPipelines.__version__ + '\n')
    handle.write('# Generated at ' + CHURPipelines.NOW + '\n')
    handle.write('set -e\n')
    handle.write('set -u\n')
    handle.write('set -o pipefail\n')
    self.pipe_logger.debug(
        'Number of samples: %i', len(self.sheet.final_sheet))
    self.pipe_logger.debug('Samplesheet: %s', ss)
    # Write command to figure out email address of submitting user
    handle.write('user_name="$(id -u -n)"\n')
    handle.write('user_email="${user_name}@umn.edu"\n')
    # Set the group string here
    if self.group:
        qsub_group = '-A ' + self.group
    else:
        qsub_group = ''
    qsub_array = '1'
    if len(self.sheet.final_sheet) > 1:
        qsub_array += '-' + str(len(self.sheet.final_sheet))
    # Write a few variables into the header of the script so they are
    # easy to find
    handle.write('KEYFILE=' + '"' + keyname + '"\n')
    handle.write('QSUB_ARRAY=' + '"' + qsub_array + '"\n')
    handle.write('OUTDIR=' + '"' + str(self.real_out) + '"\n')
    handle.write('WORKDIR=' + '"' + str(self.real_work) + '"\n')
    handle.write('DE_SCRIPT=' + '"' + self.de_script + '"\n')
    handle.write('REPORT_SCRIPT=' + '"' + self.report_script + '"\n')
    handle.write('SAMPLESHEET=' + '"' + ss + '"\n')
    handle.write('PURGE=' + '"' + self.purge + '"\n')
    handle.write('RRNA_SCREEN=' + '"' + self.rrna_screen + '"\n')
    handle.write('SUBSAMPLE=' + '"' + self.subsample + '"\n')
    handle.write(
        'PIPE_SCRIPT="$(cd "$( dirname "${BASH_SOURCE[0]}" )" '
        '>/dev/null && pwd )/$(basename $0)"\n')
    # These are the variables we want to export into the single sample
    # job script.
    single_cmd_vars = ','.join([
        'SampleSheet="${SAMPLESHEET}"',
        'PURGE="${PURGE}"',
        'RRNA_SCREEN="${RRNA_SCREEN}"',
        'SUBSAMPLE="${SUBSAMPLE}"'])
    aln_cmd = [
        'sbatch',
        '--parsable',
        '--ignore-pbs',
        '-p', self.msi_queue,
        '--mail-type=ALL',
        '--mail-user="******"',
        qsub_group,
        '-o', '"${OUTDIR}/bulk_rnaseq_single_sample-%A.%a.out"',
        '-e', '"${OUTDIR}/bulk_rnaseq_single_sample-%A.%a.err"',
        '-N', '1',
        '--mem=' + str(self.mem) + 'mb',
        '--tmp=12gb',
        '-n', '1',
        '-c', str(self.ppn),
        '--time=' + str(self.walltime * 60),
        '--array="${QSUB_ARRAY}"',
        '--export=' + single_cmd_vars,
        self.single_sample_script,
        '||', 'exit', '1']
    # Write the first qsub command
    handle.write('single_id=$(' + ' '.join(aln_cmd) + ')\n')
    # This is the command for counting and normalizing reads
    summary_vars = ','.join([
        'SampleSheet="${SAMPLESHEET}"',
        'MINLEN="' + self.min_gene_len + '"',
        'MINCPM="' + self.min_cpm + '"',
        'RSUMMARY="${DE_SCRIPT}"',
        'PIPE_SCRIPT="${PIPE_SCRIPT}"',
        'BULK_RNASEQ_REPORT="${REPORT_SCRIPT}"'])
    summary_cmd = [
        'sbatch',
        '--parsable',
        '--ignore-pbs',
        '-p', self.msi_queue,
        '--mail-type=ALL',
        '--mail-user="******"',
        qsub_group,
        '-o', '"${OUTDIR}/run_summary_stats-%j.out"',
        '-e', '"${OUTDIR}/run_summary_stats-%j.err"',
        '-N', '1',
        '--mem=' + str(self.mem) + 'mb',
        '--tmp=12gb',
        '-n', '1',
        '-c', str(self.ppn),
        '--time=720',
        '--depend=afterok:${single_id}',
        '--export=' + summary_vars,
        self.summary_script,
        '||', 'exit', '1']
    # Write the second command
    handle.write('summary_id=$(' + ' '.join(summary_cmd) + ')\n')
    # Write some echo statements for users' information
    handle.write('echo "Output and logs will be written to ${OUTDIR}"\n')
    handle.write('echo "Emails will be sent to ${user_email}"\n')
    handle.write('echo "Sbatch array to samplename key: ${KEYFILE}"\n')
    handle.write('echo "Single samples job array ID: ${single_id}"\n')
    handle.write('echo "Summary job ID: ${summary_id}"\n')
    self.pipe_logger.debug('sbatch:\n%s', ' '.join(aln_cmd))
    self.pipe_logger.debug('sbatch:\n%s', ' '.join(summary_cmd))
    handle.flush()
    handle.close()
    # Check if we want to automatically submit the script
    if self.nosubmit:
        qsub_dat = None
    else:
        qsub_cmd = ['bash', pname]
        qsub_proc = subprocess.Popen(
            qsub_cmd,
            shell=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        qsub_stdout, qsub_stderr = qsub_proc.communicate()
        qsub_dat = (qsub_stdout, qsub_stderr, qsub_proc)
    return (pname, ss, keyname, qsub_dat)