def _prepare_samplesheet(self):
    """Call the samplesheet build method here. This will build the
    dictionary that will hold all samplesheet data, and then write it into
    the output directory."""
    self._run_checks()
    is_pe = self.sheet.compile(self.real_out, self.real_work)
    # We want to throw an error if there is a mix of PE and SE samples.
    if len(set(is_pe)) > 1:
        # If we get here, then we should separate the list of samples into
        # those that are SE and those that are PE
        se = []
        pe = []
        for samp in sorted(self.sheet.final_sheet):
            # Check for entries in the R1 and R2 slots
            r1 = self.sheet.final_sheet[samp]['FastqR1files']
            r2 = self.sheet.final_sheet[samp]['FastqR2files']
            grp = self.sheet.final_sheet[samp]['Group']
            sname = samp + ' (Group: ' + grp + ')'
            if r1 and not r2:
                se.append(sname)
            elif r1 and r2:
                pe.append(sname)
        DieGracefully.die_gracefully(DieGracefully.PE_SE_MIX, pe, se)
    ss_path = self.sheet.write_sheet(self.real_out, self.pipe_name, '|')
    return ss_path
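# A minimal sketch (not part of the pipeline) of why len(set(is_pe)) > 1
# detects a PE/SE mix, assuming compile() returns one truth value per
# sample. The values below are hypothetical.
is_pe = [True, True, False]   # two PE samples, one SE sample
print(len(set(is_pe)) > 1)    # True -> mixed library types, an error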
def _prepare_output(self):
    """Make sure the parent directory of the output directory exists and
    can be written into. Make directories that we have permission to
    make."""
    # Get the dirname of the output file to make sure that the directory
    # exists and can be written to
    par = os.path.dirname(self.dest)
    self.group_logger.info('Checking directories.')
    # We only need to check the parent of the output destination here
    self.group_logger.debug('Checking output directory %s', par)
    # First, check that it exists
    if dir_funcs.dir_exists(par, self.group_logger):
        # And lastly, is it writeable?
        if dir_funcs.dir_writeable(par, self.group_logger):
            # All good!
            self.group_logger.debug('Output dir %s is valid', par)
        else:
            self.group_logger.error(
                'Output dir %s cannot be written to!', par)
            DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    else:
        self.group_logger.warning(
            'Output dir %s does not exist, making it', par)
        s = dir_funcs.make_dir(par, self.group_logger)
        if not s:
            DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
def _validate(self, a):
    """Validate the arguments. We want to make sure that the FASTQ
    directory is not empty, the columns do not collide with each other,
    and that the names do not have any commas in them."""
    self._validate_fastq_folder(a['fq_folder'])
    # Drop a warning that specifying extra columns means that there will
    # be some more specialized statistical analysis required
    if a['extra_column']:
        self.group_logger.warning(
            'Specifying additional columns for experimental conditions '
            'is an advanced feature, and will require you to write custom '
            'scripts for statistical analysis. gopher-pipelines will do '
            'tests on the "Group" column (present by default), but will '
            'not account for additional experimental details in your '
            'design. This is not an error message.')
    # Check the experimental columns - first make sure that the names are
    # not duplicated
    tot_col = self.columns + a['extra_column']
    if len(tot_col) != len(set(tot_col)):
        self.group_logger.warning(
            'Duplicate columns specified. This will not cause an error '
            'in the Python script, but it may cause an error in any '
            'downstream statistical analysis.')
    # Check the supplied columns for bad values
    for e in a['extra_column']:
        if ',' in e:
            self.group_logger.error('Column names cannot contain commas.')
            DieGracefully.die_gracefully(DieGracefully.GROUP_BAD_COL)
    # Turn relative paths into absolute paths
    a['fq_folder'] = os.path.realpath(os.path.expanduser(a['fq_folder']))
    return a
def _validate_hisat_idx(self, i):
    """Raise an error if the provided HISAT2 index is not complete -
    all of the [1-8].ht2l? files should be present."""
    # Build glob patterns for the normal and long indices
    norm_idx = i + '.[1-8].ht2'
    long_idx = i + '.[1-8].ht2l'
    # Do the search
    self.pipe_logger.debug('Searching for %s', norm_idx)
    norm_idx_files = glob.glob(norm_idx)
    self.pipe_logger.debug('Found %i idx files', len(norm_idx_files))
    # There should be 8 total
    if len(norm_idx_files) == 8:
        return
    self.pipe_logger.debug('Normal idx not found. Searching for long idx.')
    long_idx_files = glob.glob(long_idx)
    self.pipe_logger.debug('Found %i long idx files', len(long_idx_files))
    if len(long_idx_files) == 8:
        return
    self.pipe_logger.error('Could not find HISAT2 idx files!')
    DieGracefully.die_gracefully(DieGracefully.BAD_HISAT)
    return
def _set_groups(self, groups):
    """If the groups argument is NoneType, then there were no experimental
    groups passed to the pipeline script, and we fill in 'NULL' for each
    group. If it was passed, then we parse it for group memberships. We
    set any overlapping sample groups to be the same value. We report any
    non-overlapping samples as warnings, and complete non-overlap as an
    error."""
    if not groups:
        self.sheet_logger.debug(
            'No groups file passed. All samples are NULL group.')
        for s in self.samples:
            self.samples[s]['Group'] = 'NULL'
        return
    self.sheet_logger.debug('Parsing %s for groups.', groups)
    csv_gps = {}
    with open(groups, 'r') as f:
        for index, line in enumerate(f):
            if index == 0:
                # Skip the header line
                continue
            elif line.strip() == '':
                self.sheet_logger.warning(
                    'Line %d in groups csv is empty, skipping.', index)
            elif len(line.strip().split(',')) < 2:
                self.sheet_logger.warning(
                    'Line %d in groups csv has fewer than 2 fields, '
                    'skipping.', index)
            else:
                tmp = line.strip().split(',')
                csv_gps[tmp[0]] = tmp[1]
    self.sheet_logger.debug(
        'CSV experimental groups:\n%s', pprint.pformat(csv_gps))
    # Calculate the overlaps
    fq_samples = set(self.samples)
    csv_samples = set(csv_gps)
    fq_only = fq_samples - csv_samples
    csv_only = csv_samples - fq_samples
    overlap = fq_samples & csv_samples
    # Throw an error if there are no overlapping samples
    if len(overlap) == 0:
        self.sheet_logger.error(
            'The FASTQ directory and CSV file appear to mismatch.')
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_NO_SAMP_GPS)
    # Drop warnings if there are exclusive samples
    if len(fq_only) > 0:
        self.sheet_logger.warning(
            'Samples found that do not have CSV entries, setting to '
            'NULL group:\n%s', '\n'.join(list(fq_only)))
    if len(csv_only) > 0:
        self.sheet_logger.warning(
            'Ignoring samples with groups but no reads in the FASTQ '
            'directory:\n%s', '\n'.join(list(csv_only)))
    # Iterate through the sample dictionary and set groups
    for s in self.samples:
        gp = csv_gps.get(s, 'NULL')
        self.sheet_logger.debug('Sample %s gets group %s.', s, gp)
        self.samples[s]['Group'] = gp
    return
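# The overlap handling above is plain set arithmetic. A small
# illustration with made-up sample names:
fq_samples = {'sampA', 'sampB', 'sampC'}   # parsed from FASTQ filenames
csv_samples = {'sampB', 'sampC', 'sampD'}  # parsed from the groups CSV
print(fq_samples - csv_samples)  # {'sampA'}: warned, set to NULL group
print(csv_samples - fq_samples)  # {'sampD'}: warned, ignored
print(fq_samples & csv_samples)  # {'sampB', 'sampC'}: must be non-empty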
def brnaseq_group(args):
    """Sub-function for calling the bulk RNAseq group template."""
    from CHURPipelines.ExperimentGroup import BulkRNAseqGroup
    eg = BulkRNAseqGroup.BulkRNAseqGroup(args)
    eg.setup(args)
    eg.write_sheet()
    DieGracefully.die_gracefully(DieGracefully.BRNASEQ_GROUP_SUCCESS, eg.dest)
    return
def _check_scheduler(self):
    """Check that the scheduler resource requests make sense. ppn should
    be between 1 and 24; mem should be between 1 and 62000; walltime
    should be between 1h and 96h; and the queue should be one of the
    valid queues on Mesabi or Mangi."""
    try:
        assert self.ppn >= 1 and self.ppn <= 24
    except AssertionError:
        self.logger.error(
            'PPN value of %i is invalid! Please specify between 1 and 24.',
            self.ppn)
        DieGracefully.die_gracefully(DieGracefully.BAD_RESOURCES)
    try:
        assert self.mem >= 1 and self.mem <= 62000
    except AssertionError:
        self.logger.error(
            'Mem value of %i is invalid! Specify between 1 and 62000.',
            self.mem)
        DieGracefully.die_gracefully(DieGracefully.BAD_RESOURCES)
    try:
        assert self.walltime >= 1 and self.walltime <= 96
    except AssertionError:
        self.logger.error(
            'Walltime value of %i is invalid! Specify between 1 and 96.',
            self.walltime)
        DieGracefully.die_gracefully(DieGracefully.BAD_RESOURCES)
    try:
        assert self.msi_queue in CHURPipelines.QUEUES
    except AssertionError:
        self.logger.error(
            'Queue %s is not in the allowed list of queues.',
            self.msi_queue)
        DieGracefully.die_gracefully(DieGracefully.BAD_QUEUE)
    return
def check_for_bad(a):
    """Check for bad characters in the args and throw an error if it
    detects any of them. These are characters that let users terminate the
    current command and start another, e.g., a file called
    'sample_01; rm -rf ~'"""
    v = vars(a)
    bad_chars = [';', '#', '|', '$(', '<', '>', '`']
    for option in v:
        if not v[option]:
            continue
        for bc in bad_chars:
            if bc in str(v[option]):
                DieGracefully.die_gracefully(
                    DieGracefully.NEFARIOUS_CHAR, str(v[option]))
    return
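# A hedged sketch of exercising check_for_bad() with a hand-built
# argparse namespace; the option name here is hypothetical and the
# namespace setup is not part of the pipeline itself.
import argparse

ns = argparse.Namespace(fq_folder='sample_01; rm -rf ~')
check_for_bad(ns)  # ';' is detected; exits via DieGracefully.NEFARIOUS_CHAR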
def _check_dirs(self):
    """Check that the directories exist and are readable and writeable.
    This will raise an error if we cannot find the fastq directory, or the
    output directory cannot be written to."""
    self.logger.info('Checking directories.')
    # We need to check the output directory and the working directory
    self.logger.debug('Checking output directory %s', self.outdir)
    # First, check that it exists
    if dir_funcs.dir_exists(self.outdir, self.logger):
        # Is it empty?
        if dir_funcs.dir_empty(self.outdir, self.logger):
            # And lastly, is it writeable?
            if dir_funcs.dir_writeable(self.outdir, self.logger):
                # All good!
                self.logger.debug('Output dir %s is valid', self.outdir)
            else:
                self.logger.error(
                    'Output dir %s cannot be written to!', self.outdir)
                DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
        else:
            self.logger.warning(
                'Output dir %s is not empty! Results may be clobbered.',
                self.outdir)
    else:
        self.logger.warning(
            'Output dir %s does not exist, making it', self.outdir)
        s = dir_funcs.make_dir(self.outdir, self.logger)
        if not s:
            DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    # And do the same for the work directory
    self.logger.debug('Checking working directory %s', self.workdir)
    # First, check that it exists
    if dir_funcs.dir_exists(self.workdir, self.logger):
        # Is it empty?
        if dir_funcs.dir_empty(self.workdir, self.logger):
            # And lastly, is it writeable?
            if dir_funcs.dir_writeable(self.workdir, self.logger):
                # All good!
                self.logger.debug('Working dir %s is valid', self.workdir)
            else:
                self.logger.error(
                    'Working dir %s cannot be written to!', self.workdir)
                DieGracefully.die_gracefully(DieGracefully.BAD_WORKDIR)
        else:
            self.logger.warning(
                'Working dir %s is not empty!', self.workdir)
    else:
        self.logger.warning(
            'Working dir %s does not exist, making it', self.workdir)
        s = dir_funcs.make_dir(self.workdir, self.logger)
        if not s:
            DieGracefully.die_gracefully(DieGracefully.BAD_WORKDIR)
    return
def _validate_fastq_folder(self, d):
    """Raise an error if the FASTQ directory does not exist or does not
    have any FASTQ files."""
    try:
        contents = os.listdir(d)
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_FASTQ)
    # Check if there is at least one file ending in a standard fastq
    # suffix
    fq_pat = re.compile(r'^.+((\.fq(\.gz)?$)|(\.fastq(\.gz)?$))')
    has_fastq = False
    for f in contents:
        if re.match(fq_pat, f):
            has_fastq = True
            break
    if has_fastq:
        return
    DieGracefully.die_gracefully(DieGracefully.EMPTY_FASTQ)
    return
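# A quick sanity check of the suffix pattern against a few made-up
# filenames:
import re

fq_pat = re.compile(r'^.+((\.fq(\.gz)?$)|(\.fastq(\.gz)?$))')
for name in ('samp_R1_001.fastq.gz', 'samp_R1_001.fq', 'notes.txt'):
    print(name, bool(re.match(fq_pat, name)))
# samp_R1_001.fastq.gz True
# samp_R1_001.fq True
# notes.txt False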
def brnaseq(args):
    """This function loads the bulk RNAseq pipeline module, and runs
    through the steps for bulk RNAseq analysis."""
    from CHURPipelines.Pipelines import BulkRNAseq
    p = BulkRNAseq.BulkRNAseqPipeline(args)
    p.setup(args)
    pipeline_fname, samplesheet_fname, key_name, qsub_dat = p.qsub()
    if not qsub_dat:
        DieGracefully.die_gracefully(
            DieGracefully.BRNASEQ_SUCCESS,
            pipeline_fname, samplesheet_fname, key_name)
    elif qsub_dat[2].returncode != 0:
        DieGracefully.die_gracefully(
            DieGracefully.BRNASEQ_SUBMIT_FAIL, qsub_dat)
    else:
        DieGracefully.die_gracefully(
            DieGracefully.BRNASEQ_SUBMIT_OK,
            pipeline_fname, samplesheet_fname, key_name, qsub_dat)
    return
def _get_fq_paths(self, d):
    """Read through the contents of a FASTQ directory and try to build a
    list of samples from it."""
    # Write a regular expression that will match the parts of the filename
    # that come after the sample name
    samp_re = re.compile(
        r'(_S[0-9]+)?'
        r'(_[ATCG]{4,})?'
        r'(_L00[1-8])?'
        r'(_R(1|2))?_001\.((fq(\.gz)?$)|(fastq(\.gz)?$))')
    # Get all files that look like not-R2 fastq files; make the matching
    # case-insensitive. The character class before _001 rejects '2'
    # (among other characters), so '..._R2_001' files do not match.
    fq_re = re.compile(
        r'^.+[^_R2]_001\.((fq(\.gz)?$)|(fastq(\.gz)?$))', flags=re.I)
    cont = os.listdir(d)
    # From the Illumina BaseSpace online documentation, this is what the
    # standard filenames will look like:
    #   SampleName_SX_L00Y_R1_001.fastq.gz
    #   X: Sample number in samplesheet
    #   Y: Lane number
    #   R1/2: Fwd/reverse
    #   001: Always 001.
    # This is similar to the UMGC filenames, which are split across lanes
    # and then concatenated.
    # We will also define a regex for finding SRA-like samples. This
    # should work alongside the default regex. This will just look for
    # files that are named '*_1.fastq.gz' or similar. It is not great, but
    # the SRA does not have a very specific format.
    sra_re = re.compile(r'^.+_1\.((fq(\.gz)?$)|(fastq(\.gz)?$))')
    sra_samp_re = re.compile(r'_(1|2)\.((fq(\.gz)?$)|(fastq(\.gz)?$))')
    # We will iterate through all files in the directory. If they look
    # like R1 fastq files, we will extract the sample name from them. We
    # will then build the R2 filename and ask if that exists in the
    # directory.
    for f in cont:
        if re.match(fq_re, f):
            # Extract the sample name from the fastq name
            sn = re.sub(samp_re, '', f)
            # Tack it onto the samples dictionary
            self.samples[sn] = {}
            self.samples[sn]['R1'] = os.path.join(d, f)
            # Look for the R2. This is really dumb-looking but:
            #   Reverse the R1 filename ([::-1])
            #   Replace 1R with 2R, with at most 1 replacement
            #   Reverse it again
            r2 = f[::-1].replace('1R', '2R', 1)[::-1]
            # Extract the sample name from the hypothetical R2 path. If it
            # is different from the R1 sample name, then we have messed up
            # the part of the filename that we shouldn't have - the R2
            # does not exist for this sample, and it is single-end
            r2_sn = re.sub(samp_re, '', r2)
            if r2_sn != sn:
                self.samples[sn]['R2'] = ''
            elif r2 not in cont or r2 == f:
                self.samples[sn]['R2'] = ''
            elif r2 in cont and r2 != f:
                self.samples[sn]['R2'] = os.path.join(d, r2)
            else:
                self.samples[sn]['R2'] = ''
        elif re.match(sra_re, f):
            sn = re.sub(sra_samp_re, '', f)
            self.samples[sn] = {}
            self.samples[sn]['R1'] = os.path.join(d, f)
            r2 = f[::-1].replace('1_', '2_', 1)[::-1]
            r2_sn = re.sub(sra_samp_re, '', r2)
            if r2_sn != sn:
                self.samples[sn]['R2'] = ''
            elif r2 not in cont or r2 == f:
                self.samples[sn]['R2'] = ''
            elif r2 in cont and r2 != f:
                self.samples[sn]['R2'] = os.path.join(d, r2)
            else:
                self.samples[sn]['R2'] = ''
    self.sheet_logger.debug(
        'Found samples:\n%s', pprint.pformat(self.samples))
    # Simple check - if there are no samples, then throw an error here
    if len(self.samples) == 0:
        self.sheet_logger.error(
            'No valid FASTQ files were found in the directory.')
        DieGracefully.die_gracefully(DieGracefully.EMPTY_FASTQ)
    return
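# Why the reverse-replace-reverse trick above works: str.replace()
# substitutes the leftmost occurrence, so reversing first makes it hit
# the *last* 'R1' in the name, protecting sample names that themselves
# contain 'R1'. A small demonstration on a hypothetical filename:
f = 'R1_mutant_S1_L001_R1_001.fastq.gz'
naive = f.replace('R1', 'R2', 1)           # clobbers the sample name
r2 = f[::-1].replace('1R', '2R', 1)[::-1]  # only the read token changes
print(naive)  # R2_mutant_S1_L001_R1_001.fastq.gz (wrong)
print(r2)     # R1_mutant_S1_L001_R2_001.fastq.gz (right)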
def _group_help(self, args):
    """Do a simple check to see if the pipe_group argument is set. If it
    isn't, print a nice help message."""
    if not args['pipe_group']:
        DieGracefully.die_gracefully(DieGracefully.GROUP_NO_PIPE)
    return
def _validate_args(self, a):
    """Validate arguments for the BulkRNAseqPipeline object. We define it
    in this file because it only really needs to be accessible to this
    subclass. Argument dependencies are pipeline-specific. For the bulk
    RNAseq analysis pipeline:
        - FASTQ dir and UMGC sheet are mutually exclusive
        - Organism and HISAT2 index + GTF are mutually exclusive
        - Check that the HISAT2 index is complete
    Further, sanitize the paths of the output dir, working dir, and
    hisat2 index.
    """
    # Check the completeness of the argument dictionary. -f must be
    # specified, and (-x and -g) or -r must be specified. After checking
    # the FASTQ folder, check the helper commands
    if not a['fq_folder']:
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_INC_ARGS)
    elif not ((a['hisat2_idx'] and a['gtf']) or a['organism']):
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_INC_ARGS)
    elif (a['hisat2_idx'] and a['gtf']) and a['organism']:
        DieGracefully.die_gracefully(DieGracefully.BRNASEQ_CONFLICT)
    # Convert all of the paths into absolute paths
    a['fq_folder'] = os.path.realpath(
        os.path.expanduser(str(a['fq_folder'])))
    a['hisat2_idx'] = os.path.realpath(
        os.path.expanduser(str(a['hisat2_idx'])))
    a['gtf'] = os.path.realpath(os.path.expanduser(str(a['gtf'])))
    a['outdir'] = os.path.realpath(os.path.expanduser(str(a['outdir'])))
    a['workdir'] = os.path.realpath(os.path.expanduser(str(a['workdir'])))
    if a['expr_groups']:
        a['expr_groups'] = os.path.realpath(
            os.path.expanduser(str(a['expr_groups'])))
    try:
        assert a['headcrop'] >= 0
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--headcrop')
    try:
        assert a['mincpm'] >= 0
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--min-cpm')
    try:
        assert a['rrna_screen'] >= 0
    except AssertionError:
        DieGracefully.die_gracefully(
            DieGracefully.BAD_NUMBER, '--rrna_screen')
    try:
        assert a['subsample'] >= 0
        if a['subsample'] > 0:
            assert a['subsample'] >= a['rrna_screen']
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--subsample')
    try:
        assert a['mem'] >= 12000
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--mem')
    try:
        assert a['walltime'] >= 2
    except AssertionError:
        DieGracefully.die_gracefully(DieGracefully.BAD_NUMBER, '--walltime')
    self.pipe_logger.debug('GTF: %s', a['gtf'])
    self.pipe_logger.debug('Adapters: %s', a['adapters'])
    self.pipe_logger.debug('FASTQ Folder: %s', a['fq_folder'])
    self.pipe_logger.debug('Output Dir: %s', a['outdir'])
    self.pipe_logger.debug('Working Dir: %s', a['workdir'])
    self.pipe_logger.debug('HISAT2 Idx: %s', a['hisat2_idx'])
    self.pipe_logger.debug('Expr Groups: %s', a['expr_groups'])
    self.pipe_logger.debug('Strandness: %s', a['strand'])
    # Check that the adapters and GTF file exist
    try:
        handle = open(a['gtf'], 'r')
        handle.close()
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_GTF)
    if not a['adapters']:
        a['adapters'] = '$TRIMMOMATIC/adapters/all_illumina_adapters.fa'
    else:
        try:
            a['adapters'] = os.path.realpath(
                os.path.expanduser(str(a['adapters'])))
            handle = open(a['adapters'], 'r')
            handle.close()
        except OSError:
            DieGracefully.die_gracefully(DieGracefully.BAD_ADAPT)
    if a['expr_groups']:
        try:
            handle = open(a['expr_groups'], 'r')
            handle.close()
        except OSError:
            DieGracefully.die_gracefully(DieGracefully.BRNASEQ_BAD_GPS)
    # Validate the FASTQ folder
    self._validate_fastq_folder(a['fq_folder'])
    # Validate the hisat2 index
    self._validate_hisat_idx(a['hisat2_idx'])
    # Sanitize the hisat2 index path
    a['hisat2_idx'] = dir_funcs.sanitize_path(
        a['hisat2_idx'], self.pipe_logger)
    return a
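# The dependency rules at the top of _validate_args(), restated on a toy
# argument dictionary (keys abbreviated, values hypothetical):
a = {'fq_folder': '/fq', 'hisat2_idx': None, 'gtf': None,
     'organism': 'mouse'}
complete = bool(a['fq_folder']) and bool(
    (a['hisat2_idx'] and a['gtf']) or a['organism'])
conflict = bool(a['hisat2_idx'] and a['gtf'] and a['organism'])
print(complete, conflict)  # True False: -r alone satisfies the idx/GTF rule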
def qsub(self):
    """Write the qsub command. We will need the path to the samplesheet,
    the number of samples in the samplesheet, and the scheduler options
    that were passed as arguments. This is defined in the subclass rather
    than in the main Pipeline class because the exact form of the qsub
    command depends on which pipeline we are running."""
    ss = self._prepare_samplesheet()
    # Make the qsub array key
    keyname = default_files.default_array_key(self.pipe_name)
    keyname = os.path.join(self.real_out, keyname)
    if os.path.isfile(keyname):
        self.pipe_logger.warning(
            'Sbatch key file %s exists. Overwriting!', keyname)
    try:
        handle = open(keyname, 'w')
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    # The sheet is sorted in this way before it is written to disk, so it
    # should be safe to sort it this way
    handle.write('Sbatch.Index\tSampleName\n')
    for index, samplename in enumerate(sorted(self.sheet.final_sheet)):
        handle.write(str(index + 1) + '\t' + samplename + '\n')
    handle.flush()
    handle.close()
    # Make the script filename
    pname = default_files.default_pipeline(self.pipe_name)
    pname = os.path.join(self.real_out, pname)
    if os.path.isfile(pname):
        self.pipe_logger.warning(
            'Submission script %s already exists. Overwriting!', pname)
    try:
        handle = open(pname, 'w')
    except OSError:
        DieGracefully.die_gracefully(DieGracefully.BAD_OUTDIR)
    # Write the header of the script
    handle.write('#!/bin/bash\n')
    # Write some meta-data lines
    handle.write('# Generated by CHURP version ' +
                 CHURPipelines.__version__ + '\n')
    handle.write('# Generated at ' + CHURPipelines.NOW + '\n')
    handle.write('set -e\n')
    handle.write('set -u\n')
    handle.write('set -o pipefail\n')
    self.pipe_logger.debug(
        'Number of samples: %i', len(self.sheet.final_sheet))
    self.pipe_logger.debug('Samplesheet: %s', ss)
    # Write command to figure out email address of submitting user
    handle.write('user_name="$(id -u -n)"\n')
    handle.write('user_email="${user_name}@umn.edu"\n')
    # Set the group string here
    if self.group:
        qsub_group = '-A ' + self.group
    else:
        qsub_group = ''
    qsub_array = '1'
    if len(self.sheet.final_sheet) > 1:
        qsub_array += '-' + str(len(self.sheet.final_sheet))
    # Write a few variables into the header of the script so they are
    # easy to find
    handle.write('KEYFILE=' + '"' + keyname + '"\n')
    handle.write('QSUB_ARRAY=' + '"' + qsub_array + '"\n')
    handle.write('OUTDIR=' + '"' + str(self.real_out) + '"\n')
    handle.write('WORKDIR=' + '"' + str(self.real_work) + '"\n')
    handle.write('DE_SCRIPT=' + '"' + self.de_script + '"\n')
    handle.write('REPORT_SCRIPT=' + '"' + self.report_script + '"\n')
    handle.write('SAMPLESHEET=' + '"' + ss + '"\n')
    handle.write('PURGE=' + '"' + self.purge + '"\n')
    handle.write('RRNA_SCREEN=' + '"' + self.rrna_screen + '"\n')
    handle.write('SUBSAMPLE=' + '"' + self.subsample + '"\n')
    handle.write(
        'PIPE_SCRIPT="$(cd "$( dirname "${BASH_SOURCE[0]}" )" '
        '>/dev/null && pwd )/$(basename $0)"\n')
    # These are the variables we want to export into the single sample
    # job script.
    single_cmd_vars = ','.join([
        'SampleSheet="${SAMPLESHEET}"',
        'PURGE="${PURGE}"',
        'RRNA_SCREEN="${RRNA_SCREEN}"',
        'SUBSAMPLE="${SUBSAMPLE}"'])
    aln_cmd = [
        'sbatch',
        '--parsable',
        '--ignore-pbs',
        '-p', self.msi_queue,
        '--mail-type=ALL',
        '--mail-user="******"',
        qsub_group,
        '-o', '"${OUTDIR}/bulk_rnaseq_single_sample-%A.%a.out"',
        '-e', '"${OUTDIR}/bulk_rnaseq_single_sample-%A.%a.err"',
        '-N', '1',
        '--mem=' + str(self.mem) + 'mb',
        '--tmp=12gb',
        '-n', '1',
        '-c', str(self.ppn),
        '--time=' + str(self.walltime * 60),
        '--array="${QSUB_ARRAY}"',
        '--export=' + single_cmd_vars,
        self.single_sample_script,
        '||', 'exit', '1']
    # Write the first qsub command
    handle.write('single_id=$(' + ' '.join(aln_cmd) + ')\n')
    # This is the command for counting and normalizing reads
    summary_vars = ','.join([
        'SampleSheet="${SAMPLESHEET}"',
        'MINLEN="' + self.min_gene_len + '"',
        'MINCPM="' + self.min_cpm + '"',
        'RSUMMARY="${DE_SCRIPT}"',
        'PIPE_SCRIPT="${PIPE_SCRIPT}"',
        'BULK_RNASEQ_REPORT="${REPORT_SCRIPT}"'])
    summary_cmd = [
        'sbatch',
        '--parsable',
        '--ignore-pbs',
        '-p', self.msi_queue,
        '--mail-type=ALL',
        '--mail-user="******"',
        qsub_group,
        '-o', '"${OUTDIR}/run_summary_stats-%j.out"',
        '-e', '"${OUTDIR}/run_summary_stats-%j.err"',
        '-N', '1',
        '--mem=' + str(self.mem) + 'mb',
        '--tmp=12gb',
        '-n', '1',
        '-c', str(self.ppn),
        '--time=720',
        '--depend=afterok:${single_id}',
        '--export=' + summary_vars,
        self.summary_script,
        '||', 'exit', '1']
    # Write the second command
    handle.write('summary_id=$(' + ' '.join(summary_cmd) + ')\n')
    # Write some echo statements for users' information
    handle.write('echo "Output and logs will be written to ${OUTDIR}"\n')
    handle.write('echo "Emails will be sent to ${user_email}"\n')
    handle.write('echo "Sbatch array to samplename key: ${KEYFILE}"\n')
    handle.write('echo "Single samples job array ID: ${single_id}"\n')
    handle.write('echo "Summary job ID: ${summary_id}"\n')
    self.pipe_logger.debug('sbatch:\n%s', ' '.join(aln_cmd))
    self.pipe_logger.debug('sbatch:\n%s', ' '.join(summary_cmd))
    handle.flush()
    handle.close()
    # Check if we want to automatically submit the script
    if self.nosubmit:
        qsub_dat = None
    else:
        qsub_cmd = ['bash', pname]
        qsub_proc = subprocess.Popen(
            qsub_cmd,
            shell=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        qsub_stdout, qsub_stderr = qsub_proc.communicate()
        qsub_dat = (qsub_stdout, qsub_stderr, qsub_proc)
    return (pname, ss, keyname, qsub_dat)