def run(self, sample_dir, patientid=None, min_reads=MIN_READS, min_alt_reads=MIN_ALT_READS, min_frequency=MIN_FREQ, allmuts=True):
    """Build the mutation input file, run the CMD_SCICLONE shell command and e-mail the outcome.

    Args:
        sample_dir: Sample directory; results go to ``<sample_dir>/sciClone/<allmuts|coding>``.
        patientid: Patient identifier; defaults to the basename of ``sample_dir``.
        min_reads: Passed to ``create_sciclone_muts_file`` and to the command template.
        min_alt_reads: Passed to ``create_sciclone_muts_file``.
        min_frequency: Passed to ``create_sciclone_muts_file``.
        allmuts: Selects the ``allmuts`` (True) or ``coding`` (False) output sub-directory.

    Returns:
        The return code of the spawned shell command.
    """
    timer = Timer(start_now=True)
    out_dir = os.path.join(sample_dir, 'sciClone',
                           'allmuts' if allmuts else 'coding')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)
    if patientid is None:
        patientid = os.path.basename(sample_dir)

    # Python-side log goes to "<prefix>.pylog"; the command's output goes to
    # "<prefix><filterstring>.Rlog" below.
    log_file = os.path.join(out_dir, 'sciclone-run.{}'.format(min_reads))
    logging.basicConfig(filename=log_file + '.pylog',
                        filemode="w+",
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%H:%M:%S',
                        level=logging.DEBUG)
    logging.debug(log_file)

    reporter = MutationReporter(patientid, sample_dir)
    muts_file, filterstring = create_sciclone_muts_file(
        reporter=reporter,
        out_dir=out_dir,
        patient=patientid,
        min_reads=min_reads,
        min_alt_reads=min_alt_reads,
        min_frequency=min_frequency)
    cna_segments_file = os.path.join(sample_dir, 'copywriteR', 'CNAprofiles',
                                     'log2_CNA.segmented.tsv')
    cmd = CMD_SCICLONE.format(patientid, out_dir, muts_file,
                              cna_segments_file, min_reads, filterstring)
    logging.info('Full command: "{}"'.format(cmd))

    # The context manager guarantees the log handle is closed even when
    # spawning or waiting for the process raises.
    with open(log_file + '{}.Rlog'.format(filterstring), 'w+') as rlogfile:
        c = subprocess.Popen(cmd,
                             shell=True,
                             stderr=rlogfile,
                             stdout=rlogfile)
        c.communicate()

    logging.debug('R Return code {}'.format(c.returncode))
    elapsed_time = timer.get_elapsed_time()
    logging.info(elapsed_time)
    if c.returncode == 0:
        send_email('*****@*****.**', "{} SCICLONE done".format(patientid),
                   elapsed_time)
    else:
        send_email('*****@*****.**',
                   "FAILURE of {} SCICLONE".format(patientid), elapsed_time)
    return c.returncode
def run(self, input_dir, exec_dir='.', sample_name=None, passes=2, trim=True):
    """Optionally run cutadapt, then run STAR, and e-mail the outcome.

    Args:
        input_dir: Directory holding the input FASTQ files.
        exec_dir: Directory the process changes into before running the tools.
        sample_name: Forwarded to the parent ``run``.
        passes: Forwarded to ``run_star``.
        trim: When true, ``run_cutadapt`` is run first and STAR reads from
            the trim work directory; otherwise STAR reads from ``input_dir``.

    Returns:
        The value returned by ``run_star``.
    """
    super().run(input_dir, exec_dir, sample_name)
    self.check_fastqs_exist(input_dir, self.sample)

    reads_dir = self.work_dirs.get(TRIM)
    align_dir = self.work_dirs.get(ALIGN)
    config = self.load_configuration()
    os.chdir(exec_dir)

    if not trim:
        # No trimming requested: align the raw input directly.
        reads_dir = input_dir
    else:
        self.run_cutadapt(config, input_dir, self.sample, reads_dir)

    status = self.run_star(config, self.sample, align_dir, reads_dir, passes,
                           trim)

    elapsed = self.timer.get_elapsed_time()
    logging.debug(self.timer.get_elapsed_time())

    recipient = config['DEFAULT.notify_email']
    if status != 0:
        subject = "FAILURE of {} STAR-ALIGN".format(self.sample)
    else:
        subject = "{} STAR-ALIGN done".format(self.sample)
    send_email(recipient, subject, elapsed)
    return status
def run(self, input_bam, exec_dir='.', sample_name=None, detect_unannotated=False):
    """Run cufflinks on a BAM file and e-mail the outcome.

    Args:
        input_bam: BAM file handed to ``run_cufflinks``.
        exec_dir: Directory the process changes into before running.
        sample_name: Forwarded to the parent ``run``.
        detect_unannotated: Selects the CUFFLINKS_NOVEL work directory and
            the "guide" run type; otherwise CUFFLINKS and "normal".

    Returns:
        The value returned by ``run_cufflinks``.
    """
    super().run(input_bam, exec_dir, sample_name)
    # NOTE(review): `get_popvar_vcf` is an unusual accessor name for a work
    # directory lookup - confirm it is the intended method.
    if detect_unannotated:
        workdir_cufflinks = self.work_dirs.get_popvar_vcf(CUFFLINKS_NOVEL)
    else:
        workdir_cufflinks = self.work_dirs.get_popvar_vcf(CUFFLINKS)
    conf_dict = self.load_configuration()
    os.chdir(exec_dir)

    c = self.run_cufflinks(conf_dict, self.sample, input_bam,
                           workdir_cufflinks, detect_unannotated)

    elapsed_time = self.timer.get_elapsed_time()
    logging.debug(self.timer.get_elapsed_time())

    runtype = "guide" if detect_unannotated else "normal"
    # Each subject needs one placeholder per argument; previously the run
    # type was passed to format() but never rendered.
    if c == 0:
        subject = "{} {} ({}) done".format(self.sample, self.task_name,
                                           runtype)
    else:
        subject = "FAILURE of {} {} ({})".format(self.sample, self.task_name,
                                                 runtype)
    send_email(conf_dict['DEFAULT.notify_email'], subject, elapsed_time)
    return c
def run(self, patient_dir=None, patientid=None, sample_dir=None, scratch_dir=None, alt_gtf=None):
    """Run the CMD_RNA command for one sample or every sample of a patient.

    Samples are processed one after another; processing stops at the first
    non-zero return code. A summary e-mail is sent at the end.

    Args:
        patient_dir: Directory whose entries are treated as sample
            directories (used when ``sample_dir`` is None).
        patientid: Forwarded to the parent ``run``; also influences the
            identifier used in the e-mail subject.
        sample_dir: Single sample directory to process.
        scratch_dir: Optional directory in which a temporary output
            directory is created; the command then moves it to the final
            output directory.
        alt_gtf: Optional replacement for the configured transcript GTF.
    """
    assert patient_dir != sample_dir
    base_dir = patient_dir if patient_dir is not None else sample_dir
    super().run(base_dir, base_dir, patientid)
    conf_dict = self.load_configuration()
    if alt_gtf is not None:
        conf_dict[TRANSCRIPT_GTF] = alt_gtf

    if sample_dir is None:
        samples = {
            sdir: os.path.join(patient_dir, sdir)
            for sdir in os.listdir(patient_dir)
        }
    else:
        samples = {os.path.basename(sample_dir): sample_dir}

    # Loop-invariant, so configure logging once up front.
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%H:%M:%S',
                        level=logging.DEBUG)

    jobs = {}
    for sampleid, sdir in samples.items():
        print(sdir)
        sample_bam_dir = join(sdir, 'align')
        bamfile = "{0}/{1}.bam".format(sample_bam_dir, sampleid)
        if not os.path.exists(bamfile):
            logging.warning(
                "BAM file not found: {}\n-------------------------\n".format(
                    bamfile))
            continue
        logging.info(
            "BAM file: {}\n-------------------------\n".format(bamfile))

        outdir = join(sample_bam_dir, 'qc', 'rnaseqc')
        if scratch_dir is None:
            scratch_dir_temp = outdir
        else:
            # Expand "~" before creating the directory so the directory
            # that is created is the same one mkdtemp later writes into.
            scratch_root = os.path.expanduser(scratch_dir)
            os.makedirs(scratch_root, exist_ok=True)
            prefix = join(scratch_root, 'TMP.seqc-' + sampleid)
            scratch_dir_temp = tempfile.mkdtemp(prefix=prefix)

        if scratch_dir_temp != outdir:
            mv_scratch = '; mv {} {}'.format(scratch_dir_temp, outdir)
        else:
            mv_scratch = "; "
        cmd = CMD_RNA.format(
            **{
                'PIPEFAIL': "set -euxo pipefail; ",
                'RNASEQC_JAR': conf_dict[RNASEQC_JAR],
                'OUTDIR': scratch_dir_temp,
                'BAM': bamfile,
                'GTF': conf_dict[TRANSCRIPT_GTF],
                'GENOME': conf_dict[GENOME],
                'SAMPLE': sampleid,
                'MV_SCRATCH': mv_scratch
            })

        os.makedirs(join(sample_bam_dir, 'qc'), exist_ok=True)
        print(cmd)
        c = ExternalCommand(cmd, output_file=outdir).run()
        jobs[sampleid] = c
        if c != 0:
            # Fail fast: do not start further samples after a failure.
            break

    returncodes = {}
    for p, c in jobs.items():
        logging.debug('Return code of {} is: {}'.format(p, c))
        returncodes[p] = c

    elapsed_time = self.timer.get_elapsed_time()
    logging.info(elapsed_time)

    if patientid is None and patient_dir is not None:
        reportid = os.path.basename(patient_dir)
    elif samples:
        reportid = next(iter(samples))
    else:
        # Empty patient directory: no sample name to report, use the id.
        reportid = patientid

    # An empty result set (no BAM processed) is reported as a failure
    # instead of crashing in max()/min() on an empty sequence.
    if returncodes and all(code == 0 for code in returncodes.values()):
        send_email('*****@*****.**', "{} RNA-SEQ-C done".format(reportid),
                   elapsed_time)
    else:
        send_email(
            '*****@*****.**', "FAILURE of {} RNA-SEQ-C".format(reportid),
            "{}\n\n{}".format(elapsed_time, returncodes
                              or "no BAM file was processed"))
def run(self, out_dir=None, covered_regions_bed=COVERED_BED, genes_bed=GENES_BED, bin_size='20kb', normal_sample="CR", time_points=None):
    """Run the CopywriteR command for this patient and e-mail the outcome.

    Args:
        out_dir: Parent of the ``copywriteR`` output directory; defaults to
            ``self.patient_dir``.
        covered_regions_bed: Passed to the command template.
        genes_bed: Passed to the command template.
        bin_size: Bin size such as ``'20kb'``; ``'kb'`` is replaced by
            ``'000'`` for the command, and the raw value names the result
            directory ``CNAprofiles_<bin_size>``.
        normal_sample: Suffix of the normal sample's BAM folder and file.
        time_points: Tumor time points; defaults to ``[ID, REL]``. Two
            entries select the two-tumor template, anything else the
            single-tumor template.

    Returns:
        The return code of the external command.

    Raises:
        RuntimeError: If the patient id has fewer than four characters.
    """
    timer = Timer(start_now=True)
    if out_dir is None:
        out_dir = self.patient_dir
    if len(self.patient) < 4:
        raise RuntimeError("too short patientid: {}".format(self.patient))
    self.time_points = [ID, REL] if time_points is None else time_points
    two_tumors = len(self.time_points) == 2

    bcbio_folder = os.path.dirname(
        get_mut_batch_folder(self.patient, self.patient_dir,
                             self.conf_dict[BCBIO_BACKUP]))
    id_file = os.path.join(
        bcbio_folder, self.patient + '_' + self.time_points[0],
        self.patient + '_' + '{}-ready.bam'.format(self.time_points[0]))
    if two_tumors:
        rel_file = os.path.join(
            bcbio_folder, self.patient + '_' + self.time_points[1],
            self.patient + '_' + '{}-ready.bam'.format(self.time_points[1]))
    cr_file = os.path.join(
        bcbio_folder, self.patient + '_' + normal_sample,
        self.patient + '_{}-ready.bam'.format(normal_sample))

    out_dir = os.path.join(out_dir, 'copywriteR')
    profiles_link = os.path.join(out_dir, "CNAprofiles")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    elif os.path.exists(profiles_link) and os.path.islink(profiles_link):
        # Remove a symlink left at the default result location.
        logging.info("Unlinking CNAprofiles")
        os.unlink(profiles_link)

    bin_numeric = bin_size.replace('kb', '000')
    if two_tumors:
        cmd = CMD_CPWR.format(self.patient, out_dir, id_file, rel_file,
                              cr_file, covered_regions_bed, genes_bed,
                              bin_numeric)
    else:
        cmd = CMD_CPWR_one_tumor_sample.format(self.patient, out_dir,
                                               id_file, cr_file,
                                               covered_regions_bed,
                                               genes_bed, bin_numeric)

    oldprofiles = 'CNAprofiles'
    newprofiles = 'CNAprofiles_' + bin_size
    cmd_cleaning = CMD_MV.format(bin_size, out_dir, oldprofiles, newprofiles)
    returncode = ExternalCommand(" ; ".join(["set -e", cmd, cmd_cleaning]),
                                 output_file=os.path.join(
                                     out_dir, newprofiles)).run()

    elapsed_time = timer.get_elapsed_time()
    logging.info(elapsed_time)

    # Best effort: append up to three "mad" lines of the tool's log to the
    # success e-mail; any failure here must not affect the return code.
    loginfo = "\n"
    try:
        logfile = os.path.join(out_dir, newprofiles, "CopywriteR.log")
        # Raw string keeps the sed pattern's backslash without relying on
        # an invalid Python escape sequence.
        logproc = subprocess.Popen(
            r"grep -i mad {} | head -3 | sed 's/^INFO.\+]//'".format(logfile),
            stdout=subprocess.PIPE,
            shell=True)
        loginfo += logproc.communicate()[0].decode()
    except Exception as e:
        # The message needs a placeholder for the exception argument.
        logging.debug("log-error: %s", e)

    if returncode == 0:
        send_email('*****@*****.**',
                   "{} COPYWRITER {} done".format(self.patient, bin_size),
                   elapsed_time + loginfo)
    else:
        send_email(
            '*****@*****.**',
            "FAILURE of {} COPYWRITER {}".format(self.patient, bin_size),
            elapsed_time)
    return returncode
def run(self, patient_dir, patientid=None, time_points=None, exonseq=False, rnaseq=False, panel=False):
    """Launch one Qualimap process per time point, wait for all, and e-mail the outcome.

    Args:
        patient_dir: Patient directory containing the per-sample folders.
        patientid: Patient identifier; defaults to the basename of
            ``patient_dir``.
        time_points: Time points to process. When None or empty, defaults
            to ``[ID, REL]`` for RNA-seq and ``[ID, REL, CR]`` otherwise.
        exonseq: Use the ``bcbio`` folder and the exon target regions.
        rnaseq: Use the RNA command on ``<patient>_<t>/align``.
        panel: Use the ``panel`` folder and the panel target regions.

    Raises:
        ValueError: If none of ``exonseq``, ``rnaseq`` or ``panel`` is set.
    """
    super().run(patient_dir, patient_dir, patientid)
    if time_points is None or time_points == []:
        # Defaults only apply when the caller supplied no time points.
        time_points = [ID, REL] if rnaseq else [ID, REL, CR]
    conf_dict = self.load_configuration()
    if patientid is None:
        patientid = os.path.basename(patient_dir)
    if not (exonseq or panel or rnaseq):
        # Without a mode there is no BAM path or command to build.
        raise ValueError("one of exonseq, rnaseq or panel must be set")

    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%H:%M:%S',
                        level=logging.DEBUG)

    jobs = {}
    for t in time_points:
        if exonseq or panel:
            folder = 'bcbio' if exonseq else 'panel'
            target_regions_bed = conf_dict[TARGET_REGIONS][
                'exon' if exonseq else 'panel']
            sample_bam_dir = join(patient_dir, folder,
                                  '{}_{}'.format(patientid, t))
            bamfile = "{0}/{1}_{2}-ready.bam".format(sample_bam_dir,
                                                     patientid, t)
            cmd = CMD_EXON.format(
                **{
                    'QUALIMAP_BIN': conf_dict[QUALIMAP_BIN],
                    'OUTDIR': join(sample_bam_dir, 'qc', 'qualimap'),
                    'TARGET_BED': target_regions_bed,
                    'BAM': bamfile
                })
        else:
            sample_bam_dir = join(patient_dir, '{}_{}'.format(patientid, t),
                                  'align')
            bamfile = "{0}/{1}_{2}.bam".format(sample_bam_dir, patientid, t)
            countfile = bamfile.replace('.bam', 'qualimap-genecounts.txt')
            cmd = CMD_RNA.format(
                **{
                    'QUALIMAP_BIN': conf_dict[QUALIMAP_BIN],
                    'OUTDIR': join(sample_bam_dir, 'qc', 'qualimap'),
                    'BAM': bamfile,
                    'GTF': conf_dict[TRANSCRIPT_GTF],
                    'COUNTS_FILE': countfile
                })

        if not os.path.exists(bamfile):
            logging.warning("BAM file not found: {}".format(bamfile))
            continue
        logging.info("BAM file: {}".format(bamfile))

        os.makedirs(join(sample_bam_dir, 'qc'), exist_ok=True)
        logfilename = join(sample_bam_dir, 'qc',
                           '{}_{}-qualimap.log'.format(patientid, t))
        logging.info('Full command: "{}"'.format(cmd))
        logging.info('See log file: {}'.format(logfilename))
        # The child receives its own copy of the descriptor, so the parent
        # can close its handle as soon as the process has been started.
        with open(logfilename, 'w+') as log_file:
            c = subprocess.Popen(cmd,
                                 shell=True,
                                 stderr=log_file,
                                 stdout=log_file)
        jobs["{}_{}".format(patientid, t)] = c

    returncodes = {}
    for p, j in jobs.items():
        j.wait()
        logging.debug('Return code of {} is: {}'.format(p, j.returncode))
        returncodes[p] = j.returncode

    elapsed_time = self.timer.get_elapsed_time()
    logging.info(elapsed_time)
    # No started job (every BAM missing) counts as a failure rather than
    # crashing in max()/min() on an empty sequence.
    if returncodes and all(code == 0 for code in returncodes.values()):
        send_email('*****@*****.**', "{} Qualimap done".format(patientid),
                   elapsed_time)
    else:
        send_email(
            '*****@*****.**', "FAILURE of {} Qualimap".format(patientid),
            "{}\n\n{}".format(elapsed_time, returncodes
                              or "no BAM file was processed"))
def run(self, patient_dir=None, patient_id=None, sample_dir=None, scratch_dir=None, output_dir=None):
    """Run the three deFuse commands (CMD_FASTA, CMD_DEFUSE, CMD_CLEANUP) in order.

    Nothing is run when the ``results.tsv`` output file already exists.
    Execution stops at the first command with a non-zero return code and
    a failure e-mail is sent; otherwise a success e-mail is sent.

    Args:
        patient_dir: Only checked to differ from ``sample_dir``.
        patient_id: Forwarded to the parent ``run``.
        sample_dir: Input directory for the first command.
        scratch_dir: Scratch directory; defaults to the ``defuse`` scratch
            work directory.
        output_dir: Forwarded to the parent ``run``; defaults to the
            current working directory.
    """
    assert patient_dir != sample_dir
    super().run(sample_dir,
                os.getcwd() if output_dir is None else output_dir,
                patient_id=patient_id)
    config = self.load_configuration()

    results_path = self.output_files.generate('defuse', 'results.tsv')
    if os.path.isfile(results_path):
        logger.warning(
            "FILE already exists {}. \n\t EXITING".format(results_path))
        return

    final_dir = self.work_dirs.get('defuse')
    if scratch_dir is None:
        scratch_dir = self.work_dirs.get('defuse', scratch=True)

    # Step 1: write the input reads into the scratch directory.
    fasta_step = ExternalCommand(
        CMD_FASTA.format(INPUT_DIR=sample_dir,
                         SCRATCH_DIR=scratch_dir,
                         SAMPLE=self.sample),
        output_file=os.path.join(scratch_dir, self.sample + "_R2.fastq"))
    # Step 2: the deFuse call itself.
    defuse_step = ExternalCommand(
        CMD_DEFUSE.format(CORES=config[DEFUSE_CORES],
                          DEFUSE_CONFIG=config[DEFUSE_CONFIG],
                          DEFUSE_DATA_DIR=config[DEFUSE_DATA_DIR],
                          SCRATCH_DIR=scratch_dir,
                          SAMPLE=self.sample))
    # Step 3: cleanup towards the final output folder.
    cleanup_step = ExternalCommand(
        CMD_CLEANUP.format(SCRATCH_DIR=scratch_dir,
                           SAMPLE=self.sample,
                           OUTPUT_FOLDER=final_dir))

    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%H:%M:%S',
                        level=logging.DEBUG)

    for step in (fasta_step, defuse_step, cleanup_step):
        status = step.run()
        elapsed = self.timer.get_elapsed_time()
        if status == 0:
            continue
        send_email('*****@*****.**',
                   "FAILURE of {} DEFUSE".format(self.sample), elapsed)
        return

    logging.info(elapsed)
    send_email('*****@*****.**', "{} DEFUSE done".format(self.sample),
               elapsed)