def run(self,
            sample_dir,
            patientid=None,
            min_reads=MIN_READS,
            min_alt_reads=MIN_ALT_READS,
            min_frequency=MIN_FREQ,
            allmuts=True):
        """Run the sciClone shell command for one sample and e-mail the outcome.

        Creates ``<sample_dir>/sciClone/<allmuts|coding>``, points root logging
        at a ``.pylog`` file in that directory, builds the mutation input via
        ``create_sciclone_muts_file``, formats ``CMD_SCICLONE`` and executes it
        through the shell with stdout and stderr redirected to an ``.Rlog``
        file next to the Python log.

        Args:
            sample_dir: Directory holding the sample data; also expected to
                contain ``copywriteR/CNAprofiles/log2_CNA.segmented.tsv``.
            patientid: Identifier used for the reporter, the command and the
                e-mail subject. Defaults to the basename of ``sample_dir``.
            min_reads: Forwarded to the mutation-file builder and the command;
                also part of the log file name.
            min_alt_reads: Forwarded to the mutation-file builder.
            min_frequency: Forwarded to the mutation-file builder.
            allmuts: Selects the output sub-directory name (``allmuts`` when
                true, ``coding`` otherwise).

        Returns:
            The exit code of the shell command.
        """
        timer = Timer(start_now=True)
        out_dir = os.path.join(sample_dir, 'sciClone',
                               'allmuts' if allmuts else 'coding')
        # exist_ok avoids the race between an exists() check and makedirs().
        os.makedirs(out_dir, exist_ok=True)

        if patientid is None:
            patientid = os.path.basename(sample_dir)

        # Logging: basicConfig only takes effect if the root logger has no
        # handlers yet.
        log_file = os.path.join(out_dir, 'sciclone-run.{}'.format(min_reads))
        logging.basicConfig(filename=log_file + '.pylog',
                            filemode="w+",
                            format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.DEBUG)
        logging.debug(log_file)

        reporter = MutationReporter(patientid, sample_dir)
        muts_file, filterstring = create_sciclone_muts_file(
            reporter=reporter,
            out_dir=out_dir,
            patient=patientid,
            min_reads=min_reads,
            min_alt_reads=min_alt_reads,
            min_frequency=min_frequency)

        cna_segments_file = os.path.join(sample_dir, 'copywriteR',
                                         'CNAprofiles',
                                         'log2_CNA.segmented.tsv')

        cmd = CMD_SCICLONE.format(patientid, out_dir, muts_file,
                                  cna_segments_file, min_reads, filterstring)
        logging.info('Full command: "{}"'.format(cmd))

        # The context manager guarantees the R log is closed even if Popen or
        # communicate() raises.
        with open(log_file + '{}.Rlog'.format(filterstring), 'w+') as rlogfile:
            c = subprocess.Popen(cmd,
                                 shell=True,
                                 stderr=rlogfile,
                                 stdout=rlogfile)
            # Output goes to the file, so communicate() is used only to wait.
            c.communicate()
        logging.debug('R Return code {}'.format(c.returncode))

        elapsed_time = timer.get_elapsed_time()
        logging.info(elapsed_time)

        if c.returncode == 0:
            subject = "{} SCICLONE done".format(patientid)
        else:
            subject = "FAILURE of {} SCICLONE".format(patientid)
        send_email('*****@*****.**', subject, elapsed_time)
        return c.returncode
    def run(self,
            input_dir,
            exec_dir='.',
            sample_name=None,
            passes=2,
            trim=True):
        """Optionally trim the sample's FASTQs, then align them with STAR.

        After the base-class ``run`` and a FASTQ existence check, the method
        changes into ``exec_dir``, calls ``run_cutadapt`` when ``trim`` is
        true (otherwise the alignment reads straight from ``input_dir``),
        calls ``run_star`` and mails the result to the address stored under
        ``DEFAULT.notify_email`` in the loaded configuration.

        Args:
            input_dir: Directory checked for the sample's FASTQ files.
            exec_dir: Directory to ``chdir`` into before running the tools.
            sample_name: Forwarded to the base-class ``run``.
            passes: Forwarded to ``run_star``.
            trim: Whether to call ``run_cutadapt`` first.

        Returns:
            The value returned by ``run_star`` (0 is treated as success).
        """
        super().run(input_dir, exec_dir, sample_name)

        self.check_fastqs_exist(input_dir, self.sample)

        trim_dir = self.work_dirs.get(TRIM)
        align_dir = self.work_dirs.get(ALIGN)

        conf_dict = self.load_configuration()

        os.chdir(exec_dir)

        if not trim:
            # Without trimming, STAR reads the raw input directory.
            trim_dir = input_dir
        else:
            self.run_cutadapt(conf_dict, input_dir, self.sample, trim_dir)

        status = self.run_star(conf_dict, self.sample, align_dir, trim_dir,
                               passes, trim)

        elapsed_time = self.timer.get_elapsed_time()
        logging.debug(self.timer.get_elapsed_time())

        recipient = conf_dict['DEFAULT.notify_email']
        subject = ("{} STAR-ALIGN done".format(self.sample) if status == 0 else
                   "FAILURE of {} STAR-ALIGN".format(self.sample))
        send_email(recipient, subject, elapsed_time)

        return status
Beispiel #3
0
    def run(self,
            input_bam,
            exec_dir='.',
            sample_name=None,
            detect_unannotated=False):
        """Run cufflinks on a BAM file and e-mail the outcome.

        Calls the base-class ``run``, selects the work directory for the
        chosen mode, changes into ``exec_dir``, calls ``run_cufflinks`` and
        mails the result to the address stored under
        ``DEFAULT.notify_email`` in the loaded configuration.

        Args:
            input_bam: BAM file forwarded to ``run_cufflinks``.
            exec_dir: Directory to ``chdir`` into before running.
            sample_name: Forwarded to the base-class ``run``.
            detect_unannotated: Selects the ``CUFFLINKS_NOVEL`` work directory
                instead of ``CUFFLINKS`` and is forwarded to
                ``run_cufflinks``.

        Returns:
            The value returned by ``run_cufflinks`` (0 is treated as success).
        """
        super().run(input_bam, exec_dir, sample_name)

        # NOTE(review): other run() methods in this source fetch work dirs via
        # ``work_dirs.get(...)``; confirm ``get_popvar_vcf`` is intended here.
        if detect_unannotated:
            workdir_cufflinks = self.work_dirs.get_popvar_vcf(CUFFLINKS_NOVEL)
        else:
            workdir_cufflinks = self.work_dirs.get_popvar_vcf(CUFFLINKS)

        conf_dict = self.load_configuration()

        os.chdir(exec_dir)

        c = self.run_cufflinks(conf_dict, self.sample, input_bam,
                               workdir_cufflinks, detect_unannotated)

        # Log the same value that is mailed instead of sampling the timer twice.
        elapsed_time = self.timer.get_elapsed_time()
        logging.debug(elapsed_time)

        runtype = "guide" if detect_unannotated else "normal"
        if c == 0:
            # The original "()" had no placeholder, so runtype was dropped.
            subject = "{} {} ({}) done".format(self.sample, self.task_name,
                                               runtype)
        else:
            # The original had two placeholders for three arguments.
            subject = "FAILURE of {} CUFFLINKS {} ({})".format(
                self.sample, self.task_name, runtype)
        send_email(conf_dict['DEFAULT.notify_email'], subject, elapsed_time)

        return c
Beispiel #4
0
    def run(self,
            patient_dir=None,
            patientid=None,
            sample_dir=None,
            scratch_dir=None,
            alt_gtf=None):
        """Run the RNA-SeQC command for one sample or every sample of a patient.

        When ``sample_dir`` is ``None`` every entry of ``patient_dir`` is
        treated as a sample directory; otherwise only ``sample_dir`` is
        processed. For each sample whose ``align/<sample>.bam`` exists,
        ``CMD_RNA`` is formatted and executed through ``ExternalCommand``.
        Processing stops at the first non-zero return code. A summary e-mail
        is sent at the end.

        Args:
            patient_dir: Directory whose entries are sample directories.
            patientid: Forwarded to the base-class ``run``; also the report id
                of last resort when no sample is available.
            sample_dir: Single sample directory; must differ from
                ``patient_dir``.
            scratch_dir: Optional directory in which a temporary output
                directory is created per sample; the command moves it to the
                final location afterwards. ``~`` is expanded.
            alt_gtf: Optional replacement for the configured transcript GTF.
        """
        assert patient_dir != sample_dir

        base_dir = patient_dir if patient_dir is not None else sample_dir
        super().run(base_dir, base_dir, patientid)

        conf_dict = self.load_configuration()
        if alt_gtf is not None:
            conf_dict[TRANSCRIPT_GTF] = alt_gtf

        if sample_dir is None:
            samples = {
                entry: os.path.join(patient_dir, entry)
                for entry in os.listdir(patient_dir)
            }
        else:
            samples = {os.path.basename(sample_dir): sample_dir}

        # Expand "~" once so the directory that is created is the same one the
        # temporary directory is placed in.
        scratch_root = (None if scratch_dir is None else
                        os.path.expanduser(scratch_dir))

        # Configure logging once, before the first message is emitted;
        # basicConfig is a no-op once the root logger has handlers.
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.DEBUG)

        returncodes = {}
        for sampleid, sdir in samples.items():
            print(sdir)
            sample_bam_dir = join(sdir, 'align')
            bamfile = "{0}/{1}.bam".format(sample_bam_dir, sampleid)

            if not os.path.exists(bamfile):
                logging.warning(
                    "BAM file not found: {}\n-------------------------\n".
                    format(bamfile))
                continue
            logging.info(
                "BAM file: {}\n-------------------------\n".format(bamfile))

            outdir = join(sample_bam_dir, 'qc', 'rnaseqc')
            if scratch_root is None:
                scratch_dir_temp = outdir
            else:
                os.makedirs(scratch_root, exist_ok=True)
                # dir= keeps a relative scratch path relative to the working
                # directory instead of the system temp directory.
                scratch_dir_temp = tempfile.mkdtemp(
                    prefix='TMP.seqc-' + sampleid, dir=scratch_root)

            cmd = CMD_RNA.format(
                **{
                    'PIPEFAIL':
                    "set -euxo pipefail; ",
                    'RNASEQC_JAR':
                    conf_dict[RNASEQC_JAR],
                    'OUTDIR':
                    scratch_dir_temp,
                    'BAM':
                    bamfile,
                    'GTF':
                    conf_dict[TRANSCRIPT_GTF],
                    'GENOME':
                    conf_dict[GENOME],
                    'SAMPLE':
                    sampleid,
                    # Move scratch output to its final place only when a
                    # separate scratch directory was used.
                    'MV_SCRATCH':
                    '; mv {} {}'.format(scratch_dir_temp, outdir)
                    if scratch_dir_temp != outdir else "; "
                })

            os.makedirs(join(sample_bam_dir, 'qc'), exist_ok=True)

            print(cmd)
            code = ExternalCommand(cmd, output_file=outdir).run()
            returncodes[sampleid] = code
            if code != 0:
                # Stop at the first failing sample.
                break

        for sampleid, code in returncodes.items():
            logging.debug('Return code of {} is: {}'.format(sampleid, code))

        elapsed_time = self.timer.get_elapsed_time()
        logging.info(elapsed_time)

        if patientid is None and patient_dir is not None:
            reportid = os.path.basename(patient_dir)
        else:
            # Fall back to patientid when the patient directory had no entries
            # (the original indexed an empty list here).
            reportid = next(iter(samples), patientid)

        if not returncodes:
            # max()/min() on an empty dict used to raise ValueError here.
            logging.warning("No BAM files were processed for {}".format(
                reportid))

        succeeded = bool(returncodes) and all(
            code == 0 for code in returncodes.values())
        if succeeded:
            send_email('*****@*****.**',
                       "{} RNA-SEQ-C done".format(reportid), elapsed_time)
        else:
            send_email('*****@*****.**',
                       "FAILURE of {}  RNA-SEQ-C".format(reportid),
                       "{}\n\n{}".format(elapsed_time, returncodes))
    def run(self,
            out_dir=None,
            covered_regions_bed=COVERED_BED,
            genes_bed=GENES_BED,
            bin_size='20kb',
            normal_sample="CR",
            time_points=None):
        """Run the CopywriteR command for this patient and e-mail the outcome.

        Builds the ``*-ready.bam`` paths for the tumour time point(s) and the
        normal sample below the bcbio folder, formats the two-tumour or
        single-tumour command, appends the ``CMD_MV`` step and executes
        everything through ``ExternalCommand``. Lines matching "mad" in the
        resulting ``CopywriteR.log`` are appended to the success e-mail.

        Args:
            out_dir: Parent of the ``copywriteR`` output directory; defaults to
                ``self.patient_dir``.
            covered_regions_bed: Forwarded to the command.
            genes_bed: Forwarded to the command.
            bin_size: Bin size such as ``'20kb'``; ``kb`` is replaced by
                ``000`` for the command, and the original string names the
                ``CNAprofiles_<bin_size>`` folder.
            normal_sample: Suffix of the normal sample's folder and BAM file.
            time_points: Tumour time points, stored on ``self.time_points``;
                defaults to ``[ID, REL]``. Exactly two entries select the
                two-tumour command; any other length uses the single-tumour
                command with the first entry.

        Returns:
            The return code of the combined external command.

        Raises:
            RuntimeError: If ``self.patient`` is shorter than four characters.
        """
        timer = Timer(start_now=True)

        if out_dir is None:
            out_dir = self.patient_dir

        if len(self.patient) < 4:
            raise RuntimeError("too short patientid: {}".format(self.patient))

        self.time_points = [ID, REL] if time_points is None else time_points
        two_tumor_samples = len(self.time_points) == 2

        bcbio_folder = os.path.dirname(
            get_mut_batch_folder(self.patient, self.patient_dir,
                                 self.conf_dict[BCBIO_BACKUP]))

        id_file = os.path.join(
            bcbio_folder, self.patient + '_' + self.time_points[0],
            self.patient + '_' + '{}-ready.bam'.format(self.time_points[0]))
        if two_tumor_samples:
            rel_file = os.path.join(
                bcbio_folder, self.patient + '_' + self.time_points[1],
                self.patient + '_' +
                '{}-ready.bam'.format(self.time_points[1]))
        cr_file = os.path.join(
            bcbio_folder, self.patient + '_' + normal_sample,
            self.patient + '_{}-ready.bam'.format(normal_sample))

        out_dir = os.path.join(out_dir, 'copywriteR')
        profiles_link = os.path.join(out_dir, "CNAprofiles")
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        elif os.path.exists(profiles_link) and os.path.islink(profiles_link):
            # Remove the symlink left by an earlier run.
            logging.info("Unlinking CNAprofiles")
            os.unlink(profiles_link)

        bin_numeric = bin_size.replace('kb', '000')
        if two_tumor_samples:
            cmd = CMD_CPWR.format(self.patient, out_dir, id_file, rel_file,
                                  cr_file, covered_regions_bed, genes_bed,
                                  bin_numeric)
        else:
            cmd = CMD_CPWR_one_tumor_sample.format(self.patient, out_dir,
                                                   id_file, cr_file,
                                                   covered_regions_bed,
                                                   genes_bed, bin_numeric)
        oldprofiles = 'CNAprofiles'
        newprofiles = 'CNAprofiles_' + bin_size
        cmd_cleaning = CMD_MV.format(bin_size, out_dir, oldprofiles,
                                     newprofiles)
        returncode = ExternalCommand(" ; ".join(["set -e", cmd, cmd_cleaning]),
                                     output_file=os.path.join(
                                         out_dir, newprofiles)).run()

        elapsed_time = timer.get_elapsed_time()
        logging.info(elapsed_time)

        loginfo = "\n"
        try:
            logfile = os.path.join(out_dir, newprofiles, "CopywriteR.log")
            # Raw string: "\+" is a sed escape, not a Python escape sequence.
            logproc = subprocess.Popen(
                r"grep -i mad {} |  head -3 | sed 's/^INFO.\+]//'".format(
                    logfile),
                stdout=subprocess.PIPE,
                shell=True)
            loginfo += logproc.communicate()[0].decode()
        except Exception as e:
            # Best effort only: the summary is optional, so a failure here
            # must not abort the run. The original call passed ``e`` without
            # a placeholder, which made the logging call itself fail.
            logging.debug("log-error: %s", e)

        if returncode == 0:
            send_email('*****@*****.**',
                       "{} COPYWRITER {} done".format(self.patient, bin_size),
                       elapsed_time + loginfo)
        else:
            send_email(
                '*****@*****.**',
                "FAILURE of {} COPYWRITER {}".format(self.patient,
                                                     bin_size), elapsed_time)
        return returncode
    def run(self,
            patient_dir,
            patientid=None,
            time_points=None,
            exonseq=False,
            rnaseq=False,
            panel=False):
        """Start one Qualimap shell command per time point and e-mail the result.

        For every time point whose BAM file exists, a command is formatted
        from ``CMD_EXON`` (exon/panel mode) or ``CMD_RNA`` (RNA-seq mode) and
        started in parallel, with stdout and stderr redirected to
        ``<bam dir>/qc/<patient>_<time point>-qualimap.log``. The method then
        waits for all processes and sends a summary e-mail.

        Args:
            patient_dir: Patient directory containing the per-sample folders.
            patientid: Patient identifier; defaults to the basename of
                ``patient_dir``.
            time_points: Time points to process. ``None`` or empty selects
                ``[ID, REL]`` for RNA-seq and ``[ID, REL, CR]`` otherwise.
            exonseq: Use the ``bcbio`` folder and the ``exon`` target regions.
            rnaseq: Use the RNA-seq command on ``<sample>/align``; ignored if
                ``exonseq`` or ``panel`` is set.
            panel: Use the ``panel`` folder and target regions.

        Raises:
            ValueError: If none of ``exonseq``, ``rnaseq`` and ``panel`` is
                set (this previously failed with a ``TypeError`` from
                ``os.path.exists(None)``).
        """
        super().run(patient_dir, patient_dir, patientid)

        if not time_points:
            time_points = [ID, REL] if rnaseq else [ID, REL, CR]

        conf_dict = self.load_configuration()

        if patientid is None:
            patientid = os.path.basename(patient_dir)

        if not (exonseq or panel or rnaseq):
            raise ValueError(
                "one of exonseq, rnaseq or panel must be set to run Qualimap")

        # Configure logging once, before the first message is emitted;
        # basicConfig is a no-op once the root logger has handlers.
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.DEBUG)

        jobs = {}
        returncodes = {}
        log_handles = []
        try:
            for t in time_points:
                if exonseq or panel:
                    folder = 'bcbio' if exonseq else 'panel'
                    target_regions_bed = conf_dict[TARGET_REGIONS][
                        'exon' if exonseq else 'panel']
                    sample_bam_dir = join(patient_dir, folder,
                                          '{}_{}'.format(patientid, t))
                    bamfile = "{0}/{1}_{2}-ready.bam".format(
                        sample_bam_dir, patientid, t)
                    cmd = CMD_EXON.format(
                        **{
                            'QUALIMAP_BIN': conf_dict[QUALIMAP_BIN],
                            'OUTDIR': join(sample_bam_dir, 'qc', 'qualimap'),
                            'TARGET_BED': target_regions_bed,
                            'BAM': bamfile
                        })
                else:
                    sample_bam_dir = join(patient_dir,
                                          '{}_{}'.format(patientid, t),
                                          'align')
                    bamfile = "{0}/{1}_{2}.bam".format(sample_bam_dir,
                                                       patientid, t)
                    # NOTE(review): no separator is inserted between the BAM
                    # stem and the suffix; left as is in case downstream code
                    # relies on this file name.
                    countfile = bamfile.replace('.bam',
                                                'qualimap-genecounts.txt')
                    cmd = CMD_RNA.format(
                        **{
                            'QUALIMAP_BIN': conf_dict[QUALIMAP_BIN],
                            'OUTDIR': join(sample_bam_dir, 'qc', 'qualimap'),
                            'BAM': bamfile,
                            'GTF': conf_dict[TRANSCRIPT_GTF],
                            'COUNTS_FILE': countfile
                        })

                if not os.path.exists(bamfile):
                    logging.warning("BAM file not found: {}".format(bamfile))
                    continue
                logging.info("BAM file: {}".format(bamfile))

                os.makedirs(join(sample_bam_dir, 'qc'), exist_ok=True)

                logfilename = join(sample_bam_dir, 'qc',
                                   '{}_{}-qualimap.log'.format(patientid, t))
                log_handle = open(logfilename, 'w+')
                # Track the handle so it is closed once the jobs are done.
                log_handles.append(log_handle)

                logging.info('Full command: "{}"'.format(cmd))
                logging.info('See log file: {}'.format(logfilename))
                jobs["{}_{}".format(patientid, t)] = subprocess.Popen(
                    cmd, shell=True, stderr=log_handle, stdout=log_handle)

            for name, proc in jobs.items():
                proc.wait()
                logging.debug('Return code of {} is: {}'.format(
                    name, proc.returncode))
                returncodes[name] = proc.returncode
        finally:
            # The child processes hold their own descriptors; closing ours
            # only releases the handles that previously leaked.
            for handle in log_handles:
                handle.close()

        elapsed_time = self.timer.get_elapsed_time()
        logging.info(elapsed_time)

        if not returncodes:
            # max()/min() on an empty dict used to raise ValueError here.
            logging.warning(
                "No BAM files were processed for {}".format(patientid))

        succeeded = bool(returncodes) and all(
            code == 0 for code in returncodes.values())
        if succeeded:
            send_email('*****@*****.**',
                       "{} Qualimap done".format(patientid), elapsed_time)
        else:
            send_email('*****@*****.**',
                       "FAILURE of {} Qualimap".format(patientid),
                       "{}\n\n{}".format(elapsed_time, returncodes))
Beispiel #7
0
    def run(self,
            patient_dir=None,
            patient_id=None,
            sample_dir=None,
            scratch_dir=None,
            output_dir=None):
        """Run the three deFuse steps for one sample and e-mail the outcome.

        Returns early with a warning if the ``results.tsv`` output file
        already exists. Otherwise the commands formatted from ``CMD_FASTA``,
        ``CMD_DEFUSE`` and ``CMD_CLEANUP`` are executed in that order; the
        first non-zero return code sends a failure e-mail and stops the run.

        Args:
            patient_dir: Only checked to differ from ``sample_dir``.
            patient_id: Forwarded to the base-class ``run``.
            sample_dir: Input directory for the first command; also passed to
                the base-class ``run``.
            scratch_dir: Working directory for the commands; defaults to the
                scratch variant of the ``defuse`` work directory.
            output_dir: Passed to the base-class ``run``; defaults to the
                current working directory.
        """
        assert patient_dir != sample_dir

        super().run(sample_dir,
                    os.getcwd() if output_dir is None else output_dir,
                    patient_id=patient_id)

        conf_dict = self.load_configuration()

        res_file = self.output_files.generate('defuse', 'results.tsv')
        if os.path.isfile(res_file):
            logger.warning(
                "FILE already exists {}. \n\t EXITING".format(res_file))
            return

        sample_out_dir = self.work_dirs.get('defuse')

        if scratch_dir is None:
            scratch_dir = self.work_dirs.get('defuse', scratch=True)

        # The three steps, in execution order.
        fasta_step = ExternalCommand(
            CMD_FASTA.format(INPUT_DIR=sample_dir,
                             SCRATCH_DIR=scratch_dir,
                             SAMPLE=self.sample),
            output_file=os.path.join(scratch_dir, self.sample + "_R2.fastq"))
        defuse_step = ExternalCommand(
            CMD_DEFUSE.format(CORES=conf_dict[DEFUSE_CORES],
                              DEFUSE_CONFIG=conf_dict[DEFUSE_CONFIG],
                              DEFUSE_DATA_DIR=conf_dict[DEFUSE_DATA_DIR],
                              SCRATCH_DIR=scratch_dir,
                              SAMPLE=self.sample))
        cleanup_step = ExternalCommand(
            CMD_CLEANUP.format(SCRATCH_DIR=scratch_dir,
                               SAMPLE=self.sample,
                               OUTPUT_FOLDER=sample_out_dir))

        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.DEBUG)

        for step in (fasta_step, defuse_step, cleanup_step):
            status = step.run()
            elapsed_time = self.timer.get_elapsed_time()
            if status == 0:
                continue
            # Abort on the first failing step.
            send_email('*****@*****.**',
                       "FAILURE of {}  DEFUSE".format(self.sample),
                       elapsed_time)
            return

        logging.info(elapsed_time)

        send_email('*****@*****.**',
                   "{} DEFUSE done".format(self.sample), elapsed_time)