def fq_trimming(pipeline, fq1_files, fq2_files, clinseq_barcode, ref, outdir, maxcores=1):
    """
    Add trimming and concatenation jobs for paired-end fastq files.

    One Skewer job is added per (fq1, fq2) pair, followed by two Cat jobs:
    one taking all trimmed first-read files as input, one taking all trimmed
    second-read files.

    :param pipeline: pipeline the jobs are added to; its ``scratch`` attribute
        is handed to every Skewer job
    :param fq1_files: list of first-read fastq paths
    :param fq2_files: list of second-read fastq paths, paired by position with
        ``fq1_files``
    :param clinseq_barcode: barcode used in the Cat job names and output
        file names
    :param ref: not used by this function
    :param outdir: directory prefix for all output paths
    :param maxcores: number of threads assigned to each Skewer job
    :return: tuple ``(cat1.output, cat2.output)`` with the two concatenated
        fastq paths
    :raises ValueError: if ``fq1_files`` and ``fq2_files`` differ in length
    """
    fq1_abs = [normpath(x) for x in fq1_files]
    fq2_abs = [normpath(x) for x in fq2_files]
    logging.debug("Trimming {} and {}".format(fq1_abs, fq2_abs))

    # Files are paired by position. Fail before any job is added instead of
    # silently dropping unpaired files or hitting an IndexError mid-way.
    if len(fq1_abs) != len(fq2_abs):
        raise ValueError(
            "Mismatched paired-end input for {}: {} fq1 file(s) but {} fq2 file(s)".format(
                clinseq_barcode, len(fq1_abs), len(fq2_abs)))

    fq1_trimmed = []
    fq2_trimmed = []
    for fq1, fq2 in zip(fq1_abs, fq2_abs):
        fq1_name = os.path.basename(fq1)
        fq2_name = os.path.basename(fq2)

        skewer = Skewer()
        skewer.input1 = fq1
        skewer.input2 = fq2
        skewer.output1 = outdir + "/skewer/libs/{}".format(fq1_name)
        skewer.output2 = outdir + "/skewer/libs/{}".format(fq2_name)
        # The stats log and the job name are both keyed on the first-read file.
        skewer.stats = outdir + "/skewer/libs/skewer-stats-{}.log".format(fq1_name)
        skewer.threads = maxcores
        skewer.jobname = "skewer/{}".format(fq1_name)
        skewer.scratch = pipeline.scratch
        skewer.is_intermediate = True
        fq1_trimmed.append(skewer.output1)
        fq2_trimmed.append(skewer.output2)
        pipeline.add(skewer)

    cat1 = Cat()
    cat1.input = fq1_trimmed
    cat1.output = outdir + "/skewer/{}-concatenated_1.fastq.gz".format(clinseq_barcode)
    cat1.jobname = "cat1/{}".format(clinseq_barcode)
    cat1.is_intermediate = True
    pipeline.add(cat1)

    cat2 = Cat()
    cat2.input = fq2_trimmed
    cat2.jobname = "cat2/{}".format(clinseq_barcode)
    cat2.output = outdir + "/skewer/{}-concatenated_2.fastq.gz".format(clinseq_barcode)
    cat2.is_intermediate = True
    pipeline.add(cat2)

    return cat1.output, cat2.output
def align_se(pipeline, fq1_files, clinseq_barcode, ref, outdir, maxcores, remove_duplicates=True):
    """
    Set up Skewer, Cat and Bwa jobs for single-end fastq files.

    :param pipeline: pipeline that receives the jobs; its ``scratch`` attribute
        is handed to the Skewer and Bwa jobs
    :param fq1_files: fastq paths to process
    :param clinseq_barcode: barcode used for job names, output file names and
        the read group
    :param ref: reference sequence assigned to the Bwa job
    :param outdir: directory prefix for every output path
    :param maxcores: thread count for the Skewer and Bwa jobs
    :param remove_duplicates: forwarded to the Bwa job
    :return: output path of the Bwa job
    """
    logging.debug("Aligning files: {}".format(fq1_files))
    abs_paths = [normpath(path) for path in fq1_files]

    trimmed_paths = []
    for fastq in abs_paths:
        fq_name = os.path.basename(fastq)

        trim_job = Skewer()
        trim_job.input1 = fastq
        # There is no second-read input; output2 only gets a placeholder path.
        trim_job.input2 = None
        trim_job.output1 = outdir + "/skewer/{}".format(fq_name)
        trim_job.output2 = outdir + "/skewer/unused-dummyfq2-{}".format(fq_name)
        trim_job.stats = outdir + "/skewer/skewer-stats-{}.log".format(fq_name)
        trim_job.threads = maxcores
        trim_job.jobname = "skewer/{}".format(fq_name)
        trim_job.scratch = pipeline.scratch
        trim_job.is_intermediate = True
        trimmed_paths.append(trim_job.output1)
        pipeline.add(trim_job)

    concat_job = Cat()
    concat_job.input = trimmed_paths
    concat_job.output = outdir + "/skewer/{}_1.fastq.gz".format(clinseq_barcode)
    concat_job.jobname = "cat/{}".format(clinseq_barcode)
    concat_job.is_intermediate = False
    pipeline.add(concat_job)

    bwa_job = Bwa()
    bwa_job.input_fastq1 = concat_job.output
    bwa_job.input_reference_sequence = ref
    bwa_job.remove_duplicates = remove_duplicates

    # Read group: ID is the barcode itself, SM and LB are derived from it.
    prep_id = parse_prep_id(clinseq_barcode)
    sample_name = compose_sample_str(extract_unique_capture(clinseq_barcode))
    bwa_job.readgroup = "\"@RG\\tID:{rg_id}\\tSM:{rg_sm}\\tLB:{rg_lb}\\tPL:ILLUMINA\"".format(
        rg_id=clinseq_barcode, rg_sm=sample_name, rg_lb=prep_id)

    bwa_job.threads = maxcores
    bwa_job.output = "{}/{}.bam".format(outdir, clinseq_barcode)
    bwa_job.scratch = pipeline.scratch
    bwa_job.jobname = "bwa/{}".format(clinseq_barcode)
    bwa_job.is_intermediate = False
    pipeline.add(bwa_job)

    return bwa_job.output
def __init__(self, outdir, first, second, third, **kwargs):
    """
    Build a pipeline of three jobs: a Urandom job, an Ifail job and a Cat job.

    The Cat job takes the output paths of the other two jobs as its input.

    :param outdir: output directory; also passed to PypedreamPipeline.__init__
    :param first: file name under ``outdir`` for the Urandom job's output
    :param second: file name under ``outdir`` for the Ifail job's output
    :param third: file name under ``outdir`` for the Cat job's output
    :param kwargs: forwarded unchanged to PypedreamPipeline.__init__
    """
    PypedreamPipeline.__init__(self, outdir, **kwargs)

    urandom_job = Urandom()
    urandom_job.output = "/".join((outdir, first))
    self.add(urandom_job)

    ifail_job = Ifail()
    ifail_job.output = "/".join((outdir, second))
    self.add(ifail_job)

    concat_job = Cat()
    concat_job.input = [urandom_job.output, ifail_job.output]
    concat_job.output = "/".join((outdir, third))
    self.add(concat_job)
def __init__(self, outdir, first, second, third, **kwargs):
    """
    Build a pipeline of two named Urandom jobs feeding a named Cat job.

    The second Urandom job is flagged as intermediate; the Cat job takes the
    output paths of both Urandom jobs as its input.

    :param outdir: output directory; also passed to PypedreamPipeline.__init__
    :param first: file name under ``outdir`` for the first Urandom job's
        output, also used in its job name
    :param second: file name under ``outdir`` for the second Urandom job's
        output, also used in its job name
    :param third: file name under ``outdir`` for the Cat job's output, also
        used in its job name
    :param kwargs: forwarded unchanged to PypedreamPipeline.__init__
    """
    PypedreamPipeline.__init__(self, outdir, **kwargs)

    first_job = Urandom()
    first_job.jobname = "urandom-{}".format(first)
    first_job.output = "/".join((outdir, first))
    first_job.threads = 1
    self.add(first_job)

    second_job = Urandom()
    second_job.jobname = "urandom-{}".format(second)
    second_job.output = "/".join((outdir, second))
    second_job.is_intermediate = True
    self.add(second_job)

    concat_job = Cat()
    concat_job.jobname = "cat1-{}".format(third)
    concat_job.input = [first_job.output, second_job.output]
    concat_job.output = "/".join((outdir, third))
    self.add(concat_job)
def align_pe(pipeline, fq1_files, fq2_files, clinseq_barcode, ref, outdir, maxcores=1,
             remove_duplicates=True):
    """
    Align paired end data.

    Adds one Skewer job per (fq1, fq2) pair, two Cat jobs taking the trimmed
    first-read and second-read files respectively, and one Bwa job taking the
    two Cat outputs.

    :param pipeline: pipeline the jobs are added to; its ``scratch`` attribute
        is handed to the Skewer and Bwa jobs
    :param fq1_files: list of first-read fastq paths
    :param fq2_files: list of second-read fastq paths, paired by position with
        ``fq1_files``
    :param clinseq_barcode: barcode used for job names, output file names and
        the read group
    :param ref: reference sequence assigned to the Bwa job
    :param outdir: directory prefix for all output paths
    :param maxcores: number of threads for the Skewer and Bwa jobs
    :param remove_duplicates: forwarded to the Bwa job
    :return: output path of the Bwa job
    :raises ValueError: if ``fq1_files`` and ``fq2_files`` differ in length
    """
    fq1_abs = [normpath(x) for x in fq1_files]
    fq2_abs = [normpath(x) for x in fq2_files]
    logging.debug("Trimming {} and {}".format(fq1_abs, fq2_abs))

    # Files are paired by position. Fail before any job is added instead of
    # silently dropping unpaired files or hitting an IndexError mid-way.
    if len(fq1_abs) != len(fq2_abs):
        raise ValueError(
            "Mismatched paired-end input for {}: {} fq1 file(s) but {} fq2 file(s)".format(
                clinseq_barcode, len(fq1_abs), len(fq2_abs)))

    fq1_trimmed = []
    fq2_trimmed = []
    for fq1, fq2 in zip(fq1_abs, fq2_abs):
        fq1_name = os.path.basename(fq1)
        fq2_name = os.path.basename(fq2)

        skewer = Skewer()
        skewer.input1 = fq1
        skewer.input2 = fq2
        skewer.output1 = outdir + "/skewer/libs/{}".format(fq1_name)
        skewer.output2 = outdir + "/skewer/libs/{}".format(fq2_name)
        # The stats log and the job name are both keyed on the first-read file.
        skewer.stats = outdir + "/skewer/libs/skewer-stats-{}.log".format(fq1_name)
        skewer.threads = maxcores
        skewer.jobname = "skewer/{}".format(fq1_name)
        skewer.scratch = pipeline.scratch
        skewer.is_intermediate = True
        fq1_trimmed.append(skewer.output1)
        fq2_trimmed.append(skewer.output2)
        pipeline.add(skewer)

    cat1 = Cat()
    cat1.input = fq1_trimmed
    cat1.output = outdir + "/skewer/{}-concatenated_1.fastq.gz".format(clinseq_barcode)
    cat1.jobname = "cat1/{}".format(clinseq_barcode)
    cat1.is_intermediate = True
    pipeline.add(cat1)

    cat2 = Cat()
    cat2.input = fq2_trimmed
    cat2.jobname = "cat2/{}".format(clinseq_barcode)
    cat2.output = outdir + "/skewer/{}-concatenated_2.fastq.gz".format(clinseq_barcode)
    cat2.is_intermediate = True
    pipeline.add(cat2)

    bwa = Bwa()
    bwa.input_fastq1 = cat1.output
    bwa.input_fastq2 = cat2.output
    bwa.input_reference_sequence = ref
    bwa.remove_duplicates = remove_duplicates

    # Read group: ID is the barcode itself, SM and LB are derived from it.
    library_id = parse_prep_id(clinseq_barcode)
    sample_string = compose_sample_str(extract_unique_capture(clinseq_barcode))
    bwa.readgroup = "\"@RG\\tID:{rg_id}\\tSM:{rg_sm}\\tLB:{rg_lb}\\tPL:ILLUMINA\"".format(
        rg_id=clinseq_barcode, rg_sm=sample_string, rg_lb=library_id)

    bwa.threads = maxcores
    bwa.output = "{}/{}.bam".format(outdir, clinseq_barcode)
    bwa.jobname = "bwa/{}".format(clinseq_barcode)
    bwa.scratch = pipeline.scratch
    bwa.is_intermediate = False
    pipeline.add(bwa)

    return bwa.output