def reMergeBamfiles(infiles, sentinel):
    """
    Re-merge filtered bamfiles for an experiment, excluding any
    libraries listed in PARAMS["options_to_remove"], then touch the
    sentinel to mark completion.
    """
    # map sentinel filenames onto the bamfiles they stand for
    bam_files = [P.snip(fn, ".sentinel") + ".bam" for fn in infiles]
    merged_bam = P.snip(sentinel, ".sentinel") + ".bam"

    # comma-separated list of libraries to drop before merging
    excluded = PARAMS["options_to_remove"].split(",")
    to_merge = IDR.filterBadLibraries(bam_files, excluded)

    IDR.mergeBams(to_merge, merged_bam)
    P.touch(sentinel)
def summarizePeaksForPooledPseudoreplicates(infiles, outfile):
    """
    Write a tab-separated peak-count summary for pooled pseudoreplicates.

    Writes a header line, then delegates to IDR.countPeaks to append one
    row per input.

    Fix: the output handle was previously never closed (resource leak,
    and buffered output could be lost); it is now closed in a finally
    block so the file is released even if IDR.countPeaks raises.
    """
    outf = iotools.openFile(outfile, "w")
    try:
        outf.write("Sample_id\t"
                   "Experiment\t"
                   "Tissue\t"
                   "Condition\t"
                   "Pseudoreplicate\t"
                   "n_peaks\n")
        IDR.countPeaks(infiles, outf)
    finally:
        outf.close()
def poolSampleBamfiles(infiles, sentinel):
    """
    Merge filtered sample files for each tissue.
    """
    # each sentinel corresponds to a filtered bamfile of the same stem
    bams = [P.snip(name, ".sentinel") + ".bam" for name in infiles]
    pooled_bam = P.snip(sentinel, ".sentinel") + ".bam"

    IDR.mergeBams(bams, pooled_bam)
    P.touch(sentinel)
def callPeaksOnPseudoreplicates(infile, outfile):
    """
    Call peaks on a pseudoreplicate bamfile with the configured
    peak caller.
    """
    caller = PARAMS["options_peak_caller"]

    # fetch peak calling parameters for the configured caller
    caller_params = get_peak_caller_parameters(caller)

    # call peaks on pseudoreplicates
    IDR.callIDRPeaks(infile,
                     outfile,
                     caller,
                     PARAMS["options_control_type"],
                     caller_params,
                     pseudoreplicate=True)
def callPeaksOnIndividualReplicates(infile, outfile):
    """
    Call peaks on an individual replicate bamfile with the configured
    peak caller, then touch the output to mark completion.
    """
    # the task is driven by a sentinel; resolve it to the actual bamfile
    bamfile = P.snip(infile, ".sentinel") + ".bam"

    caller = PARAMS["options_peak_caller"]

    # fetch peak calling parameters for the configured caller
    caller_params = get_peak_caller_parameters(caller)

    # call peaks
    IDR.callIDRPeaks(bamfile,
                     outfile,
                     caller,
                     PARAMS["options_control_type"],
                     caller_params)

    P.touch(outfile)
def poolInputBamfiles(infiles, sentinel):
    """
    Merge filtered input files for each tissue, with the option of
    excluding undesirable libraries.
    """
    # map sentinel filenames onto the bamfiles they stand for
    bam_files = [P.snip(name, ".sentinel") + ".bam" for name in infiles]
    pooled_bam = P.snip(sentinel, ".sentinel") + ".bam"

    # comma-separated list of input libraries to exclude from the pool
    excluded = PARAMS["filter_remove_inputs"].split(",")

    if len(bam_files) > 1:
        kept = IDR.filterBadLibraries(bam_files, excluded)
        IDR.mergeBams(kept, pooled_bam)
    else:
        # a single input needs no merge: symlink the bamfile and its index
        source = os.path.abspath(bam_files[0])
        os.symlink(source, pooled_bam)
        os.symlink(source + ".bai", pooled_bam + ".bai")

    P.touch(sentinel)
def runIDROnPooledPseudoreplicates(infiles, outfile): """ Run IDR analysis on pooled pseudoreplicates for each EXPERIMENT """ # set IDR parameters chr_table = os.path.join(PARAMS["annotations_dir"], PARAMS["annotations_interface_contigs"]) # get statement statement = IDR.getIDRStatement(infiles[0], infiles[1], outfile, PARAMS["idr_options_overlap_ratio"], PARAMS["idr_options_ranking_measure"], chr_table) # run E.info("applyIDR: processing %s and %s" % (infiles[0], infiles[1])) job_memory = "5G" P.run()
def runIDROnIndividualReplicates(infiles, outfile): """ Run IDR consecutively for each pairwise combination of a particular EXPERIMENT """ # set IDR parameters (HACK!) WrapperIDR is in /ifs/devel/cgat chr_table = os.path.join(PARAMS["annotations_dir"], PARAMS["annotations_interface_contigs"]) # iterate through pairwise combinations of infiles for infile1, infile2 in itertools.combinations(infiles, 2): # get statement statement = IDR.getIDRStatement(infile1, infile2, outfile, PARAMS["idr_options_overlap_ratio"], PARAMS["idr_options_ranking_measure"], chr_table) # run E.info("applyIDR: processing %s and %s" % (infile1, infile2)) job_memory = "5G" P.run()
def plotBatchConsistencyForPooledPseudoreplicates(infiles, outfile):
    """
    Generate the IDR batch-consistency plot for pooled pseudoreplicates.

    Only the first input file is passed to the plotting statement.
    """
    # NOTE(review): P.run() presumably picks up `statement` from local
    # scope (cgatcore convention); do not rename it. TODO confirm.
    statement = IDR.getIDRPlotStatement(infiles[0], outfile)
    P.run()
def plotBatchConsistencyForIndividualReplicates(infiles, outfile):
    """
    Generate the IDR batch-consistency plot for individual replicates.
    """
    # HACK!
    # NOTE(review): unlike the pooled-pseudoreplicate variant, the whole
    # `infiles` collection is passed here rather than `infiles[0]` —
    # presumably intentional (hence the HACK marker), but verify that
    # IDR.getIDRPlotStatement accepts a list in this call.
    # P.run() presumably picks up `statement` from local scope (cgatcore
    # convention); do not rename it. TODO confirm.
    statement = IDR.getIDRPlotStatement(infiles, outfile)
    P.run()