Ejemplo n.º 1
0
 def deeptools_bamCoverage(self):
     """Write a coverage track for the treatment and control BAM files.

     A ``WriteBedGraph`` is built over ``self.treat_bam`` and
     ``self.control_bam`` using ``self.binSize`` for both the bin length
     and the step size, then run with ``writeBedGraph.scaleCoverage`` and
     a scale factor of 1.0. The result is written to ``self.outFileName``.
     """
     # NOTE(review): bamCoverage is not referenced below; the import is kept
     # so that the module is still loaded exactly as before.
     from deeptools import bamCoverage
     from deeptools import writeBedGraph

     scaling = {'scaleFactor': 1.0}
     bam_inputs = [self.treat_bam, self.control_bam]
     writer_options = {
         'binLength': self.binSize,
         'stepSize': self.binSize,
         'region': None,
         'blackListFileName': None,
         'numberOfProcessors': self.numberOfProcessors,
         'extendReads': False,
         'minMappingQuality': None,
         'ignoreDuplicates': True,
         'center_read': False,
         'zerosToNans': False,
         'samFlag_include': None,
         'samFlag_exclude': None,
         'minFragmentLength': 0,
         'maxFragmentLength': 0,
         'verbose': True,
     }

     writer = writeBedGraph.WriteBedGraph(bam_inputs, **writer_options)
     writer.run(writeBedGraph.scaleCoverage,
                scaling,
                self.outFileName,
                blackListFileName=None)
Ejemplo n.º 2
0
    def setUp(self):
        """
        Prepare the BAM fixtures and a default ``WriteBedGraph`` instance.

        The reads in the two bam files are distributed as follows.

        They cover 200 bp::

              0                              100                           200
              |------------------------------------------------------------|
            A                                ==============>
                                                            <==============


            B                 <==============               ==============>
                                             ==============>
                                                            ==============>
        """
        self.root = ROOT
        # All fixture paths are built by appending the file name to the root.
        self.bamFile1, self.bamFile2, self.bamFile_PE = (
            self.root + file_name
            for file_name in ("testA.bam", "testB.bam", "test_paired2.bam")
        )
        self.chrom = '3R'

        # Bins are contiguous: the step size equals the bin length.
        self.step_size = 50
        self.bin_length = 50
        self.func_args = dict(scaleFactor=1.0)

        writer_options = {
            'binLength': self.bin_length,
            'stepSize': self.step_size,
        }
        self.c = wr.WriteBedGraph([self.bamFile1], **writer_options)
Ejemplo n.º 3
0
    def test_writeBedGraph_worker_ignore_duplicates(self):
        """
        Check ``writeBedGraph_worker`` output when duplicates are ignored.

        A ``WriteBedGraph`` is built over ``self.bamFile2`` with
        ``ignoreDuplicates=True`` and ``zerosToNans`` switched on afterwards.
        The worker is run on ``3R:0-200`` and the temporary file it returns
        is expected to hold the single line ``3R 50 200 1.0``.
        """
        self.c = wr.WriteBedGraph([self.bamFile2],
                                  binLength=self.bin_length,
                                  stepSize=self.step_size,
                                  ignoreDuplicates=True)
        self.c.zerosToNans = True

        tempFile = self.c.writeBedGraph_worker('3R', 0, 200, scaleCoverage,
                                               self.func_args)
        # Close the handle deterministically instead of relying on garbage
        # collection.
        with open(tempFile, 'r') as handle:
            res = handle.readlines()
        # Remove the temporary file before asserting so that a failing
        # assertion does not leave it behind.
        os.remove(tempFile)
        assert_equal(res, ['3R\t50\t200\t1.0\n'])
Ejemplo n.º 4
0
def main(args=None):
    """
    Compare two BAM files and write the per-bin result.

    The procedure has two parts.

    1. Scaling factors are determined (SES or read counts method) to
       account for differences in sequencing depth.

    2. The genome is traversed, the BAM files are scaled, and the
       log ratio/ratio/difference is computed for bins of the fixed
       width given by the user.

    """
    args = process_args(args)

    scale_factors = get_scale_factors(args)
    if args.verbose:
        print("Individual scale factors are {0}".format(scale_factors))

    # getRatio is invoked for every tile that is considered and is handed
    # these arguments each time.
    FUNC = getRatio
    func_args = dict(valueType=args.ratio,
                     scaleFactors=scale_factors,
                     pseudocount=args.pseudocount)

    bam_inputs = [args.bamfile1, args.bamfile2]
    writer_options = {
        'stepSize': args.binSize,
        'region': args.region,
        'numberOfProcessors': args.numberOfProcessors,
        'extendReads': args.extendReads,
        'blackListFileName': args.blackListFileName,
        'minMappingQuality': args.minMappingQuality,
        'ignoreDuplicates': args.ignoreDuplicates,
        'center_read': args.centerReads,
        'zerosToNans': args.skipNonCoveredRegions,
        'samFlag_include': args.samFlagInclude,
        'samFlag_exclude': args.samFlagExclude,
        'minFragmentLength': args.minFragmentLength,
        'maxFragmentLength': args.maxFragmentLength,
        'verbose': args.verbose,
    }
    # The bin size and the literal 0 are passed positionally, as the
    # constructor call has always done here.
    wr = writeBedGraph.WriteBedGraph(bam_inputs, args.binSize, 0,
                                     **writer_options)

    run_options = {
        'blackListFileName': args.blackListFileName,
        'format': args.outFileFormat,
        'smoothLength': args.smoothLength,
    }
    wr.run(FUNC, func_args, args.outFileName, **run_options)
    def setUp(self):
        """
        Prepare the CRAM fixtures and a default ``WriteBedGraph`` instance.

        Same layout as the BAM-based set-up, but pointing at CRAM files.
        """
        self.root = ROOT
        # All fixture paths are built by appending the file name to the root.
        self.bamFile1, self.bamFile2, self.bamFile_PE = (
            self.root + file_name
            for file_name in ("testA.cram", "testB.cram", "test_paired2.cram")
        )
        self.chrom = '3R'

        # Bins are contiguous: the step size equals the bin length.
        self.step_size = 50
        self.bin_length = 50
        self.func_args = dict(scaleFactor=1.0)

        writer_options = {
            'binLength': self.bin_length,
            'stepSize': self.step_size,
        }
        self.c = wr.WriteBedGraph([self.bamFile1], **writer_options)
Ejemplo n.º 6
0
 def deeptools_bamCompare(self):
     """Write the per-bin ratio of the treatment and control BAM files.

     Prints a separator line and ``self.name``, then builds a
     ``WriteBedGraph`` over ``self.treat_bam`` and ``self.control_bam`` with
     ``self.binSize`` as bin length and step size. The writer is run with
     ``getRatio.getRatio`` using the ``"ratio"`` value type, scale factors
     of ``[1, 1]`` and a pseudocount of 1; output goes to
     ``self.outFileName``.
     """
     print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
     print(self.name)
     # Only the modules actually used below are imported; the duplicate
     # writeBedGraph import and the unreferenced helper imports were dropped.
     from deeptools import writeBedGraph
     from deeptools import getRatio

     FUNC = getRatio.getRatio
     # Value types listed by the original author:
     # log2, ratio, subtract, add, mean, reciprocal_ratio, first, second
     func_args = {
         'valueType': "ratio",
         'scaleFactors': [1, 1],
         'pseudocount': 1
     }
     wr = writeBedGraph.WriteBedGraph(
         [self.treat_bam, self.control_bam],
         self.binSize,
         0,
         stepSize=self.binSize,
         region=None,
         numberOfProcessors=self.numberOfProcessors,
         extendReads=False,
         blackListFileName=None,
         minMappingQuality=None,
         ignoreDuplicates=False,
         center_read=False,
         zerosToNans=False,
         samFlag_include=None,
         samFlag_exclude=None,
         minFragmentLength=0,
         maxFragmentLength=0,
         verbose=True,
     )
     wr.run(FUNC, func_args, self.outFileName)
Ejemplo n.º 7
0
def main(args=None):
    """
    Compute a coverage track for a single BAM file and write it out.

    Steps, in order:

    1. Parse the arguments and set the module-level ``debug`` flag from
       ``args.verbose``.
    2. Determine the scale factor: when ``args.normalizeUsing`` is set the
       mapping statistics are read with ``openBam`` and handed to
       ``get_scale_factor``; otherwise ``args.scaleFactor`` is used as is.
    3. Pick the writer: ``CenterFragment`` for ``--MNase``,
       ``OffsetFragment`` when an offset is given (or implied by
       ``--filterRNAstrand``), else ``writeBedGraph.WriteBedGraph``.
    4. Run the writer with ``writeBedGraph.scaleCoverage``.

    Exits via ``sys.exit`` with an error message when ``--MNase`` is used on
    a library for which no fragment lengths are found, or when the offset
    values are invalid.
    """
    args = process_args(args)

    global debug
    if args.verbose:
        sys.stderr.write("Specified --scaleFactor: {}\n".format(
            args.scaleFactor))
        debug = 1
    else:
        debug = 0

    if args.normalizeUsing == 'None':
        args.normalizeUsing = None  # For the sake of sanity

    if args.normalizeUsing:
        # if a normalization is required then compute the scale factors
        bam, mapped, unmapped, stats = openBam(
            args.bam, returnStats=True, nThreads=args.numberOfProcessors)
        bam.close()
        scale_factor = get_scale_factor(args, stats)
    else:
        scale_factor = args.scaleFactor

    func_args = {'scaleFactor': scale_factor}

    # This fixes issue #520, where --extendReads wasn't honored if --filterRNAstrand was used
    if args.filterRNAstrand and not args.Offset:
        args.Offset = [1, -1]

    if args.MNase:
        # check that library is paired end
        # using getFragmentAndReadSize
        from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
        frag_len_dict, read_len_dict = get_read_and_fragment_length(
            args.bam,
            return_lengths=False,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            verbose=args.verbose)
        if frag_len_dict is None:
            sys.exit(
                "*Error*: For the --MNAse function a paired end library is required. "
            )

        # Set some default fragment length bounds
        if args.minFragmentLength == 0:
            args.minFragmentLength = 130
        if args.maxFragmentLength == 0:
            args.maxFragmentLength = 200

        wr = CenterFragment(
            [args.bam],
            binLength=args.binSize,
            stepSize=args.binSize,
            region=args.region,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            extendReads=args.extendReads,
            minMappingQuality=args.minMappingQuality,
            ignoreDuplicates=args.ignoreDuplicates,
            center_read=args.centerReads,
            zerosToNans=args.skipNonCoveredRegions,
            samFlag_include=args.samFlagInclude,
            samFlag_exclude=args.samFlagExclude,
            minFragmentLength=args.minFragmentLength,
            maxFragmentLength=args.maxFragmentLength,
            chrsToSkip=args.ignoreForNormalization,
            verbose=args.verbose,
        )

    elif args.Offset:
        # The first offset is validated the same way whether one or two
        # values were supplied, so the check is done once up front.
        if args.Offset[0] == 0:
            sys.exit(
                "*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment."
            )
        if len(args.Offset) > 1:
            # A positive right bound must not lie left of the left bound.
            if args.Offset[1] > 0 and args.Offset[1] < args.Offset[0]:
                sys.exit(
                    "*Error*: The right side bound is less than the left-side bound. This is inappropriate."
                )
        wr = OffsetFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            chrsToSkip=args.ignoreForNormalization,
                            verbose=args.verbose)
        wr.filter_strand = args.filterRNAstrand
        wr.Offset = args.Offset
    else:
        wr = writeBedGraph.WriteBedGraph(
            [args.bam],
            binLength=args.binSize,
            stepSize=args.binSize,
            region=args.region,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            extendReads=args.extendReads,
            minMappingQuality=args.minMappingQuality,
            ignoreDuplicates=args.ignoreDuplicates,
            center_read=args.centerReads,
            zerosToNans=args.skipNonCoveredRegions,
            samFlag_include=args.samFlagInclude,
            samFlag_exclude=args.samFlagExclude,
            minFragmentLength=args.minFragmentLength,
            maxFragmentLength=args.maxFragmentLength,
            chrsToSkip=args.ignoreForNormalization,
            verbose=args.verbose,
        )

    wr.run(writeBedGraph.scaleCoverage,
           func_args,
           args.outFileName,
           blackListFileName=args.blackListFileName,
           format=args.outFileFormat,
           smoothLength=args.smoothLength)
Ejemplo n.º 8
0
def main(args=None):
    """
    Compute a coverage track for a single BAM file and write it out.

    Steps, in order:

    1. Parse the arguments and set the module-level ``debug`` flag from
       ``args.verbose``.
    2. Obtain the scale factor from ``get_scale_factor(args)``.
    3. Pick the writer: ``CenterFragment`` for ``--MNase``,
       ``OffsetFragment`` when an offset is given, ``filterRnaStrand`` when
       only ``--filterRNAstrand`` is given, else
       ``writeBedGraph.WriteBedGraph``.
    4. Run the writer with ``writeBedGraph.scaleCoverage``.

    Exits via ``sys.exit`` with an error message when ``--MNase`` is used on
    a library for which no fragment lengths are found, or when the offset
    values are invalid.
    """
    args = process_args(args)

    global debug
    if args.verbose:
        debug = 1
    else:
        debug = 0

    func_args = {'scaleFactor': get_scale_factor(args)}

    if args.MNase:
        # check that library is paired end
        # using getFragmentAndReadSize
        from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
        frag_len_dict, read_len_dict = get_read_and_fragment_length(
            args.bam,
            return_lengths=False,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            verbose=args.verbose)
        if frag_len_dict is None:
            sys.exit(
                "*Error*: For the --MNAse function a paired end library is required. "
            )

        # Set some default fragment length bounds
        if args.minFragmentLength == 0:
            args.minFragmentLength = 130
        if args.maxFragmentLength == 0:
            args.maxFragmentLength = 200

        wr = CenterFragment(
            [args.bam],
            binLength=args.binSize,
            stepSize=args.binSize,
            region=args.region,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            extendReads=args.extendReads,
            minMappingQuality=args.minMappingQuality,
            ignoreDuplicates=args.ignoreDuplicates,
            center_read=args.centerReads,
            zerosToNans=args.skipNonCoveredRegions,
            samFlag_include=args.samFlagInclude,
            samFlag_exclude=args.samFlagExclude,
            minFragmentLength=args.minFragmentLength,
            maxFragmentLength=args.maxFragmentLength,
            verbose=args.verbose,
        )

    elif args.Offset:
        # The first offset is validated the same way whether one or two
        # values were supplied, so the check is done once up front.
        if args.Offset[0] == 0:
            sys.exit(
                "*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment."
            )
        if len(args.Offset) > 1:
            # A positive right bound must not lie left of the left bound.
            if args.Offset[1] > 0 and args.Offset[1] < args.Offset[0]:
                sys.exit(
                    "*Error*: The right side bound is less than the left-side bound. This is inappropriate."
                )
        wr = OffsetFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            verbose=args.verbose)
        wr.filter_strand = args.filterRNAstrand
        wr.Offset = args.Offset

    elif args.filterRNAstrand:
        wr = filterRnaStrand(
            [args.bam],
            binLength=args.binSize,
            stepSize=args.binSize,
            region=args.region,
            numberOfProcessors=args.numberOfProcessors,
            extendReads=args.extendReads,
            minMappingQuality=args.minMappingQuality,
            ignoreDuplicates=args.ignoreDuplicates,
            center_read=args.centerReads,
            zerosToNans=args.skipNonCoveredRegions,
            samFlag_include=args.samFlagInclude,
            samFlag_exclude=args.samFlagExclude,
            minFragmentLength=args.minFragmentLength,
            maxFragmentLength=args.maxFragmentLength,
            verbose=args.verbose,
        )

        wr.filter_strand = args.filterRNAstrand
    else:
        wr = writeBedGraph.WriteBedGraph(
            [args.bam],
            binLength=args.binSize,
            stepSize=args.binSize,
            region=args.region,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            extendReads=args.extendReads,
            minMappingQuality=args.minMappingQuality,
            ignoreDuplicates=args.ignoreDuplicates,
            center_read=args.centerReads,
            zerosToNans=args.skipNonCoveredRegions,
            samFlag_include=args.samFlagInclude,
            samFlag_exclude=args.samFlagExclude,
            minFragmentLength=args.minFragmentLength,
            maxFragmentLength=args.maxFragmentLength,
            verbose=args.verbose,
        )

    wr.run(writeBedGraph.scaleCoverage,
           func_args,
           args.outFileName,
           blackListFileName=args.blackListFileName,
           format=args.outFileFormat,
           smoothLength=args.smoothLength)
Ejemplo n.º 9
0
def main(args=None):
    """
    Compute a coverage track for a single BAM file and write it out.

    After parsing the arguments, the module-level ``debug`` flag is set from
    ``args.verbose`` and the scale factor is taken from
    ``get_scale_factor(args)``. With ``--MNase`` a ``CenterFragment`` writer
    is used (after confirming that fragment lengths can be determined);
    otherwise a plain ``writeBedGraph.WriteBedGraph``. The writer is then
    run with ``writeBedGraph.scaleCoverage``.

    Raises ``SystemExit`` with an error message when ``--MNase`` is used on
    a library for which no fragment lengths are found.
    """
    args = process_args(args)

    global debug
    if args.verbose:
        debug = 1
    else:
        debug = 0

    func_args = {'scaleFactor': get_scale_factor(args)}
    if args.MNase:
        # check that library is paired end
        # using getFragmentAndReadSize
        from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
        frag_len_dict, read_len_dict = get_read_and_fragment_length(
            args.bam,
            args.bamIndex,
            return_lengths=False,
            numberOfProcessors=args.numberOfProcessors,
            verbose=args.verbose)
        if frag_len_dict is None:
            # The builtin ``exit`` is installed by the ``site`` module for
            # interactive use and is not guaranteed to exist (e.g. under
            # ``python -S``); raising SystemExit directly always works.
            raise SystemExit(
                "*Error*: For the --MNAse function a paired end library is required. "
            )

        wr = CenterFragment(
            [args.bam],
            binLength=args.binSize,
            stepSize=args.binSize,
            region=args.region,
            numberOfProcessors=args.numberOfProcessors,
            extendReads=args.extendReads,
            minMappingQuality=args.minMappingQuality,
            ignoreDuplicates=args.ignoreDuplicates,
            center_read=args.centerReads,
            zerosToNans=args.skipNonCoveredRegions,
            samFlag_include=args.samFlagInclude,
            samFlag_exclude=args.samFlagExclude,
            verbose=args.verbose,
        )

    else:
        wr = writeBedGraph.WriteBedGraph(
            [args.bam],
            binLength=args.binSize,
            stepSize=args.binSize,
            region=args.region,
            numberOfProcessors=args.numberOfProcessors,
            extendReads=args.extendReads,
            minMappingQuality=args.minMappingQuality,
            ignoreDuplicates=args.ignoreDuplicates,
            center_read=args.centerReads,
            zerosToNans=args.skipNonCoveredRegions,
            samFlag_include=args.samFlagInclude,
            samFlag_exclude=args.samFlagExclude,
            verbose=args.verbose,
        )

    wr.run(writeBedGraph.scaleCoverage,
           func_args,
           args.outFileName,
           format=args.outFileFormat,
           smooth_length=args.smoothLength)
Ejemplo n.º 10
0
def main(args=None):
    """
    Compare two BAM files and write the per-bin result.

    The procedure has two steps.


    1. Per-sample scaling / depth normalization:
     + When scaling is used (SES or read counts method), scaling factors
       are determined to account for sequencing depth differences.
     + Alternatively scaling can be switched off and each sample is depth
       normalized on its own using RPKM, BPM or CPM.

    2. Ratio calculation between the two bam files:
     + The genome is traversed and the log ratio/ratio/difference etc. is
       computed for bins of the fixed width given by the user.

    """
    args = process_args(args)

    if args.normalizeUsing == "RPGC":
        sys.exit(
            "RPGC normalization (--normalizeUsing RPGC) is not supported with bamCompare!"
        )
    if args.normalizeUsing == 'None':
        args.normalizeUsing = None  # For the sake of sanity
    if args.normalizeUsing and args.scaleFactorsMethod != 'None':
        sys.exit(
            "`--normalizeUsing {}` is only valid if you also use `--scaleFactorsMethod None`! To prevent erroneous output, I will quit now.\n"
            .format(args.normalizeUsing))

    # Get mapping statistics; each file is opened and closed in turn.
    bam_paths = (args.bamfile1, args.bamfile2)
    mapped_counts = []
    stats_per_file = []
    for bam_path in bam_paths:
        handle, mapped, _unmapped, stats = bamHandler.openBam(
            bam_path, returnStats=True, nThreads=args.numberOfProcessors)
        handle.close()
        mapped_counts.append(mapped)
        stats_per_file.append(stats)

    scale_factors = get_scale_factors(args, stats_per_file, mapped_counts)
    if scale_factors is None:
        if args.normalizeUsing is None:
            scale_factors = [1, 1]
        else:
            # One of the depth normalization methods was selected: compute
            # a scale factor for each file separately, pointing args.bam at
            # the file being processed.
            args.scaleFactor = 1.0
            scale_factors = []
            for bam_path, stats in zip(bam_paths, stats_per_file):
                args.bam = bam_path
                scale_factors.append(get_scale_factor(args, stats))

    if args.verbose:
        print("Individual scale factors are {0}".format(scale_factors))

    # getRatio is invoked for every tile that is considered and is handed
    # these arguments each time.
    FUNC = getRatio
    func_args = dict(valueType=args.operation,
                     scaleFactors=scale_factors,
                     pseudocount=args.pseudocount)

    writer_options = {
        'stepSize': args.binSize,
        'region': args.region,
        'numberOfProcessors': args.numberOfProcessors,
        'extendReads': args.extendReads,
        'blackListFileName': args.blackListFileName,
        'minMappingQuality': args.minMappingQuality,
        'ignoreDuplicates': args.ignoreDuplicates,
        'center_read': args.centerReads,
        'zerosToNans': args.skipNonCoveredRegions,
        'skipZeroOverZero': args.skipZeroOverZero,
        'samFlag_include': args.samFlagInclude,
        'samFlag_exclude': args.samFlagExclude,
        'minFragmentLength': args.minFragmentLength,
        'maxFragmentLength': args.maxFragmentLength,
        'chrsToSkip': args.ignoreForNormalization,
        'verbose': args.verbose,
    }
    # The bin size and the literal 0 are passed positionally, as the
    # constructor call has always done here.
    wr = writeBedGraph.WriteBedGraph([args.bamfile1, args.bamfile2],
                                     args.binSize, 0, **writer_options)

    run_options = {
        'blackListFileName': args.blackListFileName,
        'format': args.outFileFormat,
        'smoothLength': args.smoothLength,
    }
    wr.run(FUNC, func_args, args.outFileName, **run_options)