def main(args=None):
    """Write a normalized coverage track (bigWig/bedGraph) for one BAM file.

    Parses the command-line arguments, computes a per-sample scale factor
    (or uses the user-supplied --scaleFactor), selects the coverage writer
    matching the options (--MNase centering, --Offset counting, or plain
    coverage) and runs it to produce the output file.

    :param args: optional list of command-line arguments; when ``None`` they
        are taken from ``sys.argv`` by ``process_args``.
    """
    args = process_args(args)
    global debug
    if args.verbose:
        sys.stderr.write("Specified --scaleFactor: {}\n".format(args.scaleFactor))
        debug = 1
    else:
        debug = 0

    if args.normalizeUsing == 'None':
        args.normalizeUsing = None  # For the sake of sanity

    if args.normalizeUsing:
        # if a normalization is required then compute the scale factors
        bam, mapped, unmapped, stats = openBam(args.bam, returnStats=True, nThreads=args.numberOfProcessors)
        bam.close()
        scale_factor = get_scale_factor(args, stats)
    else:
        scale_factor = args.scaleFactor
    func_args = {'scaleFactor': scale_factor}

    # This fixes issue #520, where --extendReads wasn't honored if
    # --filterRNAstrand was used
    if args.filterRNAstrand and not args.Offset:
        args.Offset = [1, -1]

    if args.MNase:
        # check that library is paired end
        # using getFragmentAndReadSize
        from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
        frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam,
                                                                    return_lengths=False,
                                                                    blackListFileName=args.blackListFileName,
                                                                    numberOfProcessors=args.numberOfProcessors,
                                                                    verbose=args.verbose)
        if frag_len_dict is None:
            sys.exit("*Error*: For the --MNAse function a paired end library is required. ")

        # Set some default fragment length bounds
        if args.minFragmentLength == 0:
            args.minFragmentLength = 130
        if args.maxFragmentLength == 0:
            args.maxFragmentLength = 200

        wr = CenterFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            blackListFileName=args.blackListFileName,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            chrsToSkip=args.ignoreForNormalization,
                            verbose=args.verbose,
                            )

    elif args.Offset:
        # Offsets are 1-based positions inside each alignment, so 0 is
        # invalid; for a two-element range the right bound may not lie left
        # of the left bound.
        if len(args.Offset) > 1:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
            if args.Offset[1] > 0 and args.Offset[1] < args.Offset[0]:
                # BUG FIX: this previously called the non-existent
                # `sys.exir(...)` (AttributeError at runtime) and the
                # message began with a stray quote instead of `*`.
                sys.exit("*Error*: The right side bound is less than the left-side bound. This is inappropriate.")
        else:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
        wr = OffsetFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            chrsToSkip=args.ignoreForNormalization,
                            verbose=args.verbose)
        wr.filter_strand = args.filterRNAstrand
        wr.Offset = args.Offset
    else:
        wr = writeBedGraph.WriteBedGraph([args.bam],
                                         binLength=args.binSize,
                                         stepSize=args.binSize,
                                         region=args.region,
                                         blackListFileName=args.blackListFileName,
                                         numberOfProcessors=args.numberOfProcessors,
                                         extendReads=args.extendReads,
                                         minMappingQuality=args.minMappingQuality,
                                         ignoreDuplicates=args.ignoreDuplicates,
                                         center_read=args.centerReads,
                                         zerosToNans=args.skipNonCoveredRegions,
                                         samFlag_include=args.samFlagInclude,
                                         samFlag_exclude=args.samFlagExclude,
                                         minFragmentLength=args.minFragmentLength,
                                         maxFragmentLength=args.maxFragmentLength,
                                         chrsToSkip=args.ignoreForNormalization,
                                         verbose=args.verbose,
                                         )

    wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName,
           blackListFileName=args.blackListFileName,
           format=args.outFileFormat, smoothLength=args.smoothLength)
def main(args=None):
    """Write a coverage track (bigWig/bedGraph) for one BAM file.

    Computes a scale factor via ``get_scale_factor`` and selects the
    coverage writer matching the options (--MNase centering, --Offset
    counting, --filterRNAstrand, or plain coverage) before running it to
    produce the output file.

    :param args: optional list of command-line arguments; when ``None`` they
        are taken from ``sys.argv`` by ``process_args``.
    """
    args = process_args(args)
    global debug
    if args.verbose:
        debug = 1
    else:
        debug = 0
    func_args = {'scaleFactor': get_scale_factor(args)}

    if args.MNase:
        # check that library is paired end
        # using getFragmentAndReadSize
        from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
        frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam,
                                                                    return_lengths=False,
                                                                    blackListFileName=args.blackListFileName,
                                                                    numberOfProcessors=args.numberOfProcessors,
                                                                    verbose=args.verbose)
        if frag_len_dict is None:
            sys.exit("*Error*: For the --MNAse function a paired end library is required. ")

        # Set some default fragment length bounds
        if args.minFragmentLength == 0:
            args.minFragmentLength = 130
        if args.maxFragmentLength == 0:
            args.maxFragmentLength = 200

        wr = CenterFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            blackListFileName=args.blackListFileName,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            verbose=args.verbose,
                            )

    elif args.Offset:
        # Offsets are 1-based positions inside each alignment, so 0 is
        # invalid; for a two-element range the right bound may not lie left
        # of the left bound.
        if len(args.Offset) > 1:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
            if args.Offset[1] > 0 and args.Offset[1] < args.Offset[0]:
                # BUG FIX: this previously called the non-existent
                # `sys.exir(...)` (AttributeError at runtime) and the
                # message began with a stray quote instead of `*`.
                sys.exit("*Error*: The right side bound is less than the left-side bound. This is inappropriate.")
        else:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
        wr = OffsetFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            verbose=args.verbose)
        wr.filter_strand = args.filterRNAstrand
        wr.Offset = args.Offset
    elif args.filterRNAstrand:
        wr = filterRnaStrand([args.bam],
                             binLength=args.binSize,
                             stepSize=args.binSize,
                             region=args.region,
                             numberOfProcessors=args.numberOfProcessors,
                             extendReads=args.extendReads,
                             minMappingQuality=args.minMappingQuality,
                             ignoreDuplicates=args.ignoreDuplicates,
                             center_read=args.centerReads,
                             zerosToNans=args.skipNonCoveredRegions,
                             samFlag_include=args.samFlagInclude,
                             samFlag_exclude=args.samFlagExclude,
                             minFragmentLength=args.minFragmentLength,
                             maxFragmentLength=args.maxFragmentLength,
                             verbose=args.verbose,
                             )
        wr.filter_strand = args.filterRNAstrand
    else:
        wr = writeBedGraph.WriteBedGraph([args.bam],
                                         binLength=args.binSize,
                                         stepSize=args.binSize,
                                         region=args.region,
                                         blackListFileName=args.blackListFileName,
                                         numberOfProcessors=args.numberOfProcessors,
                                         extendReads=args.extendReads,
                                         minMappingQuality=args.minMappingQuality,
                                         ignoreDuplicates=args.ignoreDuplicates,
                                         center_read=args.centerReads,
                                         zerosToNans=args.skipNonCoveredRegions,
                                         samFlag_include=args.samFlagInclude,
                                         samFlag_exclude=args.samFlagExclude,
                                         minFragmentLength=args.minFragmentLength,
                                         maxFragmentLength=args.maxFragmentLength,
                                         verbose=args.verbose,
                                         )

    wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName,
           blackListFileName=args.blackListFileName,
           format=args.outFileFormat, smoothLength=args.smoothLength)
def main(args=None):
    """Write a normalized coverage track (bigWig/bedGraph) for one BAM file.

    Validates the normalization options (RPGC requires
    --effectiveGenomeSize), computes a per-sample scale factor (or uses the
    user-supplied --scaleFactor), selects the coverage writer matching the
    options (--MNase centering, --Offset counting, or plain coverage) and
    runs it to produce the output file.

    :param args: optional list of command-line arguments; when ``None`` they
        are taken from ``sys.argv`` by ``process_args``.
    """
    args = process_args(args)
    global debug
    if args.verbose:
        sys.stderr.write("Specified --scaleFactor: {}\n".format(args.scaleFactor))
        debug = 1
    else:
        debug = 0

    if args.normalizeUsing == 'None':
        args.normalizeUsing = None  # For the sake of sanity
    elif args.normalizeUsing == 'RPGC' and not args.effectiveGenomeSize:
        sys.exit("RPGC normalization requires an --effectiveGenomeSize!\n")

    if args.normalizeUsing:
        # if a normalization is required then compute the scale factors
        bam, mapped, unmapped, stats = openBam(args.bam, returnStats=True, nThreads=args.numberOfProcessors)
        bam.close()
        scale_factor = get_scale_factor(args, stats)
    else:
        scale_factor = args.scaleFactor
    func_args = {'scaleFactor': scale_factor}

    # This fixes issue #520, where --extendReads wasn't honored if
    # --filterRNAstrand was used
    if args.filterRNAstrand and not args.Offset:
        args.Offset = [1, -1]

    if args.MNase:
        # check that library is paired end
        # using getFragmentAndReadSize
        from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
        frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam,
                                                                    return_lengths=False,
                                                                    blackListFileName=args.blackListFileName,
                                                                    numberOfProcessors=args.numberOfProcessors,
                                                                    verbose=args.verbose)
        if frag_len_dict is None:
            sys.exit("*Error*: For the --MNAse function a paired end library is required. ")

        # Set some default fragment length bounds
        if args.minFragmentLength == 0:
            args.minFragmentLength = 130
        if args.maxFragmentLength == 0:
            args.maxFragmentLength = 200

        wr = CenterFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            blackListFileName=args.blackListFileName,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            chrsToSkip=args.ignoreForNormalization,
                            verbose=args.verbose,
                            )

    elif args.Offset:
        # Offsets are 1-based positions inside each alignment, so 0 is
        # invalid; for a two-element range the right bound may not lie left
        # of the left bound.
        if len(args.Offset) > 1:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
            if args.Offset[1] > 0 and args.Offset[1] < args.Offset[0]:
                # BUG FIX: this previously called the non-existent
                # `sys.exir(...)` (AttributeError at runtime) and the
                # message began with a stray quote instead of `*`.
                sys.exit("*Error*: The right side bound is less than the left-side bound. This is inappropriate.")
        else:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
        wr = OffsetFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            chrsToSkip=args.ignoreForNormalization,
                            verbose=args.verbose)
        wr.filter_strand = args.filterRNAstrand
        wr.Offset = args.Offset
    else:
        wr = writeBedGraph.WriteBedGraph([args.bam],
                                         binLength=args.binSize,
                                         stepSize=args.binSize,
                                         region=args.region,
                                         blackListFileName=args.blackListFileName,
                                         numberOfProcessors=args.numberOfProcessors,
                                         extendReads=args.extendReads,
                                         minMappingQuality=args.minMappingQuality,
                                         ignoreDuplicates=args.ignoreDuplicates,
                                         center_read=args.centerReads,
                                         zerosToNans=args.skipNonCoveredRegions,
                                         samFlag_include=args.samFlagInclude,
                                         samFlag_exclude=args.samFlagExclude,
                                         minFragmentLength=args.minFragmentLength,
                                         maxFragmentLength=args.maxFragmentLength,
                                         chrsToSkip=args.ignoreForNormalization,
                                         verbose=args.verbose,
                                         )

    wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName,
           blackListFileName=args.blackListFileName,
           format=args.outFileFormat, smoothLength=args.smoothLength)
def main(args=None):
    """
    Compare two BAM files and write the result as a bedGraph/bigWig track.

    The work happens in two stages:

    1. Per-sample scaling / depth normalization: scale factors are either
       derived jointly from the two files (SES or read-count method) or,
       when scaling is disabled, each sample may be depth-normalized
       individually (RPKM, BPM or CPM).

    2. Ratio calculation: the genome is traversed in fixed-width bins and
       the requested operation (log ratio / ratio / difference etc.) is
       evaluated for every bin.
    """
    args = process_args(args)

    if args.normalizeUsing == "RPGC":
        sys.exit("RPGC normalization (--normalizeUsing RPGC) is not supported with bamCompare!")
    if args.normalizeUsing == 'None':
        args.normalizeUsing = None  # For the sake of sanity
    if args.scaleFactorsMethod != 'None' and args.normalizeUsing:
        sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorsMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing))

    # Collect mapping statistics for both input files.
    bam_paths = (args.bamfile1, args.bamfile2)
    mapped_counts = []
    stats_list = []
    for bam_path in bam_paths:
        handle, n_mapped, _n_unmapped, file_stats = bamHandler.openBam(
            bam_path, returnStats=True, nThreads=args.numberOfProcessors)
        handle.close()
        mapped_counts.append(n_mapped)
        stats_list.append(file_stats)

    scale_factors = get_scale_factors(args, stats_list, mapped_counts)
    if scale_factors is None:
        # No joint scaling method was applied; check whether an individual
        # depth normalization was requested instead.
        if args.normalizeUsing is None:
            scale_factors = [1, 1]
        else:
            args.scaleFactor = 1.0
            # Compute one normalization factor per input file.
            scale_factors = []
            for bam_path, file_stats in zip(bam_paths, stats_list):
                args.bam = bam_path
                scale_factors.append(get_scale_factor(args, file_stats))

    if args.verbose:
        print("Individual scale factors are {0}".format(scale_factors))

    # getRatio is evaluated per genomic tile and receives these arguments.
    func_args = {'valueType': args.operation,
                 'scaleFactors': scale_factors,
                 'pseudocount': args.pseudocount}

    writer = writeBedGraph.WriteBedGraph(
        [args.bamfile1, args.bamfile2], args.binSize, 0,
        stepSize=args.binSize,
        region=args.region,
        numberOfProcessors=args.numberOfProcessors,
        extendReads=args.extendReads,
        blackListFileName=args.blackListFileName,
        minMappingQuality=args.minMappingQuality,
        ignoreDuplicates=args.ignoreDuplicates,
        center_read=args.centerReads,
        zerosToNans=args.skipNonCoveredRegions,
        skipZeroOverZero=args.skipZeroOverZero,
        samFlag_include=args.samFlagInclude,
        samFlag_exclude=args.samFlagExclude,
        minFragmentLength=args.minFragmentLength,
        maxFragmentLength=args.maxFragmentLength,
        chrsToSkip=args.ignoreForNormalization,
        verbose=args.verbose)

    writer.run(getRatio, func_args, args.outFileName,
               blackListFileName=args.blackListFileName,
               format=args.outFileFormat, smoothLength=args.smoothLength)
def main(args=None):
    """Write a coverage track (bigWig/bedGraph) for one BAM file.

    Computes a scale factor when --normalizeTo1x or --normalizeUsingRPKM is
    given (otherwise uses the user-supplied --scaleFactor), selects the
    coverage writer matching the options (--MNase centering, --Offset
    counting, --filterRNAstrand, or plain coverage) and runs it to produce
    the output file.

    :param args: optional list of command-line arguments; when ``None`` they
        are taken from ``sys.argv`` by ``process_args``.
    """
    args = process_args(args)
    global debug
    if args.verbose:
        debug = 1
    else:
        debug = 0

    if args.normalizeTo1x or args.normalizeUsingRPKM:
        # if a normalization is required then compute the scale factors
        scale_factor = get_scale_factor(args)
    else:
        scale_factor = args.scaleFactor
    func_args = {'scaleFactor': scale_factor}

    if args.MNase:
        # check that library is paired end
        # using getFragmentAndReadSize
        from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
        frag_len_dict, read_len_dict = get_read_and_fragment_length(args.bam,
                                                                    return_lengths=False,
                                                                    blackListFileName=args.blackListFileName,
                                                                    numberOfProcessors=args.numberOfProcessors,
                                                                    verbose=args.verbose)
        if frag_len_dict is None:
            sys.exit("*Error*: For the --MNAse function a paired end library is required. ")

        # Set some default fragment length bounds
        if args.minFragmentLength == 0:
            args.minFragmentLength = 130
        if args.maxFragmentLength == 0:
            args.maxFragmentLength = 200

        wr = CenterFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            blackListFileName=args.blackListFileName,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            verbose=args.verbose,
                            )

    elif args.Offset:
        # Offsets are 1-based positions inside each alignment, so 0 is
        # invalid; for a two-element range the right bound may not lie left
        # of the left bound.
        if len(args.Offset) > 1:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
            if args.Offset[1] > 0 and args.Offset[1] < args.Offset[0]:
                # BUG FIX: this previously called the non-existent
                # `sys.exir(...)` (AttributeError at runtime) and the
                # message began with a stray quote instead of `*`.
                sys.exit("*Error*: The right side bound is less than the left-side bound. This is inappropriate.")
        else:
            if args.Offset[0] == 0:
                sys.exit("*Error*: An offset of 0 isn't allowed, since offsets are 1-based positions inside each alignment.")
        wr = OffsetFragment([args.bam],
                            binLength=args.binSize,
                            stepSize=args.binSize,
                            region=args.region,
                            numberOfProcessors=args.numberOfProcessors,
                            extendReads=args.extendReads,
                            minMappingQuality=args.minMappingQuality,
                            ignoreDuplicates=args.ignoreDuplicates,
                            center_read=args.centerReads,
                            zerosToNans=args.skipNonCoveredRegions,
                            samFlag_include=args.samFlagInclude,
                            samFlag_exclude=args.samFlagExclude,
                            minFragmentLength=args.minFragmentLength,
                            maxFragmentLength=args.maxFragmentLength,
                            verbose=args.verbose)
        wr.filter_strand = args.filterRNAstrand
        wr.Offset = args.Offset
    elif args.filterRNAstrand:
        wr = filterRnaStrand([args.bam],
                             binLength=args.binSize,
                             stepSize=args.binSize,
                             region=args.region,
                             numberOfProcessors=args.numberOfProcessors,
                             extendReads=args.extendReads,
                             minMappingQuality=args.minMappingQuality,
                             ignoreDuplicates=args.ignoreDuplicates,
                             center_read=args.centerReads,
                             zerosToNans=args.skipNonCoveredRegions,
                             samFlag_include=args.samFlagInclude,
                             samFlag_exclude=args.samFlagExclude,
                             minFragmentLength=args.minFragmentLength,
                             maxFragmentLength=args.maxFragmentLength,
                             verbose=args.verbose,
                             )
        wr.filter_strand = args.filterRNAstrand
    else:
        wr = writeBedGraph.WriteBedGraph([args.bam],
                                         binLength=args.binSize,
                                         stepSize=args.binSize,
                                         region=args.region,
                                         blackListFileName=args.blackListFileName,
                                         numberOfProcessors=args.numberOfProcessors,
                                         extendReads=args.extendReads,
                                         minMappingQuality=args.minMappingQuality,
                                         ignoreDuplicates=args.ignoreDuplicates,
                                         center_read=args.centerReads,
                                         zerosToNans=args.skipNonCoveredRegions,
                                         samFlag_include=args.samFlagInclude,
                                         samFlag_exclude=args.samFlagExclude,
                                         minFragmentLength=args.minFragmentLength,
                                         maxFragmentLength=args.maxFragmentLength,
                                         verbose=args.verbose,
                                         )

    wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName,
           blackListFileName=args.blackListFileName,
           format=args.outFileFormat, smoothLength=args.smoothLength)
def main(args=None):
    """
    The algorithm is composed of two steps.

    1. Per-sample scaling / depth Normalization:
       + If scaling is used (using the SES or read counts method), appropriate
         scaling factors are determined to account for sequencing depth
         differences.
       + Optionally scaling can be turned off and individual samples could be
         depth normalized using RPKM, BPM or CPM methods

    2. Ratio calculation between two bam files:
       + The genome is transversed and computing the log ratio/ratio/difference
         etc. for bins of fixed width given by the user.
    """
    args = process_args(args)

    if args.normalizeUsing == "RPGC":
        sys.exit("RPGC normalization (--normalizeUsing RPGC) is not supported with bamCompare!")
    if args.normalizeUsing == 'None':
        args.normalizeUsing = None  # For the sake of sanity
    if args.scaleFactorsMethod != 'None' and args.normalizeUsing:
        # BUG FIX: the message previously named a non-existent option
        # `--scaleFactorMethod`; the actual flag (and the attribute checked
        # just above) is `scaleFactorsMethod`.
        sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorsMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing))

    # Get mapping statistics
    bam1, mapped1, unmapped1, stats1 = bamHandler.openBam(args.bamfile1, returnStats=True, nThreads=args.numberOfProcessors)
    bam1.close()
    bam2, mapped2, unmapped2, stats2 = bamHandler.openBam(args.bamfile2, returnStats=True, nThreads=args.numberOfProcessors)
    bam2.close()

    scale_factors = get_scale_factors(args, [stats1, stats2], [mapped1, mapped2])
    if scale_factors is None:
        # check whether one of the depth norm methods are selected
        if args.normalizeUsing is not None:
            args.scaleFactor = 1.0
            # if a normalization is required then compute the scale factors
            args.bam = args.bamfile1
            scale_factor_bam1 = get_scale_factor(args, stats1)
            args.bam = args.bamfile2
            scale_factor_bam2 = get_scale_factor(args, stats2)
            scale_factors = [scale_factor_bam1, scale_factor_bam2]
        else:
            scale_factors = [1, 1]

    if args.verbose:
        print("Individual scale factors are {0}".format(scale_factors))

    # the getRatio function is called and receives
    # the func_args per each tile that is considered
    FUNC = getRatio
    func_args = {'valueType': args.operation,
                 'scaleFactors': scale_factors,
                 'pseudocount': args.pseudocount}

    wr = writeBedGraph.WriteBedGraph([args.bamfile1, args.bamfile2], args.binSize, 0,
                                     stepSize=args.binSize,
                                     region=args.region,
                                     numberOfProcessors=args.numberOfProcessors,
                                     extendReads=args.extendReads,
                                     blackListFileName=args.blackListFileName,
                                     minMappingQuality=args.minMappingQuality,
                                     ignoreDuplicates=args.ignoreDuplicates,
                                     center_read=args.centerReads,
                                     zerosToNans=args.skipNonCoveredRegions,
                                     skipZeroOverZero=args.skipZeroOverZero,
                                     samFlag_include=args.samFlagInclude,
                                     samFlag_exclude=args.samFlagExclude,
                                     minFragmentLength=args.minFragmentLength,
                                     maxFragmentLength=args.maxFragmentLength,
                                     chrsToSkip=args.ignoreForNormalization,
                                     verbose=args.verbose)

    wr.run(FUNC, func_args, args.outFileName,
           blackListFileName=args.blackListFileName,
           format=args.outFileFormat, smoothLength=args.smoothLength)