예제 #1
0
def parseArguments():
    """Build the command-line parser for bamCompare.

    No options are declared here directly; the returned parser inherits all
    of them from the parent parsers created below. ``add_help=False`` is
    passed, so this parser does not add its own ``-h/--help`` option.

    Returns:
        argparse.ArgumentParser: the assembled parser (nothing is parsed yet).
    """
    common_opts = parserCommon.getParentArgParse()
    read_opts = parserCommon.read_options()
    norm_opts = parserCommon.normalization_options()
    required_opts = getRequiredArgs()
    optional_opts = getOptionalArgs()
    output_opts = parserCommon.output()

    # Adjacent literals are concatenated into a single description string.
    tool_description = (
        'This tool compares two BAM files based on the number of '
        'mapped reads. To compare the BAM files, the genome is partitioned '
        'into bins of equal size, then the number of reads found in each bin'
        ' is counted per file, and finally a summary value is '
        'reported. This value can be the ratio of the number of reads per '
        'bin, the log2 of the ratio, or the difference. This tool can '
        'normalize the number of reads in each BAM file using the SES method '
        'proposed by Diaz et al. (2012) "Normalization, bias correction, and '
        'peak calling for ChIP-seq". Statistical Applications in Genetics '
        'and Molecular Biology, 11(3). Normalization based on read counts '
        'is also available. The output is either a bedgraph or bigWig file '
        'containing the bin location and the resulting comparison value. '
        'Note that *each end* in a pair (for paired-end reads) is treated '
        'independently. If this is undesirable, then use the --samFlagInclude '
        'or --samFlagExclude options.'
    )

    return argparse.ArgumentParser(
        parents=[required_opts, output_opts, optional_opts,
                 common_opts, norm_opts, read_opts],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=tool_description,
        usage=' bamCompare -b1 treatment.bam -b2 control.bam -o log2ratio.bw',
        add_help=False,
    )
예제 #2
0
def parse_arguments(args=None):
    """Build the command-line parser for plotEnrichment.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser combining the plotEnrichment options
        with the shared parent and read-option parsers.
    """
    enrichment_opts = plot_enrichment_args()
    # --region, --blackListFileName, -p and -v
    shared_opts = parserCommon.getParentArgParse(binSize=False)
    # --extend reads and such
    read_opts = parserCommon.read_options()

    # RawDescriptionHelpFormatter prints this text as written.
    overview = """
Tool for calculating and plotting the signal enrichment in either regions in BED
format or feature types (column 3) in GTF format. The underlying datapoints can also be output.
Metrics are plotted as a fraction of total reads. Regions in a BED file are assigned to the 'peak' feature.

detailed help:

  plotEnrichment -h

"""
    examples = (
        'example usages:\n'
        'plotEnrichment -b file1.bam file2.bam --BED peaks.bed -o enrichment.png\n\n'
        ' \n\n'
    )

    return argparse.ArgumentParser(
        parents=[enrichment_opts, shared_opts, read_opts],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=overview,
        epilog=examples,
    )
예제 #3
0
def parse_arguments(args=None):
    """Build the command-line parser for plotCoverage.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser with the inherited options plus a
        ``--version`` flag.
    """
    shared_opts = parserCommon.getParentArgParse(binSize=False)
    read_opts = parserCommon.read_options()

    overview = """

plotCoverage samples 1 million positions of the genome to build
a coverage histogram. Multiple BAM files are accepted but all should
correspond to the same genome assembly.


detailed help:
  plotCoverage  -h

"""
    examples = ('example usages:\nplotCoverage '
                '--bamfiles file1.bam file2.bam -out results.png\n\n'
                ' \n\n')

    cli = argparse.ArgumentParser(
        parents=[required_args(), shared_opts, read_opts],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False,
        description=overview,
        epilog=examples,
        conflict_handler='resolve',
    )

    # The version flag is the only option declared directly on this parser.
    version_text = 'plotCoverage {}'.format(__version__)
    cli.add_argument('--version', action='version', version=version_text)

    return cli
예제 #4
0
def parse_arguments(args=None):
    """Create the plotCoverage argument parser.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: the parser, including a ``--version`` option
        added on top of the inherited parent options.
    """
    common_parser = parserCommon.getParentArgParse(binSize=False)
    reads_parser = parserCommon.read_options()

    parser_settings = {
        "parents": [required_args(), common_parser, reads_parser],
        "formatter_class": argparse.RawDescriptionHelpFormatter,
        "add_help": False,
        "description": """

plotCoverage samples 1 million positions of the genome to build
a coverage histogram. Multiple BAM files are accepted but all should
correspond to the same genome assembly.


detailed help:
  plotCoverage  -h

""",
        "epilog": (
            "example usages:\nplotCoverage "
            "--bamfiles file1.bam file2.bam -out results.png\n\n"
            " \n\n"
        ),
        "conflict_handler": "resolve",
    }
    coverage_parser = argparse.ArgumentParser(**parser_settings)

    coverage_parser.add_argument(
        "--version",
        action="version",
        version="plotCoverage {}".format(__version__),
    )

    return coverage_parser
예제 #5
0
def parse_arguments(args=None):
    """Build the command-line parser for plotCoverage.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser with the inherited options plus a
        ``--version`` flag.
    """
    shared_opts = parserCommon.getParentArgParse(binSize=False)
    read_opts = parserCommon.read_options()

    overview = """

This tool is useful to assess the sequencing depth of a given sample.
It samples 1 million bp, counts the number of overlapping reads and can report
a histogram that tells you how many bases are covered how many times.
Multiple BAM files are accepted, but they all should correspond to the same genome assembly.

detailed usage help:
 $ plotCoverage  -h

"""
    examples = ('example usages:\nplotCoverage '
                '--bamfiles file1.bam file2.bam -o results.png\n\n'
                ' \n\n')

    cli = argparse.ArgumentParser(
        parents=[required_args(), shared_opts, read_opts],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False,
        description=overview,
        epilog=examples,
        conflict_handler='resolve',
    )

    # The version flag is the only option declared directly on this parser.
    version_text = 'plotCoverage {}'.format(__version__)
    cli.add_argument('--version', action='version', version=version_text)

    return cli
예제 #6
0
def parse_arguments(args=None):
    """Create the plotCoverage argument parser.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: the parser, including a ``--version`` option
        added on top of the inherited parent options.
    """
    common_parser = parserCommon.getParentArgParse(binSize=False)
    reads_parser = parserCommon.read_options()

    parser_settings = {
        'parents': [required_args(), common_parser, reads_parser],
        'formatter_class': argparse.RawDescriptionHelpFormatter,
        'add_help': False,
        'description': """

This tool is useful to assess the sequencing depth of a given sample.
It samples 1 million bp, counts the number of overlapping reads and can report
a histogram that tells you how many bases are covered how many times.
Multiple BAM files are accepted, but they all should correspond to the same genome assembly.

detailed usage help:
 $ plotCoverage  -h

""",
        'epilog': ('example usages:\nplotCoverage '
                   '--bamfiles file1.bam file2.bam -o results.png\n\n'
                   ' \n\n'),
        'conflict_handler': 'resolve',
    }
    coverage_parser = argparse.ArgumentParser(**parser_settings)

    coverage_parser.add_argument(
        '--version',
        action='version',
        version='plotCoverage {}'.format(__version__),
    )

    return coverage_parser
예제 #7
0
def parseArguments():
    """Build the command-line parser for bamCompare.

    All options come from the parent parsers created below; this parser adds
    none of its own and is created with ``add_help=False``.

    Returns:
        argparse.ArgumentParser: the assembled parser (nothing is parsed yet).
    """
    common_opts = parserCommon.getParentArgParse()
    read_opts = parserCommon.read_options()
    norm_opts = parserCommon.normalization_options()
    required_opts = getRequiredArgs()
    optional_opts = getOptionalArgs()
    output_opts = parserCommon.output()

    # Adjacent literals are concatenated into a single description string.
    tool_description = (
        'This tool compares two BAM files based on the number of '
        'mapped reads. To compare the BAM files, the genome is partitioned '
        'into bins of equal size, then the number of reads found in each is counted per file '
        'and finally a summary value is '
        'reported. This value can be the ratio of the number of reads per '
        'bin, the log2 of the ratio or the difference. This tool can '
        'normalize the number of reads in each BAM file using the SES method '
        'proposed in Diaz et al. (2012). "Normalization, bias correction, and '
        'peak calling for ChIP-seq". Statistical applications in genetics '
        'and molecular biology, 11(3). Normalization based on read counts '
        'is also available. The output is either a bedgraph or bigWig file '
        'containing the bin location and the resulting comparison values. By '
        'default, if reads are mated, the fragment length reported in the BAM '
        'file is used. In the case of paired-end mapping, each mate '
        'is treated independently to avoid a bias when a mixture of concordant '
        'and discordant pairs is present. This means that *each end* will '
        'be extended to match the fragment length.'
    )
    usage_example = ('An example usage is:\n bamCompare '
                     '-b1 treatment.bam -b2 control.bam -o log2ratio.bw')

    return argparse.ArgumentParser(
        parents=[required_opts, output_opts, optional_opts,
                 common_opts, norm_opts, read_opts],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=tool_description,
        usage=usage_example,
        add_help=False,
    )
예제 #8
0
def parseArguments():
    """Build the command-line parser for bamCoverage.

    All options come from the parent parsers created below; this parser adds
    none of its own and is created with ``add_help=False``.

    Returns:
        argparse.ArgumentParser: the assembled parser (nothing is parsed yet).
    """
    parentParser = parserCommon.getParentArgParse()
    bamParser = parserCommon.read_options()
    normalizationParser = parserCommon.normalization_options()
    requiredArgs = get_required_args()
    optionalArgs = get_optional_args()
    outputParser = parserCommon.output()
    parser = \
        argparse.ArgumentParser(
            parents=[requiredArgs, outputParser, optionalArgs,
                     parentParser, normalizationParser, bamParser],
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            description='This tool takes an alignment of reads or fragments '
            'as input (BAM file) and generates a coverage track (bigWig or '
            'bedGraph) as output. '
            'The coverage is calculated as the number of reads per bin, '
            'where bins are short consecutive counting windows of a defined '
            'size. It is possible to extend the length of the reads '
            'to better reflect the actual fragment length. *bamCoverage* '
            'offers normalization by scaling factor, Reads Per Kilobase per '
            'Million mapped reads (RPKM), counts per million (CPM), bins per '
            'million mapped reads (BPM) and 1x depth (reads per genome '
            'coverage, RPGC).\n',
            # The two literals are concatenated; the explicit "\n " keeps the
            # example command from being glued to the colon.
            usage='An example usage is:\n '
            '$ bamCoverage -b reads.bam -o coverage.bw',
            add_help=False)

    return parser
예제 #9
0
def parseArguments():
    """Assemble the bamCompare argument parser from its parent parsers.

    The parser is created with ``add_help=False`` and declares no options of
    its own; every option is inherited through ``parents``.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    shared = parserCommon.getParentArgParse()
    bam_reads = parserCommon.read_options()
    normalization = parserCommon.normalization_options()
    required = getRequiredArgs()
    optional = getOptionalArgs()
    output = parserCommon.output()

    # Order of this list is kept exactly as in the original call.
    inherited = [required, output, optional, shared, normalization, bam_reads]

    about = (
        'This tool compares two BAM files based on the number of '
        'mapped reads. To compare the BAM files, the genome is partitioned '
        'into bins of equal size, then the number of reads found in each bin'
        ' is counted per file, and finally a summary value is '
        'reported. This value can be the ratio of the number of reads per '
        'bin, the log2 of the ratio, or the difference. This tool can '
        'normalize the number of reads in each BAM file using the SES method '
        'proposed by Diaz et al. (2012) "Normalization, bias correction, and '
        'peak calling for ChIP-seq". Statistical Applications in Genetics '
        'and Molecular Biology, 11(3). Normalization based on read counts '
        'is also available. The output is either a bedgraph or bigWig file '
        'containing the bin location and the resulting comparison value. '
        'Note that *each end* in a pair (for paired-end reads) is treated '
        'independently. If this is undesirable, then use the --samFlagInclude '
        'or --samFlagExclude options.'
    )

    return argparse.ArgumentParser(
        parents=inherited,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=about,
        usage=' bamCompare -b1 treatment.bam -b2 control.bam -o log2ratio.bw',
        add_help=False,
    )
예제 #10
0
def parse_arguments(args=None):
    """Create the plotEnrichment argument parser.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser inheriting the plotEnrichment, shared
        and read-option arguments.
    """
    tool_args = plot_enrichment_args()

    # --region, --blackListFileName, -p and -v
    common_parser = parserCommon.getParentArgParse(binSize=False)

    # --extend reads and such
    reads_parser = parserCommon.read_options()

    parser_settings = {
        'formatter_class': argparse.RawDescriptionHelpFormatter,
        'description': """
Tool for calculating and plotting the signal enrichment in either regions in BED
format or feature types (column 3) in GTF format. The underlying datapoints can also be output.
Metrics are plotted as a fraction of total reads. Regions in a BED file are assigned to the 'peak' feature.

detailed help:

  plotEnrichment -h

""",
        'epilog': ('example usages:\n'
                   'plotEnrichment -b file1.bam file2.bam --BED peaks.bed -o enrichment.png\n\n'
                   ' \n\n'),
        'parents': [tool_args, common_parser, reads_parser],
    }

    return argparse.ArgumentParser(**parser_settings)
예제 #11
0
def parseArguments():
    """Build the command-line parser for bamCompare.

    All options come from the parent parsers created below; this parser adds
    none of its own and is created with ``add_help=False``.

    Returns:
        argparse.ArgumentParser: the assembled parser (nothing is parsed yet).
    """
    parentParser = parserCommon.getParentArgParse()
    bamParser = parserCommon.read_options()
    normalizationParser = parserCommon.normalization_options()
    requiredArgs = getRequiredArgs()
    optionalArgs = getOptionalArgs()
    outputParser = parserCommon.output()
    parser = argparse.ArgumentParser(
        parents=[requiredArgs, outputParser, optionalArgs,
                 parentParser, normalizationParser, bamParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # Adjacent literals are concatenated, so each piece must carry its
        # own separating space (the 'each bin ' piece previously lacked one).
        description='This tool compares two BAM files based on the number of '
        'mapped reads. To compare the BAM files, the genome is partitioned '
        'into bins of equal size, then the number of reads found in each bin '
        'is counted per file and finally a summary value is '
        'reported. This value can be the ratio of the number of reads per '
        'bin, the log2 of the ratio or the difference. \nThis tool can '
        'normalize the number of reads in each BAM file using the SES method '
        'proposed by Diaz et al. (2012) "Normalization, bias correction, and '
        'peak calling for ChIP-seq". Statistical Applications in Genetics '
        'and Molecular Biology, 11(3). Normalization based on read counts '
        'is also available. \nThe output is either a bedgraph or bigWig file '
        'containing the bin location and the resulting comparison value. By '
        'default, if reads are paired, the fragment length reported in the BAM '
        'file is used. Each mate, however, '
        'is treated independently to avoid a bias when a mixture of concordant '
        'and discordant pairs is present. This means that *each end* will '
        'be extended to match the fragment length.',

        usage=' bamCompare -b1 treatment.bam -b2 control.bam -o log2ratio.bw',

        add_help=False)

    return parser
예제 #12
0
def parseArguments():
    """Assemble the bamCoverage argument parser from its parent parsers.

    The parser is created with ``add_help=False`` and declares no options of
    its own; every option is inherited through ``parents``.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parentParser = parserCommon.getParentArgParse()
    bamParser = parserCommon.read_options()
    normalizationParser = parserCommon.normalization_options()
    requiredArgs = get_required_args()
    optionalArgs = get_optional_args()
    outputParser = parserCommon.output()
    parser = \
        argparse.ArgumentParser(
            parents=[requiredArgs, outputParser, optionalArgs,
                     parentParser, normalizationParser, bamParser],
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            description='This tool takes an alignment of reads or fragments '
            'as input (BAM file) and generates a coverage track (bigWig or '
            'bedGraph) as output. '
            'The coverage is calculated as the number of reads per bin, '
            'where bins are short consecutive counting windows of a defined '
            'size. It is possible to extend the length of the reads '
            'to better reflect the actual fragment length. *bamCoverage* '
            'offers normalization by scaling factor, Reads Per Kilobase per '
            'Million mapped reads (RPKM), counts per million (CPM), bins per '
            'million mapped reads (BPM) and 1x depth (reads per genome '
            'coverage, RPGC).\n',
            # Without the "\n " the concatenated literals would read
            # "An example usage is:$ bamCoverage ...".
            usage='An example usage is:\n '
            '$ bamCoverage -b reads.bam -o coverage.bw',
            add_help=False)

    return parser
예제 #13
0
def parse_arguments(args=None):
    """Build the command-line parser for plotFingerprint.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser inheriting every option from the
        parent parsers; created with ``add_help=False`` and
        ``conflict_handler='resolve'``.
    """
    shared_opts = parserCommon.getParentArgParse(binSize=False)
    required_opts = get_required_args()
    output_opts = get_output_args()
    optional_opts = get_optional_args()
    read_opts = parserCommon.read_options()

    tool_description = (
        'This tool samples indexed BAM files '
        'and plots a profile of cumulative read coverages for each. '
        'All reads overlapping a window (bin) of the '
        'specified length are counted; '
        'these counts are sorted '
        'and the cumulative sum is finally plotted. '
    )
    usage_example = (
        'An example usage is: plotFingerprint -b treatment.bam control.bam '
        '-plot fingerprint.png'
    )

    return argparse.ArgumentParser(
        parents=[required_opts, output_opts, read_opts, optional_opts,
                 shared_opts],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=tool_description,
        conflict_handler='resolve',
        usage=usage_example,
        add_help=False,
    )
예제 #14
0
def parse_arguments(args=None):
    """Build the command-line parser for computeGCBias.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser inheriting the required arguments and
        the shared parent options; created with ``add_help=False``.
    """
    parentParser = parserCommon.getParentArgParse(binSize=False, blackList=True)
    requiredArgs = getRequiredArgs()
    parser = argparse.ArgumentParser(
        parents=[requiredArgs, parentParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # Adjacent literals are concatenated, so 'can be used to ' needs its
        # trailing space to avoid rendering "tocorrect".
        description='Computes the GC-bias using Benjamini\'s method '
        '[Benjamini & Speed (2012). Nucleic Acids Research, 40(10). doi: 10.1093/nar/gks001]. '
        'The GC-bias is visualized and the resulting table can be used to '
        'correct the bias with `correctGCBias`.',
        usage='\n computeGCBias '
        '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit -l 200 --GCbiasFrequenciesFile freq.txt [options]',
        conflict_handler='resolve',
        add_help=False)

    return parser
예제 #15
0
def parse_arguments(args=None):
    """Build the command-line parser for computeGCBias.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser inheriting the required arguments and
        the shared parent options; created with ``add_help=False``.
    """
    shared_opts = parserCommon.getParentArgParse(binSize=False)
    required_opts = getRequiredArgs()

    tool_description = (
        'Computes the GC-bias using Benjamini\'s method '
        '[Benjamini & Speed (2012). Nucleic acids research, 40(10)]. '
        'The resulting GC-bias can later be used to plot '
        'or correct the bias.'
    )
    usage_example = (
        'An example usage is:\n computeGCBias '
        '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit -l 200 --GCbiasFrequenciesFile freq.txt [options]'
    )

    return argparse.ArgumentParser(
        parents=[required_opts, shared_opts],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=tool_description,
        usage=usage_example,
        conflict_handler='resolve',
        add_help=False,
    )
예제 #16
0
def parse_arguments(args=None):
    """Build the command-line parser for correctGCBias.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser inheriting the required arguments and
        the shared parent options; created with ``add_help=False``.
    """
    parentParser = parserCommon.getParentArgParse(binSize=True)
    requiredArgs = getRequiredArgs()
    parser = argparse.ArgumentParser(
        parents=[requiredArgs, parentParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # The prerequisite tool is named computeGCBias (one word), matching
        # the name used in the other tools' help texts.
        description='Corrects the GC-bias using Benjamini\'s method '
        '[Benjamini & Speed (2012). Nucleic acids research, 40(10)]. '
        'The tool computeGCBias needs to be run first.',
        usage='An example usage is:\n correctGCBias '
        '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit '
        '--GCbiasFrequenciesFile freq.txt -o gc_corrected.bam '
        '[options]',
        conflict_handler='resolve',
        add_help=False)
    return parser
예제 #17
0
def parse_arguments(args=None):
    """Create the computeGCBias argument parser.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser whose options all come from the
        required-argument and shared parent parsers.
    """
    parentParser = parserCommon.getParentArgParse(binSize=False, blackList=True)
    requiredArgs = getRequiredArgs()
    parser = argparse.ArgumentParser(
        parents=[requiredArgs, parentParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # Each concatenated piece carries its own trailing space; the
        # 'can be used to ' piece previously lacked one ("tocorrect").
        description='Computes the GC-bias using Benjamini\'s method '
        '[Benjamini & Speed (2012). Nucleic Acids Research, 40(10). doi: 10.1093/nar/gks001]. '
        'The GC-bias is visualized and the resulting table can be used to '
        'correct the bias with `correctGCBias`.',
        usage='\n computeGCBias '
        '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit -l 200 --GCbiasFrequenciesFile freq.txt [options]',
        conflict_handler='resolve',
        add_help=False)

    return parser
예제 #18
0
def parse_arguments(args=None):
    """Build the command-line parser for correctGCBias.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser inheriting the required arguments and
        the shared parent options; created with ``add_help=False``.
    """
    shared_opts = parserCommon.getParentArgParse(binSize=True, blackList=False)
    required_opts = getRequiredArgs()

    tool_description = (
        'This tool corrects the GC-bias using the'
        ' method proposed by [Benjamini & Speed (2012). '
        'Nucleic Acids Research, 40(10)]. It will remove reads'
        ' from regions with too high coverage compared to the'
        ' expected values (typically GC-rich regions) and will'
        ' add reads to regions where too few reads are seen '
        '(typically AT-rich regions). '
        'The tool ``computeGCBias`` needs to be run first to generate the '
        'frequency table needed here.'
    )
    usage_example = (
        'An example usage is:\n correctGCBias '
        '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit '
        '--GCbiasFrequenciesFile freq.txt -o gc_corrected.bam '
        '[options]'
    )

    return argparse.ArgumentParser(
        parents=[required_opts, shared_opts],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=tool_description,
        usage=usage_example,
        conflict_handler='resolve',
        add_help=False,
    )
예제 #19
0
def parse_arguments(args=None):
    """Create the plotFingerprint argument parser.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser whose options are all inherited from
        the parent parsers listed in ``parents``.
    """
    common_parser = parserCommon.getParentArgParse(binSize=False)
    required = get_required_args()
    output = get_output_args()
    optional = get_optional_args()
    reads_parser = parserCommon.read_options()

    # Order of this list is kept exactly as in the original call.
    inherited = [required, output, reads_parser, optional, common_parser]

    about = (
        'This tool samples indexed BAM files '
        'and plots a profile of cumulative read coverages for each. '
        'All reads overlapping a window (bin) of the '
        'specified length are counted; '
        'these counts are sorted '
        'and the cumulative sum is finally plotted. '
    )
    example = ('An example usage is: plotFingerprint -b treatment.bam control.bam '
               '-plot fingerprint.png')

    return argparse.ArgumentParser(
        parents=inherited,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=about,
        conflict_handler='resolve',
        usage=example,
        add_help=False,
    )
예제 #20
0
def parse_arguments(args=None):
    """Create the correctGCBias argument parser.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: parser whose options all come from the
        required-argument and shared parent parsers.
    """
    common_parser = parserCommon.getParentArgParse(binSize=True, blackList=False)
    required = getRequiredArgs()

    parser_settings = {
        'parents': [required, common_parser],
        'formatter_class': argparse.ArgumentDefaultsHelpFormatter,
        'description': (
            'This tool corrects the GC-bias using the'
            ' method proposed by [Benjamini & Speed (2012). '
            'Nucleic Acids Research, 40(10)]. It will remove reads'
            ' from regions with too high coverage compared to the'
            ' expected values (typically GC-rich regions) and will'
            ' add reads to regions where too few reads are seen '
            '(typically AT-rich regions). '
            'The tool ``computeGCBias`` needs to be run first to generate the '
            'frequency table needed here.'
        ),
        'usage': (
            'An example usage is:\n correctGCBias '
            '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit '
            '--GCbiasFrequenciesFile freq.txt -o gc_corrected.bam '
            '[options]'
        ),
        'conflict_handler': 'resolve',
        'add_help': False,
    }

    return argparse.ArgumentParser(**parser_settings)
예제 #21
0
def parse_arguments(args=None):
    """Build the command-line parser for comparing two bigWig files.

    Options inherited from the shared parent and output parsers are combined
    with the tool-specific options declared below.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: the configured parser (nothing is parsed
        yet).
    """
    parentParser = parserCommon.getParentArgParse()
    outputParser = parserCommon.output()
    parser = argparse.ArgumentParser(
        parents=[parentParser, outputParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # Adjacent literals are concatenated; 'number of reads ' needs its
        # trailing space so the text does not render as "readsper bin".
        description='This tool compares two bigWig files based on the number '
        'of mapped reads. To compare the bigWig files, the genome is '
        'partitioned into bins of equal size, then the number of reads found '
        'in each BAM file are counted per bin and finally a summary '
        'value is reported. This value can be the ratio of the number of reads '
        'per bin, the log2 of the ratio, the sum or the difference.')

    # define the arguments
    parser.add_argument('--bigwig1',
                        '-b1',
                        metavar='Bigwig file',
                        help='Bigwig file 1. Usually the file for the '
                        'treatment.',
                        required=True)

    parser.add_argument('--bigwig2',
                        '-b2',
                        metavar='Bigwig file',
                        help='Bigwig file 2. Usually the file for the '
                        'control.',
                        required=True)

    parser.add_argument('--scaleFactors',
                        help='Set this parameter to multiply the bigwig values '
                        'by a constant. The format is '
                        'scaleFactor1:scaleFactor2. '
                        'For example 0.7:1 to scale the first bigwig file '
                        'by 0.7 while not scaling the second bigwig file',
                        default=None,
                        required=False)

    parser.add_argument('--pseudocount',
                        help='small number to avoid x/0. Only useful '
                        'when ratio = log2 or ratio',
                        default=1,
                        type=float,
                        required=False)

    # NOTE(review): "if the ratio is less than 0" is kept as written; confirm
    # the threshold against the implementation of reciprocal_ratio.
    parser.add_argument(
        '--ratio',
        help='The default is to compute the log2(ratio) '
        'between the two samples. The reciprocal '
        'ratio returns the '
        'negative of the inverse of the ratio '
        'if the ratio is less than 0. The resulting '
        'values are interpreted as negative fold changes. '
        'Other possible operations are : simple ratio, '
        'subtraction, sum',
        default='log2',
        choices=['log2', 'ratio', 'subtract', 'add', 'reciprocal_ratio'],
        required=False)

    parser.add_argument(
        '--skipNonCoveredRegions',
        '--skipNAs',
        help=
        'This parameter determines if non-covered regions (regions without a score) '
        'in the bigWig files should be skipped. The default is to treat those '
        'regions as having a value of zero. '
        'The decision to skip non-covered regions '
        'depends on the interpretation of the data. Non-covered regions '
        'in a bigWig file may represent repetitive regions that should '
        'be skipped. Alternatively, the interpretation of non-covered regions as '
        'zeros may be wrong and this option should be used ',
        action='store_true')

    return parser
예제 #22
0
def parse_arguments(args=None):
    """Build the multiBigwigSummary parser with its two sub-commands.

    The top-level parser carries the general description and a ``--version``
    flag. Two sub-parsers, ``bins`` and ``BED-file``, are registered; the
    chosen sub-command name is stored under ``command``.

    Args:
        args: not used by this function.

    Returns:
        argparse.ArgumentParser: the top-level parser.
    """
    overview = """

Given typically two or more bigWig files, ``multiBigwigSummary`` computes the average scores for each of the files in every genomic region.
This analysis is performed for the entire genome by running the program in ``bins`` mode, or for certain user selected regions in ``BED-file``
mode. Most commonly, the default output of ``multiBigwigSummary`` (a compressed numpy array, .npz) is used by other tools such as ``plotCorrelation`` or ``plotPCA`` for visualization and diagnostic purposes.

Note that using a single bigWig file is only recommended if you want to produce a bedGraph file (i.e., with the ``--outRawCounts`` option; the default output file cannot be used by ANY deepTools program if only a single file was supplied!).

A detailed sub-commands help is available by typing:

  multiBigwigSummary bins -h

  multiBigwigSummary BED-file -h


"""
    examples = ('example usage:\n multiBigwigSummary bins '
                '-b file1.bw file2.bw -out results.npz\n\n'
                'multiBigwigSummary BED-file -b file1.bw file2.bw -out results.npz\n'
                '--BED selection.bed'
                ' \n\n')

    top_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=overview,
        epilog=examples,
        conflict_handler='resolve',
    )
    top_parser.add_argument(
        '--version',
        action='version',
        version='multiBigwigSummary {}'.format(__version__),
    )

    commands = top_parser.add_subparsers(title="commands",
                                         dest='command',
                                         metavar='')

    # Parent parsers shared by both sub-commands.
    shared_opts = parserCommon.getParentArgParse(binSize=False)
    deepblue_opts = parserCommon.deepBlueOptionalArgs()

    # bins mode options
    bins_parents = [
        multiBigwigSummaryArgs(case='bins'),
        shared_opts,
        parserCommon.gtf_options(suppress=True),
        deepblue_opts,
    ]
    bins_help = ("The average score is based on equally sized bins "
                 "(10 kilobases by default), which consecutively cover the "
                 "entire genome. The only exception is the last bin of a chromosome, which "
                 "is often smaller. The output of this mode is commonly used to assess the "
                 "overall similarity of different bigWig files.")
    commands.add_parser(
        'bins',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=bins_parents,
        help=bins_help,
        add_help=False,
        usage='multiBigwigSummary '
              '-b file1.bw file2.bw '
              '-out results.npz\n',
    )

    # BED file arguments
    bed_parents = [
        multiBigwigSummaryArgs(case='BED-file'),
        shared_opts,
        parserCommon.gtf_options(),
        deepblue_opts,
    ]
    bed_help = ("The user provides a BED file that contains all regions "
                "that should be considered for the analysis. A "
                "common use is to compare scores (e.g. ChIP-seq scores) between "
                "different samples over a set of pre-defined peak regions.")
    commands.add_parser(
        'BED-file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=bed_parents,
        help=bed_help,
        usage='multiBigwigSummary '
              '-b file1.bw file2.bw '
              '-out results.npz --BED selection.bed\n',
        add_help=False,
    )

    return top_parser
예제 #23
0
def parse_arguments(args=None):
    """Build the argument parser for comparing two bigWig files.

    The parser inherits the options supplied by
    ``parserCommon.getParentArgParse()``, ``parserCommon.output()`` and
    ``parserCommon.deepBlueOptionalArgs()`` and then registers the options
    that are specific to this tool (``--bigwig1``, ``--bigwig2``,
    ``--scaleFactors``, ``--pseudocount``, ``--skipZeroOverZero``,
    ``--operation`` and ``--skipNonCoveredRegions``).

    Args:
        args: Not used by this function; nothing is parsed here. The
            parameter is kept so that existing callers remain valid.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parentParser = parserCommon.getParentArgParse()
    outputParser = parserCommon.output()
    dbParser = parserCommon.deepBlueOptionalArgs()
    parser = argparse.ArgumentParser(
        parents=[parentParser, outputParser, dbParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # Adjacent string literals are joined verbatim, so every fragment
        # but the last has to end with a space.
        description='This tool compares two bigWig files based on the number '
        'of mapped reads. To compare the bigWig files, the genome is '
        'partitioned into bins of equal size, then the number of reads found '
        'in each BAM file are counted per bin and finally a summary '
        'value is reported. This value can be the ratio of the number of reads '
        'per bin, the log2 of the ratio, the sum or the difference.')

    # define the arguments
    parser.add_argument('--bigwig1',
                        '-b1',
                        metavar='Bigwig file',
                        help='Bigwig file 1. Usually the file for the '
                        'treatment.',
                        required=True)

    parser.add_argument('--bigwig2',
                        '-b2',
                        metavar='Bigwig file',
                        help='Bigwig file 2. Usually the file for the '
                        'control.',
                        required=True)

    parser.add_argument('--scaleFactors',
                        help='Set this parameter to multiply the bigwig values '
                        'by a constant. The format is '
                        'scaleFactor1:scaleFactor2. '
                        'For example 0.7:1 to scale the first bigwig file '
                        'by 0.7 while not scaling the second bigwig file',
                        default=None,
                        required=False)

    # NOTE(review): the default is the bare number 1, whereas values given on
    # the command line are gathered into a list (nargs='+') before reaching
    # the action. requiredLength(1, 2) presumably enforces one or two values,
    # as the help text describes -- confirm in parserCommon.
    parser.add_argument(
        '--pseudocount',
        help='A small number to avoid x/0. Only useful '
        'together with --operation log2 or --operation ratio. '
        'You can specify different values as pseudocounts for '
        'the numerator and the denominator by providing two '
        'values (the first value is used as the numerator '
        'pseudocount and the second the denominator pseudocount). (Default: %(default)s)',
        default=1,
        nargs='+',
        action=parserCommon.requiredLength(1, 2),
        type=float,
        required=False)

    parser.add_argument('--skipZeroOverZero',
                        help='Skip bins where BOTH BAM files lack coverage. '
                        'This is determined BEFORE any applicable pseudocount '
                        'is added.',
                        action='store_true')

    parser.add_argument(
        '--operation',
        help='The default is to output the log2ratio of the '
        'two samples. The reciprocal ratio returns the '
        'negative of the inverse of the ratio '
        'if the ratio is less than 0. The resulting '
        'values are interpreted as negative fold changes. '
        'Instead of performing a '
        'computation using both files, the scaled signal can '
        'alternatively be output for the first or second file using '
        'the \'--operation first\' or \'--operation second\' (Default: %(default)s)',
        default='log2',
        choices=[
            'log2', 'ratio', 'subtract', 'add', 'mean', 'reciprocal_ratio',
            'first', 'second'
        ],
        required=False)

    # '--skipNAs' is registered as a second spelling of the same flag.
    parser.add_argument(
        '--skipNonCoveredRegions',
        '--skipNAs',
        help=
        'This parameter determines if non-covered regions (regions without a score) '
        'in the bigWig files should be skipped. The default is to treat those '
        'regions as having a value of zero. '
        'The decision to skip non-covered regions '
        'depends on the interpretation of the data. Non-covered regions '
        'in a bigWig file may represent repetitive regions that should '
        'be skipped. Alternatively, the interpretation of non-covered regions as '
        'zeros may be wrong and this option should be used ',
        action='store_true')

    return parser
예제 #24
0
def parse_arguments(args=None):
    """Create the ``multiBigwigSummary`` parser and its two sub-commands.

    A top-level parser carrying ``--version`` is built first; the ``bins``
    and ``BED-file`` sub-command parsers are then registered on it.

    Args:
        args: Ignored by this function; accepted only so the signature stays
            compatible with existing callers.

    Returns:
        The top-level ``argparse.ArgumentParser``.
    """
    overview = """

Given typically two or more bigWig files, ``multiBigwigSummary`` computes the average scores for each of the files in every genomic region.
This analysis is performed for the entire genome by running the program in ``bins`` mode, or for certain user selected regions in ``BED-file``
mode. Most commonly, the default output of ``multiBigwigSummary`` (a compressed numpy array, .npz) is used by other tools such as ``plotCorrelation`` or ``plotPCA`` for visualization and diagnostic purposes.

Note that using a single bigWig file is only recommended if you want to produce a bedGraph file (i.e., with the ``--outRawCounts`` option; the default output file cannot be used by ANY deepTools program if only a single file was supplied!).

A detailed sub-commands help is available by typing:

  multiBigwigSummary bins -h

  multiBigwigSummary BED-file -h


"""
    examples = (
        'example usage:\n multiBigwigSummary bins -b file1.bw file2.bw -o results.npz\n\n'
        'multiBigwigSummary BED-file -b file1.bw file2.bw -o results.npz\n'
        '--BED selection.bed \n\n'
    )

    parser = argparse.ArgumentParser(
        description=overview,
        epilog=examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument(
        '--version',
        action='version',
        version='multiBigwigSummary {}'.format(__version__))
    commands = parser.add_subparsers(
        title="commands", dest='command', metavar='')

    # Option groups shared by both sub-commands.
    shared_opts = parserCommon.getParentArgParse(binSize=False)
    deepblue_opts = parserCommon.deepBlueOptionalArgs()

    # ---- "bins" sub-command -------------------------------------------
    bins_parents = [
        multiBigwigSummaryArgs(case='bins'),
        shared_opts,
        parserCommon.gtf_options(suppress=True),
        deepblue_opts,
    ]
    bins_help = (
        "The average score is based on equally sized bins (10 kilobases by "
        "default), which consecutively cover the entire genome. The only "
        "exception is the last bin of a chromosome, which is often smaller. "
        "The output of this mode is commonly used to assess the overall "
        "similarity of different bigWig files."
    )
    commands.add_parser(
        'bins',
        parents=bins_parents,
        help=bins_help,
        usage='multiBigwigSummary bins -b file1.bw file2.bw -o results.npz\n',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        add_help=False)

    # ---- "BED-file" sub-command ---------------------------------------
    bed_parents = [
        multiBigwigSummaryArgs(case='BED-file'),
        shared_opts,
        parserCommon.gtf_options(),
        deepblue_opts,
    ]
    bed_help = (
        "The user provides a BED file that contains all regions that should "
        "be considered for the analysis. A common use is to compare scores "
        "(e.g. ChIP-seq scores) between different samples over a set of "
        "pre-defined peak regions."
    )
    commands.add_parser(
        'BED-file',
        parents=bed_parents,
        help=bed_help,
        usage='multiBigwigSummary BED-file -b file1.bw file2.bw '
              '-o results.npz --BED selection.bed\n',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        add_help=False)

    return parser
예제 #25
0
def parse_arguments(args=None):
    """Build the argument parser for comparing two bigWig files.

    The parser inherits the options supplied by
    ``parserCommon.getParentArgParse()``, ``parserCommon.output()`` and
    ``parserCommon.deepBlueOptionalArgs()`` and then registers the options
    that are specific to this tool (``--bigwig1``, ``--bigwig2``,
    ``--scaleFactors``, ``--pseudocount``, ``--ratio`` and
    ``--skipNonCoveredRegions``).

    Args:
        args: Not used by this function; nothing is parsed here. The
            parameter is kept so that existing callers remain valid.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parentParser = parserCommon.getParentArgParse()
    outputParser = parserCommon.output()
    dbParser = parserCommon.deepBlueOptionalArgs()
    parser = argparse.ArgumentParser(
        parents=[parentParser, outputParser, dbParser],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # Adjacent string literals are joined verbatim, so every fragment
        # but the last has to end with a space.
        description='This tool compares two bigWig files based on the number '
        'of mapped reads. To compare the bigWig files, the genome is '
        'partitioned into bins of equal size, then the number of reads found '
        'in each BAM file are counted per bin and finally a summary '
        'value is reported. This value can be the ratio of the number of reads '
        'per bin, the log2 of the ratio, the sum or the difference.')

    # define the arguments
    parser.add_argument('--bigwig1', '-b1',
                        metavar='Bigwig file',
                        help='Bigwig file 1. Usually the file for the '
                        'treatment.',
                        required=True)

    parser.add_argument('--bigwig2', '-b2',
                        metavar='Bigwig file',
                        help='Bigwig file 2. Usually the file for the '
                        'control.',
                        required=True)

    parser.add_argument('--scaleFactors',
                        help='Set this parameter to multiply the bigwig values '
                        'by a constant. The format is '
                        'scaleFactor1:scaleFactor2. '
                        'For example 0.7:1 to scale the first bigwig file '
                        'by 0.7 while not scaling the second bigwig file',
                        default=None,
                        required=False)

    parser.add_argument('--pseudocount',
                        help='small number to avoid x/0. Only useful '
                        'when ratio = log2 or ratio',
                        default=1,
                        type=float,
                        required=False)

    parser.add_argument('--ratio',
                        help='The default is to output the log2ratio of the '
                        'two samples. The reciprocal ratio returns the '
                        'negative of the inverse of the ratio '
                        'if the ratio is less than 0. The resulting '
                        'values are interpreted as negative fold changes. '
                        '*NOTE*: Only with --ratio subtract can --normalizeTo1x or '
                        '--normalizeUsingRPKM be used. Instead of performing a '
                        'computation using both files, the scaled signal can '
                        'alternatively be output for the first or second file using '
                        'the \'--ratio first\' or \'--ratio second\'',
                        default='log2',
                        choices=['log2', 'ratio', 'subtract', 'add', 'mean',
                                 'reciprocal_ratio', 'first', 'second'],
                        required=False)

    # '--skipNAs' is registered as a second spelling of the same flag.
    parser.add_argument('--skipNonCoveredRegions', '--skipNAs',
                        help='This parameter determines if non-covered regions (regions without a score) '
                        'in the bigWig files should be skipped. The default is to treat those '
                        'regions as having a value of zero. '
                        'The decision to skip non-covered regions '
                        'depends on the interpretation of the data. Non-covered regions '
                        'in a bigWig file may represent repetitive regions that should '
                        'be skipped. Alternatively, the interpretation of non-covered regions as '
                        'zeros may be wrong and this option should be used ',
                        action='store_true')

    return parser
예제 #26
0
def parse_arguments(args=None):
    """Build the ``bamCorrelate`` parser with its ``bins`` and ``BED-file`` sub-commands.

    A top-level parser carrying ``--version`` is created first. Each
    sub-command parser combines ``bamcorrelate_args(case=...)`` with the
    options from ``parserCommon.getParentArgParse(binSize=False)`` and
    ``parserCommon.read_options()``.

    Args:
        args: Not used by this function; nothing is parsed here. The
            parameter is kept so that existing callers remain valid.

    Returns:
        The top-level ``argparse.ArgumentParser``.
    """
    parser = \
        argparse.ArgumentParser(
            # Raw formatter: the description and epilog are printed with
            # their line breaks preserved.
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="""
bamCorrelate computes the read coverage in genomic regions of two or more BAM files.
This analysis is performed for the entire genome by running the program in 'bins' mode, or for certain user selected regions in 'BED-file'
mode. Most commonly, the output of bamCorrelate is used by other tools such as 'plotCorrelation' or 'plotPCA' for visualization and diagnostic purposes.

detailed sub-commands help available under:

  bamCorrelate bins -h

  bamCorrelate BED-file -h

""",
            epilog='example usages:\n'
                   'bamCorrelate bins --bamfiles file1.bam file2.bam -out results.npz \n\n'
                   'bamCorrelate BED-file --BED selection.bed --bamfiles file1.bam file2.bam \n'
                   '-out results.npz'
                   ' \n\n',
            conflict_handler='resolve')

    # '%(prog)s' is left for argparse to expand when the version is printed.
    parser.add_argument('--version', action='version',
                        version='%(prog)s {}'.format(__version__))
    subparsers = parser.add_subparsers(
        title="commands",
        dest='command',
        description='subcommands',
        help='subcommands',
        metavar='')

    # Option groups shared by both sub-commands.
    parent_parser = parserCommon.getParentArgParse(binSize=False)
    read_options_parser = parserCommon.read_options()

    # bins mode options
    subparsers.add_parser(
        'bins',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[bamcorrelate_args(case='bins'),
                 parent_parser, read_options_parser,
                 ],
        help="The coverage calculation is done for consecutive bins of equal "
             "size (10 kilobases by default). This mode is useful to assess the "
             "genome-wide similarity of BAM files. The bin size and "
             "distance between bins can be adjusted.",
        add_help=False,
        usage='%(prog)s '
              '--bamfiles file1.bam file2.bam '
              '-out results.npz \n')

    # BED file arguments
    subparsers.add_parser(
        'BED-file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[bamcorrelate_args(case='BED-file'),
                 parent_parser, read_options_parser,
                 ],
        help="The user provides a BED file that contains all regions "
             "that should be considered for the coverage analysis. A "
             "common use is to compare ChIP-seq coverages between two "
             "different samples for a set of peak regions.",
        usage='%(prog)s --BED selection.bed --bamfiles file1.bam file2.bam -out results.npz\n',
        add_help=False)

    return parser
예제 #27
0
def parse_arguments(args=None):
    """Build the ``multiBamSummary`` parser with its ``bins`` and ``BED-file`` sub-commands.

    A top-level parser carrying ``--version`` is created first. Each
    sub-command parser combines ``bamcorrelate_args(case=...)`` with the
    options from ``parserCommon.getParentArgParse(binSize=False)``,
    ``parserCommon.read_options()`` and ``parserCommon.gtf_options(...)``.

    Args:
        args: Not used by this function; nothing is parsed here. The
            parameter is kept so that existing callers remain valid.

    Returns:
        The top-level ``argparse.ArgumentParser``.
    """
    # The single-file note in the description refers to BAM input, matching
    # the rest of this tool's help text.
    parser = \
        argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="""

``multiBamSummary`` computes the read coverages for genomic regions for typically two or more BAM files.
The analysis can be performed for the entire genome by running the program in 'bins' mode.
If you want to count the read coverage for specific regions only, use the ``BED-file`` mode instead.
The standard output of ``multiBamSummary`` is a compressed numpy array (``.npz``).
It can be directly used to calculate and visualize pairwise correlation values between the read coverages using the tool 'plotCorrelation'.
Similarly, ``plotPCA`` can be used for principal component analysis of the read coverages using the .npz file.
Note that using a single BAM file is only recommended if you want to produce a bedGraph file (i.e., with the ``--outRawCounts`` option; the default output file cannot be used by ANY deepTools program if only a single file was supplied!).

A detailed sub-commands help is available by typing:

  multiBamSummary bins -h

  multiBamSummary BED-file -h


""",
            epilog='example usages:\n'
                   'multiBamSummary bins --bamfiles file1.bam file2.bam -out results.npz \n\n'
                   'multiBamSummary BED-file --BED selection.bed --bamfiles file1.bam file2.bam \n'
                   '-out results.npz'
                   ' \n\n',
            conflict_handler='resolve')

    # '%(prog)s' is left for argparse to expand when the version is printed.
    parser.add_argument('--version', action='version',
                        version='%(prog)s {}'.format(__version__))
    subparsers = parser.add_subparsers(
        title="commands",
        dest='command',
        description='subcommands',
        help='subcommands',
        metavar='')

    # Option groups shared by both sub-commands.
    parent_parser = parserCommon.getParentArgParse(binSize=False)
    read_options_parser = parserCommon.read_options()

    # bins mode options
    subparsers.add_parser(
        'bins',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[bamcorrelate_args(case='bins'),
                 parent_parser, read_options_parser,
                 parserCommon.gtf_options(suppress=True)
                 ],
        help="The coverage calculation is done for consecutive bins of equal "
             "size (10 kilobases by default). This mode is useful to assess the "
             "genome-wide similarity of BAM files. The bin size and "
             "distance between bins can be adjusted.",
        add_help=False,
        usage='%(prog)s '
              '--bamfiles file1.bam file2.bam '
              '-out results.npz \n')

    # BED file arguments
    subparsers.add_parser(
        'BED-file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[bamcorrelate_args(case='BED-file'),
                 parent_parser, read_options_parser,
                 parserCommon.gtf_options()
                 ],
        help="The user provides a BED file that contains all regions "
             "that should be considered for the coverage analysis. A "
             "common use is to compare ChIP-seq coverages between two "
             "different samples for a set of peak regions.",
        usage='%(prog)s --BED selection.bed --bamfiles file1.bam file2.bam -out results.npz\n',
        add_help=False)

    return parser
예제 #28
0
def parse_arguments(args=None):
    """Build the ``multiBamSummary`` parser with its ``bins`` and ``BED-file`` sub-commands.

    A top-level parser carrying ``--version`` is created first. Each
    sub-command parser combines ``bamcorrelate_args(case=...)`` with the
    options from ``parserCommon.getParentArgParse(binSize=False)``,
    ``parserCommon.read_options()`` and ``parserCommon.gtf_options(...)``.

    Args:
        args: Not used by this function; nothing is parsed here. The
            parameter is kept so that existing callers remain valid.

    Returns:
        The top-level ``argparse.ArgumentParser``.
    """
    # The single-file note in the description refers to BAM input, matching
    # the rest of this tool's help text.
    parser = \
        argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="""

``multiBamSummary`` computes the read coverages for genomic regions for typically two or more BAM files.
The analysis can be performed for the entire genome by running the program in 'bins' mode.
If you want to count the read coverage for specific regions only, use the ``BED-file`` mode instead.
The standard output of ``multiBamSummary`` is a compressed numpy array (``.npz``).
It can be directly used to calculate and visualize pairwise correlation values between the read coverages using the tool 'plotCorrelation'.
Similarly, ``plotPCA`` can be used for principal component analysis of the read coverages using the .npz file.
Note that using a single BAM file is only recommended if you want to produce a bedGraph file (i.e., with the ``--outRawCounts`` option; the default output file cannot be used by ANY deepTools program if only a single file was supplied!).

A detailed sub-commands help is available by typing:

  multiBamSummary bins -h

  multiBamSummary BED-file -h


""",
            epilog='example usages:\n'
                   'multiBamSummary bins --bamfiles file1.bam file2.bam -o results.npz \n\n'
                   'multiBamSummary BED-file --BED selection.bed --bamfiles file1.bam file2.bam \n'
                   '-o results.npz'
                   ' \n\n',
            conflict_handler='resolve')

    # '%(prog)s' is left for argparse to expand when the version is printed.
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {}'.format(__version__))
    subparsers = parser.add_subparsers(title="commands",
                                       dest='command',
                                       description='subcommands',
                                       help='subcommands',
                                       metavar='')

    # Option groups shared by both sub-commands.
    parent_parser = parserCommon.getParentArgParse(binSize=False)
    read_options_parser = parserCommon.read_options()

    # bins mode options
    subparsers.add_parser(
        'bins',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[
            bamcorrelate_args(case='bins'), parent_parser, read_options_parser,
            parserCommon.gtf_options(suppress=True)
        ],
        help="The coverage calculation is done for consecutive bins of equal "
        "size (10 kilobases by default). This mode is useful to assess the "
        "genome-wide similarity of BAM files. The bin size and "
        "distance between bins can be adjusted.",
        add_help=False,
        usage='%(prog)s '
        '--bamfiles file1.bam file2.bam '
        '-o results.npz \n')

    # BED file arguments
    subparsers.add_parser(
        'BED-file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[
            bamcorrelate_args(case='BED-file'), parent_parser,
            read_options_parser,
            parserCommon.gtf_options()
        ],
        help="The user provides a BED file that contains all regions "
        "that should be considered for the coverage analysis. A "
        "common use is to compare ChIP-seq coverages between two "
        "different samples for a set of peak regions.",
        usage=
        '%(prog)s --BED selection.bed --bamfiles file1.bam file2.bam -o results.npz\n',
        add_help=False)

    return parser
예제 #29
0
def parse_arguments(args=None):
    """Build the ``bigwigCorrelate`` parser with its ``bins`` and ``BED-file`` sub-commands.

    A top-level parser carrying ``--version`` is created first. Each
    sub-command parser combines ``bigwigCorrelateArgs(case=...)`` with the
    options from ``parserCommon.getParentArgParse(binSize=False)``.

    Args:
        args: Not used by this function; nothing is parsed here. The
            parameter is kept so that existing callers remain valid.

    Returns:
        The top-level ``argparse.ArgumentParser``.
    """
    parser = \
        argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="""

Given two or more bigWig files, bigwigCorrelate computes the average scores for each of the files in every genomic region.
This analysis is performed for the entire genome by running the program in 'bins' mode, or for certain user selected regions in 'BED-file'
mode. Most commonly, the output of bigwigCorrelate is used by other tools such as 'plotCorrelation' or 'plotPCA' for visualization and diagnostic purposes.

detailed sub-commands help available under:

  bigwigCorrelate bins -h

  bigwigCorrelate BED-file -h

""",
            epilog='example usages:\n bigwigCorrelate bins '
                   '-b file1.bw file2.bw -out results.npz\n\n'
                   'bigwigCorrelate BED-file -b file1.bw file2.bw -out results.npz\n'
                   '--BED selection.bed'
                   ' \n\n',
            conflict_handler='resolve')

    parser.add_argument('--version',
                        action='version',
                        version='bigwigCorrelate {}'.format(__version__))
    subparsers = parser.add_subparsers(title="commands",
                                       dest='command',
                                       metavar='')

    # Options shared by both sub-commands.
    parent_parser = parserCommon.getParentArgParse(binSize=False)

    # bins mode options
    # The usage line names the sub-command, as the epilog examples do.
    subparsers.add_parser(
        'bins',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[
            bigwigCorrelateArgs(case='bins'),
            parent_parser,
        ],
        help="The average score is based on equally sized bins "
        "(10 kilobases by default), which consecutively cover the "
        "entire genome. The only exception is the last bin of a chromosome, which "
        "is often smaller. The output of this mode is commonly used to assess the "
        "overall similarity of different bigWig files.",
        add_help=False,
        usage='bigwigCorrelate bins '
        '-b file1.bw file2.bw '
        '-out results.npz\n')

    # BED file arguments
    subparsers.add_parser(
        'BED-file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[bigwigCorrelateArgs(case='BED-file'), parent_parser],
        help="The user provides a BED file that contains all regions "
        "that should be considered for the analysis. A "
        "common use is to compare scores (e.g. ChIP-seq scores) between "
        "different samples over a set of pre-defined peak regions.",
        usage='bigwigCorrelate BED-file '
        '-b file1.bw file2.bw '
        '-out results.npz --BED selection.bed\n',
        add_help=False)

    return parser