# Example 1
def main(argv=None):
    """script main.

    Parses command line options in sys.argv, unless *argv* is given.

    Expects exactly two positional arguments (a bam/bigwig file and a
    bed file), builds per-interval peak-shape results and writes them
    via :func:`outputResults`.

    :param argv: command line arguments; defaults to ``sys.argv``.
    :raises ValueError: if the number of positional arguments is not two
        or ``--format`` is not one of ``bigwig``/``bam``.
    """

    parser = buildOptionParser(argv)

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    if len(args) != 2:
        raise ValueError("please specify a bam and bed file")

    infile, bedfile = args
    control_file = None
    if options.control_file:
        E.info("using control file %s" % options.control_file)

    # open foreground (and optional control) as either bigwig or bam,
    # choosing the matching counter implementation
    if options.format == "bigwig":
        fg_file = bx.bbi.bigwig_file.BigWigFile(open(infile))
        if options.control_file:
            control_file = bx.bbi.bigwig_file.BigWigFile(
                open(options.control_file))
        counter = _bam2peakshape.CounterBigwig()

    elif options.format == "bam":
        fg_file = pysam.Samfile(infile, "rb")
        if options.control_file:
            control_file = pysam.Samfile(options.control_file, "rb")
        counter = _bam2peakshape.CounterBam(shift=options.shift)

    else:
        # fail early with a clear message instead of hitting an
        # UnboundLocalError on fg_file/counter further down
        raise ValueError(
            "unknown format '%s', expected 'bigwig' or 'bam'" %
            options.format)

    result, bins = buildResults(bedfile, fg_file, control_file, counter,
                                options)

    if len(result) == 0:
        E.warn("no data - no output")
        E.Stop()
        return

    outputResults(result, bins, options)

    ## write footer and output benchmark information.
    E.Stop()
# Example 2
def main(argv=None):
    """script main.

    Parses command line options in sys.argv, unless *argv* is given.

    Expects one bam- or bigwig-file plus one bed file, builds density
    matrices over the bed intervals (optionally against several control
    files), applies an optional per-million "sum" normalization and
    writes a feature table and sorted matrices.

    :param argv: command line arguments; defaults to ``sys.argv``.
    :raises ValueError: if the number of positional arguments is not two
        or ``--format`` is not one of ``bigwig``/``bam``.
    """

    parser = buildOptionParser(argv)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    if len(args) != 2:
        raise ValueError(
            "please specify one bam- or wig-file and one bed file")

    if options.control_files:
        E.info("using control files: %s" % ",".join(options.control_files))

    infile, bedfile = args
    control_files = []

    # open foreground (and any controls) as either bigwig or bam,
    # choosing the matching counter implementation
    if options.format == "bigwig":
        fg_file = bx.bbi.bigwig_file.BigWigFile(open(infile))
        for control_file in options.control_files:
            control_files.append(
                bx.bbi.bigwig_file.BigWigFile(open(control_file)))
        counter = _bam2peakshape.CounterBigwig(
            smooth_method=options.smooth_method)

    elif options.format == "bam":
        fg_file = pysam.Samfile(infile, "rb")
        for control_file in options.control_files:
            control_files.append(pysam.Samfile(control_file, "rb"))
        counter = _bam2peakshape.CounterBam(
            shift=options.shift, smooth_method=options.smooth_method)

    else:
        # fail early with a clear message instead of hitting an
        # UnboundLocalError on fg_file/counter further down
        raise ValueError(
            "unknown format '%s', expected 'bigwig' or 'bam'" %
            options.format)

    features_per_interval, bins = buildDensityMatrices(
        Bed.iterator(IOTools.openFile(bedfile)),
        fg_file,
        control_files,
        counter,
        window_size=options.window_size,
        bin_size=options.bin_size,
        strand_specific=options.strand_specific,
        centring_method=options.centring_method,
        use_interval=options.use_interval,
        random_shift=options.random_shift,
        smooth_method=options.smooth_method,
        report_step=options.report_step)

    if len(features_per_interval) == 0:
        E.warn("no data - no output")
        E.Stop()
        return

    outputFeatureTable(options.stdout, features_per_interval, bins)

    # apply normalization
    # Note: does not normalize control?
    # Needs reworking, currently it does not normalize across
    # all samples nor does the word "sum" reflect the per million
    # normalization.
    if options.normalization == "sum":
        E.info("starting sum normalization")
        # get total counts across all intervals
        norm = 0.0
        for foreground, bed, controls, shifted in features_per_interval:
            norm += sum(foreground.counts)
        # per million
        norm /= float(1000000)
        E.info("sum/million normalization with %f" % norm)

        # normalise foreground, controls and the shifted track alike.
        # NOTE: dtype=float (the builtin) — numpy.float was a deprecated
        # alias removed in NumPy 1.24 and raises AttributeError there.
        new_data = []
        for foreground, bed, controls, shifted in features_per_interval:

            foreground = foreground._replace(
                counts=numpy.array(foreground.counts, dtype=float) /
                norm)
            new_controls = []
            for control in controls:
                new_controls.append(
                    control._replace(
                        counts=numpy.array(control.counts, dtype=float) /
                        norm))
            if shifted:
                shifted = shifted._replace(
                    counts=numpy.array(shifted.counts, dtype=float) /
                    norm)
            new_data.append(
                IntervalData._make((foreground, bed, new_controls, shifted)))
        features_per_interval = new_data
    else:
        E.info("no normalization performed")

    # center bins
    out_bins = bins[:-1] + options.bin_size

    # build tracks: track name is the filename without directory/suffix
    def _toTrack(filename):
        return os.path.splitext(os.path.basename(filename))[0]

    outputMatrices(features_per_interval,
                   out_bins,
                   foreground_track=_toTrack(infile),
                   control_tracks=[_toTrack(x) for x in options.control_files],
                   shifted=options.random_shift,
                   sort_orders=options.sort_orders)

    # write footer and output benchmark information.
    E.Stop()