Exemple #1
0
def scaffold(args):
    """
    %prog scaffold ctgfasta reads1.fasta mapping1.bed
                            reads2.fasta mapping2.bed ...

    Run BAMBUS on set of contigs, reads and read mappings.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is part of the program's output and is kept verbatim.

    from jcvi.formats.base import FileMerger
    from jcvi.formats.bed import mates
    from jcvi.formats.contig import frombed
    from jcvi.formats.fasta import join
    from jcvi.utils.iter import grouper

    parser = OptionParser(scaffold.__doc__)
    parser.add_option(
        "--conf", help="BAMBUS configuration file [default: %default]")
    parser.add_option(
        "--prefix", action="store_true", default=False,
        help="Only keep links between IDs with same prefix [default: %default]")
    opts, args = parser.parse_args(args)

    # One contig FASTA followed by (reads FASTA, BED) pairs: the argument
    # count must therefore be odd and at least three.
    nargs = len(args)
    if nargs < 3 or nargs % 2 == 0:
        sys.exit(not parser.print_help())

    ctgfasta = args[0]
    pairs = list(grouper(2, args[1:]))

    # For every library collect (reads FASTA, mates BED, mates file),
    # regenerating the mates files when need_update() says so.
    trios = []
    for fastafile, bedfile in pairs:
        stem = bedfile.rsplit(".", 1)[0]
        matefile = stem + ".mates"
        matebedfile = matefile + ".bed"
        if need_update(bedfile, [matefile, matebedfile]):
            matesopt = [bedfile, "--lib", "--nointra"]
            if opts.prefix:
                matesopt.append("--prefix")
            matefile, matebedfile = mates(matesopt)
        trios.append((fastafile, matebedfile, matefile))

    # Merge the per-library files column-wise into one file per kind.
    merged = ("bambus.reads.fasta", "bambus.bed", "bambus.mates")
    bbfasta, bbbed, bbmate = merged
    for sources, target in zip(zip(*trios), merged):
        FileMerger(sources, outfile=target).merge(checkexists=True)

    ctgfile, idsfile = "bambus.contig", "bambus.ids"
    frombed_inputs = [bbbed, ctgfasta, bbfasta]
    if need_update(frombed_inputs, ctgfile):
        frombed(frombed_inputs)

    # Split the contigs with faSomeRecords: records listed in the ids file,
    # and (with -exclude) the remaining ones.
    inputfasta = "bambus.contigs.fasta"
    singletonfasta = "bambus.singletons.fasta"
    extract = "faSomeRecords {0} {1} ".format(ctgfasta, idsfile)
    sh(extract + inputfasta)
    sh(extract + singletonfasta + " -exclude")

    # Run bambus: goBambus -> untangle -> printScaff
    bambus = "bambus"
    gobambus = "goBambus -c {0} -m {1} -o {2}".format(ctgfile, bbmate, bambus)
    if opts.conf:
        gobambus = "{0} -C {1}".format(gobambus, opts.conf)
    sh(gobambus)

    sh("untangle -e {0}.evidence.xml -s {0}.out.xml -o {0}.untangle.xml"
       .format(bambus))

    final = "final"
    sh("printScaff -e {0}.evidence.xml -s {0}.untangle.xml -l {0}.lib "
       "-merge -detail -oo -sum -o {1}".format(bambus, final))

    # Feed the .oo file written by printScaff to the FASTA join step.
    join([inputfasta, "--oo={0}.oo".format(final)])
Exemple #2
0
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is deliberately left unchanged.
    from itertools import cycle

    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax", default=None, type="int",
                 help="Limit ymax [default: %default]")
    p.add_option("--spans", default=False, action="store_true",
                 help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    # Presumably the length of `ctg` in bases; it is used below both as the
    # x-axis limit and to compute the uncovered base count.
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    yy = .9
    # Cycle through the palette so that bedfiles beyond the sixth are still
    # plotted (re-using colours) instead of being silently dropped by zip().
    for bedfile, c in zip(bedfiles, cycle("rgbcky")):
        if not opts.spans:
            # Derive <prefix>.mates and <prefix>.spans.bed from the mapping
            # BED, rebuilding each only when need_update() asks for it.
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, _ = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                _, bedspanfile = bedpe([bedfile, "--span",
                    "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases))
        # sys.stderr.write() works on both Python 2 and 3; the former
        # `print >> sys.stderr` statement raises TypeError on Python 3.
        sys.stderr.write(msg + "\n")
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    # Floor division keeps the label a whole number under Python 3 true
    # division as well (assuming `size` is an int, this equals the Python 2
    # result of `/`).
    ylabel = "Average depth per {0}Kb".format(size // bins // 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
Exemple #3
0
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring above doubles as the usage text given to
    # OptionParser, so its wording is intentionally untouched.
    from itertools import cycle

    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax",
                 default=None,
                 type="int",
                 help="Limit ymax [default: %default]")
    p.add_option(
        "--spans",
        default=False,
        action="store_true",
        help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    # Presumably the length of `ctg`; used as the x-axis limit and as the
    # total from which the covered bases are subtracted.
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    yy = .9
    # The palette has six colours; cycling it means a seventh (and later)
    # bedfile is still drawn rather than silently skipped by zip().
    for bedfile, c in zip(bedfiles, cycle("rgbcky")):
        if not opts.spans:
            # Build <prefix>.mates, then <prefix>.spans.bed, each only when
            # need_update() reports it is required.
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, _ = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                _, bedspanfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend,
                                               thousands(notcoveredbases))
        # Written via sys.stderr.write() so the call behaves the same on
        # Python 2 and 3 (`print >> sys.stderr` is a TypeError on Python 3).
        sys.stderr.write(msg + "\n")
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    # `//` gives the same whole-number label on Python 2 and Python 3
    # (assuming `size` is an int); plain `/` would yield a float on Python 3.
    ylabel = "Average depth per {0}Kb".format(size // bins // 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
Exemple #4
0
def scaffold(args):
    """
    %prog scaffold ctgfasta reads1.fasta mapping1.bed
                            reads2.fasta mapping2.bed ...

    Run BAMBUS on set of contigs, reads and read mappings.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; further documentation lives in comments.

    from jcvi.formats.base import FileMerger
    from jcvi.formats.bed import mates
    from jcvi.formats.contig import frombed
    from jcvi.formats.fasta import join
    from jcvi.utils.iter import grouper

    p = OptionParser(scaffold.__doc__)
    p.set_rclip(rclip=1)
    p.add_option("--conf", help="BAMBUS configuration file [default: %default]")
    p.add_option("--prefix", default=False, action="store_true",
            help="Only keep links between IDs with same prefix [default: %default]")
    opts, args = p.parse_args(args)

    # One contig FASTA followed by (reads FASTA, BED) pairs: the argument
    # count must be odd and at least three.
    nargs = len(args)
    if nargs < 3 or nargs % 2 != 1:
        sys.exit(not p.print_help())

    rclip = opts.rclip
    # NOTE(review): no --cutoff option is registered in this function. Unless
    # set_rclip() adds one (not visible here -- confirm), `opts.cutoff` does
    # not exist and reading it raises AttributeError as soon as a mates file
    # has to be rebuilt. Forward --cutoff only when a value is available.
    cutoff = getattr(opts, "cutoff", None)
    ctgfasta = args[0]
    duos = list(grouper(args[1:], 2))

    # For every library collect (reads FASTA, mates BED, mates file),
    # regenerating the mates files when need_update() says so.
    trios = []
    for fastafile, bedfile in duos:
        prefix = bedfile.rsplit(".", 1)[0]
        matefile = prefix + ".mates"
        matebedfile = matefile + ".bed"
        if need_update(bedfile, [matefile, matebedfile]):
            matesopt = [bedfile, "--lib", "--nointra",
                        "--rclip={0}".format(rclip)]
            if cutoff is not None:
                matesopt.append("--cutoff={0}".format(cutoff))
            if opts.prefix:
                matesopt.append("--prefix")
            matefile, matebedfile = mates(matesopt)
        trios.append((fastafile, matebedfile, matefile))

    # Merge the readfasta, bedfile and matefile
    bbfasta, bbbed, bbmate = "bambus.reads.fasta", "bambus.bed", "bambus.mates"

    # zip(*trios) regroups the per-library triples into one tuple per kind.
    for files, outfile in zip(zip(*trios), (bbfasta, bbbed, bbmate)):
        FileMerger(files, outfile=outfile).merge(checkexists=True)

    ctgfile = "bambus.contig"
    idsfile = "bambus.ids"
    frombedInputs = [bbbed, ctgfasta, bbfasta]
    if need_update(frombedInputs, ctgfile):
        frombed(frombedInputs)

    # Split the contigs with faSomeRecords: records listed in the ids file,
    # and (with -exclude) the remaining ones.
    inputfasta = "bambus.contigs.fasta"
    singletonfasta = "bambus.singletons.fasta"
    cmd = "faSomeRecords {0} {1} ".format(ctgfasta, idsfile)
    sh(cmd + inputfasta)
    sh(cmd + singletonfasta + " -exclude")

    # Run bambus
    prefix = "bambus"
    cmd = "goBambus -c {0} -m {1} -o {2}".format(ctgfile, bbmate, prefix)
    if opts.conf:
        cmd += " -C {0}".format(opts.conf)
    sh(cmd)

    cmd = "untangle -e {0}.evidence.xml -s {0}.out.xml -o {0}.untangle.xml".\
            format(prefix)
    sh(cmd)

    final = "final"
    cmd = "printScaff -e {0}.evidence.xml -s {0}.untangle.xml -l {0}.lib " \
          "-merge -detail -oo -sum -o {1}".format(prefix, final)
    sh(cmd)

    # Feed the .oo file written by printScaff to the FASTA join step.
    oofile = final + ".oo"
    join([inputfasta, "--oo={0}".format(oofile)])