コード例 #1
0
ファイル: gaps.py プロジェクト: zjwang6/jcvi
def estimate(args):
    """
    %prog estimate gaps.bed all.spans.bed all.mates

    Estimate gap sizes based on mate positions and library insert sizes.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime output and is intentionally left untouched.
    #
    # args -- list of command-line tokens. After option parsing exactly three
    #         positionals are required (gaps BED, spans BED, mates file);
    #         otherwise the help text is printed and the process exits.
    # Side effects: writes `gaps.linkage.bed` in the current directory and
    # prints the remaining gaps to stdout. Returns None.
    from collections import defaultdict
    from jcvi.formats.bed import intersectBed_wao
    from jcvi.formats.posmap import MatesFile

    p = OptionParser(estimate.__doc__)
    p.add_option("--minlinks",
                 default=3,
                 type="int",
                 help="Minimum number of links to place [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    gapsbed, spansbed, matesfile = args
    # NOTE(review): the MatesFile object is not consulted below. The call is
    # kept so that whatever loading/validation its constructor performs still
    # happens before any output is produced -- confirm whether it is needed.
    MatesFile(matesfile)
    order = Bed(gapsbed).order

    # gap name -> names of the spans that overlap the gap
    gap2mate = defaultdict(set)
    for a, b in intersectBed_wao(gapsbed, spansbed):
        # Only gaps whose span is exactly 100 are examined.
        # NOTE(review): presumably 100 is the placeholder size used for gaps
        # of unknown length -- confirm.
        if a.span != 100:
            continue

        if b is None:
            # No overlapping span: record the gap with an empty set so that it
            # is still reported below.
            gap2mate[a.accn] = set()
            continue

        gap2mate[a.accn].add(b.accn)

    omgapsbed = "gaps.linkage.bed"
    # The context manager closes the file even if a lookup below raises.
    with open(omgapsbed, "w") as fw:
        for gapname, mates in sorted(gap2mate.items()):
            _, bedline = order[gapname]
            nmates = len(mates)
            if nmates < opts.minlinks:
                # Too few links: write the gap's BED line plus the link count.
                print("{0}\t{1}".format(bedline, nmates), file=fw)
                continue

            print(gapname, mates)
コード例 #2
0
ファイル: gaps.py プロジェクト: Hensonmw/jcvi
def estimate(args):
    """
    %prog estimate gaps.bed all.spans.bed all.mates

    Estimate gap sizes based on mate positions and library insert sizes.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime output and is intentionally left untouched.
    #
    # args -- list of command-line tokens. After option parsing exactly three
    #         positionals are required (gaps BED, spans BED, mates file);
    #         otherwise the help text is printed and the process exits.
    # Side effects: writes `gaps.linkage.bed` in the current directory and
    # writes the remaining gaps to stdout. Returns None.
    from collections import defaultdict
    from jcvi.formats.bed import intersectBed_wao
    from jcvi.formats.posmap import MatesFile

    p = OptionParser(estimate.__doc__)
    p.add_option("--minlinks", default=3, type="int",
                 help="Minimum number of links to place [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    gapsbed, spansbed, matesfile = args
    # NOTE(review): the MatesFile object is not consulted below. The call is
    # kept so that whatever loading/validation its constructor performs still
    # happens before any output is produced -- confirm whether it is needed.
    MatesFile(matesfile)
    order = Bed(gapsbed).order

    # gap name -> names of the spans that overlap the gap
    gap2mate = defaultdict(set)
    for a, b in intersectBed_wao(gapsbed, spansbed):
        # Only gaps whose span is exactly 100 are examined.
        # NOTE(review): presumably 100 is the placeholder size used for gaps
        # of unknown length -- confirm.
        if a.span != 100:
            continue

        if b is None:
            # No overlapping span: record the gap with an empty set so that it
            # is still reported below.
            gap2mate[a.accn] = set()
            continue

        gap2mate[a.accn].add(b.accn)

    omgapsbed = "gaps.linkage.bed"
    # Explicit write() calls replace the Python 2 print statement so the block
    # parses and behaves the same on Python 2 and Python 3; the context
    # manager closes the file even if a lookup below raises.
    with open(omgapsbed, "w") as fw:
        for gapname, mates in sorted(gap2mate.items()):
            _, bedline = order[gapname]
            nmates = len(mates)
            if nmates < opts.minlinks:
                # Too few links: write the gap's BED line plus the link count.
                fw.write("{0}\t{1}\n".format(bedline, nmates))
                continue

            sys.stdout.write("{0} {1}\n".format(gapname, mates))
コード例 #3
0
ファイル: fractionation.py プロジェクト: Hensonmw/jcvi
def validate(args):
    """
    %prog validate diploid.napus.fractionation cds.bed

    Check whether [S] intervals overlap with CDS.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime output and is intentionally left untouched.
    #
    # args -- list of command-line tokens; exactly two positionals are
    #         required (fractionation table, CDS BED), otherwise the help text
    #         is printed and the process exits.
    # Steps:
    #   1. write every row whose third column starts with "[S]" to `S.bed`;
    #   2. intersect `S.bed` with the CDS BED;
    #   3. copy the table to `<fractionation>.validated`, replacing the third
    #      column of each row whose second column got a CDS hit.
    # Raises ValueError if a row does not split into exactly three fields.
    from jcvi.formats.bed import intersectBed_wao

    p = OptionParser(validate.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fractionation, cdsbed = args
    sbed = "S.bed"
    validated = fractionation + ".validated"

    # Context managers guarantee all three handles are closed, including the
    # input handle, which was previously never closed.
    with open(fractionation) as fp:
        with open(sbed, "w") as fw:
            for row in fp:
                a, b, c = row.split()
                if not c.startswith("[S]"):
                    continue

                _tag, (seqid, start, end) = get_tag(c, None)
                # NOTE(review): `start - 1` presumably converts a 1-based
                # start to the 0-based BED convention -- confirm with get_tag.
                fields = (seqid, start - 1, end, b)
                fw.write("\t".join(str(x) for x in fields) + "\n")

        # name of the [S] interval (second column) -> name of an overlapping
        # CDS feature; with several hits the last one reported wins.
        pairs = {}
        for a, b in intersectBed_wao(sbed, cdsbed):
            if b is None:
                continue
            pairs[a.accn] = b.accn

        fixed = 0
        with open(validated, "w") as fw:
            fp.seek(0)  # second pass over the same input
            for row in fp:
                a, b, c = row.split()
                if b in pairs:
                    # Only [S] rows were written to S.bed, so a hit on any
                    # other row would indicate duplicated names in column two.
                    assert c.startswith("[S]")
                    c = pairs[b]
                    fixed += 1

                fw.write("\t".join((a, b, c)) + "\n")

    logging.debug("Fixed {0} [S] cases in `{1}`.".format(fixed, validated))
コード例 #4
0
ファイル: fractionation.py プロジェクト: zhaotao1987/jcvi
def validate(args):
    """
    %prog validate diploid.napus.fractionation cds.bed

    Check whether [S] intervals overlap with CDS.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime output and is intentionally left untouched.
    #
    # args -- list of command-line tokens; exactly two positionals are
    #         required (fractionation table, CDS BED), otherwise the help text
    #         is printed and the process exits.
    # Steps:
    #   1. write every row whose third column starts with "[S]" to `S.bed`;
    #   2. intersect `S.bed` with the CDS BED;
    #   3. copy the table to `<fractionation>.validated`, replacing the third
    #      column of each row whose second column got a CDS hit.
    # Raises ValueError if a row does not split into exactly three fields.
    from jcvi.formats.bed import intersectBed_wao

    p = OptionParser(validate.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fractionation, cdsbed = args
    sbed = "S.bed"
    validated = fractionation + ".validated"

    # Context managers guarantee all three handles are closed, including the
    # input handle, which was previously never closed.
    with open(fractionation) as fp:
        with open(sbed, "w") as fw:
            for row in fp:
                a, b, c = row.split()
                if not c.startswith("[S]"):
                    continue

                _tag, (seqid, start, end) = get_tag(c, None)
                # NOTE(review): `start - 1` presumably converts a 1-based
                # start to the 0-based BED convention -- confirm with get_tag.
                fields = (seqid, start - 1, end, b)
                fw.write("\t".join(str(x) for x in fields) + "\n")

        # name of the [S] interval (second column) -> name of an overlapping
        # CDS feature; with several hits the last one reported wins.
        pairs = {}
        for a, b in intersectBed_wao(sbed, cdsbed):
            if b is None:
                continue
            pairs[a.accn] = b.accn

        fixed = 0
        with open(validated, "w") as fw:
            fp.seek(0)  # second pass over the same input
            for row in fp:
                a, b, c = row.split()
                if b in pairs:
                    # Only [S] rows were written to S.bed, so a hit on any
                    # other row would indicate duplicated names in column two.
                    assert c.startswith("[S]")
                    c = pairs[b]
                    fixed += 1

                fw.write("\t".join((a, b, c)) + "\n")

    logging.debug("Fixed {0} [S] cases in `{1}`.".format(fixed, validated))
コード例 #5
0
ファイル: fractionation.py プロジェクト: Hensonmw/jcvi
def gffselect(args):
    """
    %prog gffselect gmaplocation.bed expectedlocation.bed translated.ids tag

    Try to match up the expected location and gmap locations for particular
    genes. translated.ids was generated by fasta.translate --ids. tag must be
    one of "complete|pseudogene|partial".
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime output and is intentionally left untouched.
    #
    # args -- list of command-line tokens; exactly four positionals are
    #         required, otherwise the help text is printed and the process
    #         exits.
    # Side effect: writes the selected gmap record names, one per line, to
    # `<expected minus its last extension>.ids`.
    # Raises KeyError if an overlapping gmap record has no entry in the ids
    # file.
    from jcvi.formats.bed import intersectBed_wao

    p = OptionParser(gffselect.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    gmapped, expected, idsfile, tag = args
    # Map each record name (with "mrna" rewritten to "path") to the third
    # field returned by get_tags, which is compared against `tag` below.
    completeness = dict((a.replace("mrna", "path"), c)
                        for (a, b, c) in get_tags(idsfile))

    # A separate name keeps the output path distinct from the input ids file.
    outfile = expected.rsplit(".", 1)[0] + ".ids"
    seen = set()
    # The context manager closes the output even if a lookup below raises.
    with open(outfile, "w") as fw:
        for a, b in intersectBed_wao(expected, gmapped):
            if b is None:
                continue

            aname, bbname = a.accn, b.accn
            bname = bbname.split(".")[0]
            if completeness[bbname] != tag:
                continue
            # Keep only hits whose base name equals the expected record name,
            # and only the first such hit per name.
            if aname != bname or bname in seen:
                continue

            seen.add(bname)
            fw.write(bbname + "\n")

    # Exactly one line is written per name added to `seen`.
    logging.debug("Total {0} records written to `{1}`.".format(len(seen), outfile))
コード例 #6
0
ファイル: fractionation.py プロジェクト: zhaotao1987/jcvi
def gffselect(args):
    """
    %prog gffselect gmaplocation.bed expectedlocation.bed translated.ids tag

    Try to match up the expected location and gmap locations for particular
    genes. translated.ids was generated by fasta.translate --ids. tag must be
    one of "complete|pseudogene|partial".
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime output and is intentionally left untouched.
    #
    # args -- list of command-line tokens; exactly four positionals are
    #         required, otherwise the help text is printed and the process
    #         exits.
    # Side effect: writes the selected gmap record names, one per line, to
    # `<expected minus its last extension>.ids`.
    # Raises KeyError if an overlapping gmap record has no entry in the ids
    # file.
    from jcvi.formats.bed import intersectBed_wao

    p = OptionParser(gffselect.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    gmapped, expected, idsfile, tag = args
    # Map each record name (with "mrna" rewritten to "path") to the third
    # field returned by get_tags, which is compared against `tag` below.
    completeness = dict((a.replace("mrna", "path"), c)
                        for (a, b, c) in get_tags(idsfile))

    # A separate name keeps the output path distinct from the input ids file.
    outfile = expected.rsplit(".", 1)[0] + ".ids"
    seen = set()
    # The context manager closes the output even if a lookup below raises.
    with open(outfile, "w") as fw:
        for a, b in intersectBed_wao(expected, gmapped):
            if b is None:
                continue

            aname, bbname = a.accn, b.accn
            bname = bbname.split(".")[0]
            if completeness[bbname] != tag:
                continue
            # Keep only hits whose base name equals the expected record name,
            # and only the first such hit per name.
            if aname != bname or bname in seen:
                continue

            seen.add(bname)
            fw.write(bbname + "\n")

    # Exactly one line is written per name added to `seen`.
    logging.debug("Total {0} records written to `{1}`.".format(len(seen), outfile))