예제 #1
0
def bisect(args):
    """
    %prog bisect acc accession.fasta

    determine the version of the accession, based on a fasta file.
    This proceeds by a sequential search from xxxx.1 to the latest record.
    """
    # NOTE: the docstring above doubles as the usage text handed to
    # OptionParser, so it is kept verbatim.
    p = OptionParser(bisect.__doc__)
    p.set_email()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    acc, fastafile = args
    # Reference record: the first sequence of the local FASTA file
    arec = get_first_rec(fastafile)

    valid = None
    # Try acc.1, acc.2, ... acc.99 in order and stop at the first match
    for version in range(1, 100):
        term = "%s.%d" % (acc, version)
        try:
            query = list(batch_entrez([term], email=opts.email))
        except AssertionError:
            # An AssertionError from the lookup is treated as "this version
            # does not exist", which ends the search without a result.
            logging.debug("no records found for %s. terminating." % term)
            return

        # The first element (record id) is not needed; `term` is rebound to
        # the value reported back by the query.
        _, term, handle = query[0]
        # next() works on both Python 2 and 3, unlike the `.next()` method
        brec = next(SeqIO.parse(handle, "fasta"))

        match = print_first_difference(
            arec, brec, ignore_case=True, ignore_N=True, rc=True
        )
        if match:
            valid = term
            break

    if valid:
        # Parenthesised single-argument prints behave identically under the
        # Python 2 print statement and the Python 3 print function.
        print("")
        print(green("%s matches the sequence in `%s`" % (valid, fastafile)))
예제 #2
0
파일: fetch.py 프로젝트: yangjl/jcvi
def bisect(args):
    """
    %prog bisect acc accession.fasta

    determine the version of the accession by querying entrez, based on a fasta file.
    This proceeds by a sequential search from xxxx.1 to the latest record.
    """
    # The docstring is reused as the command's usage text (passed to
    # OptionParser below), hence it is left exactly as it was.
    p = OptionParser(bisect.__doc__)
    p.set_email()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    acc, fastafile = args
    # Local sequence that every fetched version is compared against
    arec = get_first_rec(fastafile)

    valid = None
    # Walk the candidate versions acc.1 .. acc.99 until one matches
    for version in range(1, 100):
        term = "%s.%d" % (acc, version)
        try:
            query = list(batch_entrez([term], email=opts.email))
        except AssertionError:
            # The failed lookup is taken to mean there are no further
            # versions; give up without reporting a match.
            logging.debug("no records found for %s. terminating." % term)
            return

        # Only the term and the handle of the first hit are used
        _, term, handle = query[0]
        # Built-in next() instead of the Python 2-only iterator method
        brec = next(SeqIO.parse(handle, "fasta"))

        match = print_first_difference(
            arec, brec, ignore_case=True, ignore_N=True, rc=True
        )
        if match:
            valid = term
            break

    if valid:
        # Written so that both the print statement (Python 2) and the
        # print function (Python 3) produce the same output.
        print("")
        print(green("%s matches the sequence in `%s`" % (valid, fastafile)))
예제 #3
0
파일: base.py 프로젝트: yangjl/jcvi
def asciiplot(x, y, digit=1, width=50, title=None, char="="):
    """
    Print out a horizontal plot using ASCII chars.
    width is the textwidth (height) of the plot.

    Each (x, y) pair becomes one line on stderr: the x label (formatted by
    asciiaxis with `digit`), a bar made of `char` and the y value. Bars are
    scaled so that the largest y value gets a bar of `width` characters.
    If `title` is given it is printed first, wrapped by dark().
    """
    ax = np.array(x)
    ay = np.array(y)

    if title:
        sys.stderr.write("{0}\n".format(dark(title)))

    peak = ay.max()
    if peak:
        # Floor-divide and cast so bar lengths are integers regardless of
        # the Python version or of y holding floats.
        az = (ay * width // peak).astype(int)
    else:
        # All-zero data: draw empty bars instead of dividing by zero
        az = np.zeros(ay.shape, dtype=int)

    labels = [asciiaxis(value, digit=digit) for value in ax]
    rjust = max(len(label) for label in labels) + 1

    for label, value, barlen in zip(labels, ay, az):
        label = label.rjust(rjust)
        value = value or ""  # a zero value is shown as blank
        bar = green(char * int(barlen))
        sys.stderr.write("{0} |{1} {2}\n".format(label, bar, value))
예제 #4
0
def asciiplot(x, y, digit=1, width=50, title=None, char="="):
    """
    Print out a horizontal plot using ASCII chars.
    width is the textwidth (height) of the plot.

    One line is written to stderr per (x, y) pair: the x label produced by
    asciiaxis(x, digit=digit), a bar of `char` characters, then the y value.
    The largest y value is drawn with a bar of `width` characters and the
    others are scaled proportionally. An optional `title` is printed first
    through dark().
    """
    ax = np.array(x)
    ay = np.array(y)

    if title:
        print(dark(title), file=sys.stderr)

    peak = ay.max()
    if peak:
        # `//` on a float array still yields floats, and str * float raises
        # TypeError, so force integer bar lengths explicitly.
        az = (ay * width // peak).astype(int)
    else:
        # Nothing to scale against; avoid dividing by zero
        az = np.zeros(ay.shape, dtype=int)

    labels = [asciiaxis(value, digit=digit) for value in ax]
    rjust = max(len(label) for label in labels) + 1

    for label, value, barlen in zip(labels, ay, az):
        label = label.rjust(rjust)
        value = value or ""  # a zero value is shown as blank
        bar = green(char * int(barlen))
        print("{0} |{1} {2}".format(label, bar, value), file=sys.stderr)
예제 #5
0
파일: ca.py 프로젝트: arvin580/jcvi
def overlap(args):
    """
    %prog overlap best.contains iid

    Visualize overlaps for a given fragment. Must be run in 4-unitigger. All
    overlaps for iid were retrieved, excluding the ones matching best.contains.
    """
    # The docstring above is handed to OptionParser as usage text, so it is
    # kept verbatim.
    from jcvi.apps.console import green

    p = OptionParser(overlap.__doc__)
    p.add_option("--maxerr", default=2, type="int", help="Maximum error rate")
    p.add_option("--canvas", default=100, type="int", help="Canvas size")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bestcontains, iid = args
    canvas = opts.canvas

    # The ids in the first column of best.contains are cached as a pickled
    # set next to the input file and rebuilt only when need_update() says so.
    bestcontainscache = bestcontains + ".cache"
    if need_update(bestcontains, bestcontainscache):
        exclude = set()
        with open(bestcontains) as fp:
            for row in fp:
                # Skip comment lines and blank lines
                if row[0] == "#" or not row.strip():
                    continue
                exclude.add(int(row.split()[0]))
        # Pickles are binary data: open the cache in binary mode
        with open(bestcontainscache, "wb") as fw:
            cPickle.dump(exclude, fw)

    # NOTE: unpickling runs whatever the file encodes; only use cache files
    # produced by this command.
    with open(bestcontainscache, "rb") as fp:
        exclude = cPickle.load(fp)
    logging.debug("A total of {0} reads to exclude".format(len(exclude)))

    cmd = "overlapStore -d ../asm.ovlStore -b {0} -e {0}".format(iid)
    cmd += " -E {0}".format(opts.maxerr)
    frags = []
    for row in popen(cmd):
        r = OverlapLine(row)
        if r.bid in exclude:
            continue
        frags.append(r)

    # Also include to query fragment
    frags.append(OverlapLine("{0} {0} N 0 0 0 0".format(iid)))
    frags.sort(key=lambda x: x.ahang)

    # Determine size of the query fragment
    cmd = "gatekeeper -b {0} -e {0}".format(iid)
    cmd += " -tabular -dumpfragments ../asm.gkpStore"
    fp = popen(cmd)
    next(fp)  # first output line is discarded (presumably a header)
    size = int(next(fp).split()[-1])

    # Determine size of canvas
    xmin = min(x.ahang for x in frags)
    xmax = max(x.bhang for x in frags)
    xsize = -xmin + size + xmax
    # Bases per character. Integer division keeps the string multipliers
    # below integral; clamp to 1 so a span narrower than the canvas does not
    # cause a division by zero.
    ratio = max(xsize // canvas, 1)

    fw = sys.stdout
    for f in frags:
        fsize = -f.ahang + size + f.bhang
        a = (f.ahang - xmin) // ratio  # left padding
        b = fsize // ratio  # arrow length
        t = "-" * b
        # The arrow head shows the orientation of the fragment
        if f.orientation == "N":
            t = t[:-1] + ">"
        else:
            t = "<" + t[1:]
        # Zero hangs on both sides: highlight this fragment
        if f.ahang == 0 and f.bhang == 0:
            t = green(t)
        c = canvas - a - b  # right padding (empty when negative)
        fw.write(" " * a)
        fw.write(t)
        fw.write(" " * c)
        fw.write("{0} ({1})\n".format(str(f.bid).rjust(10), f.erate_adj))
예제 #6
0
파일: hic.py 프로젝트: zengxiaofei/jcvi
from jcvi.formats.base import LineFile, must_open
from jcvi.formats.bed import Bed
from jcvi.formats.sizes import Sizes
from jcvi.formats.blast import Blast
from jcvi.graphics.base import normalize_axes, plt, savefig
from jcvi.graphics.dotplot import dotplot
from jcvi.utils.cbook import gene_name, human_size
from jcvi.utils.natsort import natsorted

# Orientation symbol -> sign lookup tables
FF = {"+": 1, "-": -1, "?": 1}
# RR carries the same keys as FF with every sign flipped
RR = {strand: -sign for strand, sign in FF.items()}
# Lower bound, upper bound and span (UB - LB + 1) for golden_array()
LB, UB = 18, 29
BB = UB - LB + 1
# Labels wrapped by the green()/red() helpers
ACCEPT, REJECT = green("ACCEPT"), red("REJECT")
BINSIZE = 50000


class ContigOrderingLine(object):
    """One parsed record of a ContigOrdering file.

    The line is split on whitespace and its first four fields are read:
    contig id, contig name (only the part before `sep` is kept), a
    reverse-complement flag ('0' or '1') and an orientation score.
    """

    def __init__(self, line, sep="|"):
        fields = line.split()
        self.contig_id = fields[0]
        # Keep only the leading piece of the name, up to the first `sep`
        self.contig_name = fields[1].split(sep, 1)[0]
        rc_flag = fields[2]
        assert rc_flag == "0" or rc_flag == "1"
        # Flag '0' maps to the plus strand, anything else to minus
        if rc_flag == "0":
            self.strand = "+"
        else:
            self.strand = "-"
        # Stored as the raw string token; no numeric conversion is done here
        self.orientation_score = fields[3]
예제 #7
0
def overlap(args):
    """
    %prog overlap best.contains iid

    Visualize overlaps for a given fragment. Must be run in 4-unitigger. All
    overlaps for iid were retrieved, excluding the ones matching best.contains.
    """
    # The docstring is reused as the usage text given to OptionParser and is
    # therefore left unchanged.
    from jcvi.apps.console import green

    p = OptionParser(overlap.__doc__)
    p.add_option("--maxerr", default=2, type="int", help="Maximum error rate")
    p.add_option("--canvas", default=100, type="int", help="Canvas size")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bestcontains, iid = args
    canvas = opts.canvas

    # First-column ids of best.contains are kept as a pickled set in a
    # sidecar ".cache" file, refreshed only when need_update() requests it.
    bestcontainscache = bestcontains + ".cache"
    if need_update(bestcontains, bestcontainscache):
        exclude = set()
        with open(bestcontains) as fp:
            for row in fp:
                # Skip comment lines and blank lines
                if row[0] == '#' or not row.strip():
                    continue
                exclude.add(int(row.split()[0]))
        # Pickle streams are bytes: text-mode files fail on Python 3
        with open(bestcontainscache, "wb") as fw:
            dump(exclude, fw)

    # NOTE: unpickling runs whatever the file encodes; only use cache files
    # produced by this command.
    with open(bestcontainscache, "rb") as fp:
        exclude = load(fp)
    logging.debug("A total of {0} reads to exclude".format(len(exclude)))

    cmd = "overlapStore -d ../asm.ovlStore -b {0} -e {0}".format(iid)
    cmd += " -E {0}".format(opts.maxerr)
    frags = []
    for row in popen(cmd):
        r = OverlapLine(row)
        if r.bid in exclude:
            continue
        frags.append(r)

    # Also include to query fragment
    frags.append(OverlapLine("{0} {0} N 0 0 0 0".format(iid)))
    frags.sort(key=lambda x: x.ahang)

    # Determine size of the query fragment
    cmd = "gatekeeper -b {0} -e {0}".format(iid)
    cmd += " -tabular -dumpfragments ../asm.gkpStore"
    fp = popen(cmd)
    next(fp)  # first output line is discarded (presumably a header)
    # Built-in next() here too: `.next()` does not exist on Python 3
    size = int(next(fp).split()[-1])

    # Determine size of canvas
    xmin = min(x.ahang for x in frags)
    xmax = max(x.bhang for x in frags)
    xsize = -xmin + size + xmax
    # Bases per character. True division would turn the string multipliers
    # below into floats (TypeError); clamp to 1 so a span narrower than the
    # canvas does not cause a division by zero.
    ratio = max(xsize // canvas, 1)

    fw = sys.stdout
    for f in frags:
        fsize = -f.ahang + size + f.bhang
        a = (f.ahang - xmin) // ratio  # left padding
        b = fsize // ratio  # arrow length
        t = '-' * b
        # The arrow head shows the orientation of the fragment
        if f.orientation == 'N':
            t = t[:-1] + '>'
        else:
            t = '<' + t[1:]
        # Zero hangs on both sides: highlight this fragment
        if f.ahang == 0 and f.bhang == 0:
            t = green(t)
        c = canvas - a - b  # right padding (empty when negative)
        fw.write(' ' * a)
        fw.write(t)
        fw.write(' ' * c)
        print("{0} ({1})".format(str(f.bid).rjust(10), f.erate_adj), file=fw)
예제 #8
0
파일: hic.py 프로젝트: xuanblo/jcvi
from jcvi.formats.bed import Bed
from jcvi.formats.sizes import Sizes
from jcvi.formats.blast import Blast
from jcvi.graphics.base import normalize_axes, plt, savefig
from jcvi.graphics.dotplot import dotplot
from jcvi.utils.cbook import gene_name, human_size
from jcvi.utils.natsort import natsorted


# Translate an orientation symbol into a sign
FF = {
    "+": 1,
    "-": -1,
    "?": 1,
}
RR = {
    "+": -1,
    "-": 1,
    "?": -1,
}

# Parameters for golden_array()
LB = 18  # lower bound
UB = 29  # upper bound
BB = (UB - LB) + 1  # span

# Labels wrapped by the green()/red() helpers
ACCEPT = green("ACCEPT")
REJECT = red("REJECT")

BINSIZE = 50000


class ContigOrderingLine(object):
    """Holds the fields of a single ContigOrdering line.

    Attributes set from the whitespace-separated columns: `contig_id`
    (column 1), `contig_name` (column 2, truncated at the first `sep`),
    `strand` ('+' when column 3 is '0', otherwise '-') and
    `orientation_score` (column 4, kept as a string).
    """

    def __init__(self, line, sep="|"):
        tokens = line.split()
        self.contig_id = tokens[0]
        name_parts = tokens[1].split(sep)
        self.contig_name = name_parts[0]
        flag = tokens[2]
        # Column 3 must be the literal '0' or '1'
        assert flag in ("0", "1")
        strand = "-"
        if flag == "0":
            strand = "+"
        self.strand = strand
        self.orientation_score = tokens[3]