Beispiel #1
0
def test_autoremove_2():
    """A temp file made inside ``file.autoremove()`` is deleted on normal exit.

    The file must exist while the context is active and must be gone once
    the ``with`` block has finished.
    """
    with file.autoremove():
        path = file.tmpfile()
        with open(path, 'w') as handle:
            print >> handle, 'hello world'
        # Still inside the context: the file has to be on disk.
        assert os.path.isfile(path)
    # Context left without an exception: the file has to be gone.
    assert not os.path.isfile(path)
Beispiel #2
0
def test_merg2_0():
    """Check ``merge2`` against a dictionary-based reference merge.

    Two sorted lists of (k-mer, count) pairs are written to one container,
    merged with ``merge2`` into a second container, and the result is
    compared with:

    * the pairs obtained by summing the counts per k-mer in a plain dict, and
    * the histogram (count -> number of k-mers with that count) that
      ``merge2`` accumulates in the dict passed to it.

    Both temporary containers are removed afterwards, whether or not the
    merge succeeds.
    """
    import os  # local import: only needed for the temp-file cleanup below

    K = 27
    M = (1 << (2 * K)) - 1
    N = 100000
    random.seed(17)
    xs = sorted((random.randint(0, M), pois(10)) for _ in xrange(N))
    ys = sorted((random.randint(0, M), pois(10)) for _ in xrange(N))

    created = []
    try:
        nm0 = tmpfile()
        created.append(nm0)
        with container(nm0, 'w') as z:
            writeKmersAndCounts(K, xs, z, 'xs')
            writeKmersAndCounts(K, ys, z, 'ys')

        # Ask for the second name only after the first file exists, exactly
        # as before, in case tmpfile() picks names based on what is on disk.
        nm1 = tmpfile()
        created.append(nm1)
        h = {}
        with container(nm0, 'r') as z0, container(nm1, 'w') as z:
            merge2(z, K, readKmersAndCounts(z0, 'xs'),
                   readKmersAndCounts(z0, 'ys'), h, 'zs')

        with container(nm1, 'r') as z:
            zs = list(readKmersAndCounts(z, 'zs'))
    finally:
        # Sibling tests remove their temp files; do the same here even when
        # the merge or an assertion above fails.
        for nm in created:
            if os.path.exists(nm):
                os.remove(nm)

    h = sorted(h.items())

    # Reference merge: sum the counts of equal k-mers across both inputs.
    ws = {}
    for (x, c) in xs:
        ws[x] = c + ws.get(x, 0)
    for (y, c) in ys:
        ws[y] = c + ws.get(y, 0)
    ws = sorted(ws.items())

    assert len(ws) == len(zs)
    for expected, actual in zip(ws, zs):
        assert expected == actual

    # Reference histogram: how many k-mers carry each merged count.
    h1 = {}
    for (_, c) in ws:
        h1[c] = 1 + h1.get(c, 0)
    h1 = sorted(h1.items())

    assert len(h) == len(h1)
    for actual, expected in zip(h, h1):
        assert actual == expected
Beispiel #3
0
def test_autoremove_3():
    """A temp file made inside ``file.autoremove()`` is deleted on error exit.

    ``StopIteration`` is raised from within the context; it must propagate
    to the surrounding ``try`` and the temp file must be gone afterwards.
    """
    try:
        with file.autoremove():
            path = file.tmpfile()
            with open(path, 'w') as handle:
                print >> handle, 'hello world'
            assert os.path.isfile(path)
            raise StopIteration
        # Only reachable if the context manager swallowed the exception.
        assert False
    except StopIteration:
        pass
    assert not os.path.isfile(path)
Beispiel #4
0
def test_rw_2():
    """Reading back a vector written only via ``writer64.append`` raises KeyError.

    The writer obtained from ``vecs.writer64`` is fed N values but nothing
    else is called on it before the container is closed.
    NOTE(review): presumably the vector is never committed under the name
    'wibble' in that case -- confirm against ``vecs``.
    """
    K = 27
    M = (1 << (2 * K)) - 1
    N = 100000
    random.seed(17)
    values = [random.randint(0, M) for _ in xrange(N)]
    path = tmpfile()
    with container.container(path, 'w') as dst:
        sink = vecs.writer64(dst, "wibble")
        for value in values:
            sink.append(value)
    with pytest.raises(KeyError):
        with container.container(path, 'r') as src:
            list(vecs.read64(src, 'wibble', N))

    os.remove(path)
Beispiel #5
0
def test_std_0():
    """Round-trip (k-mer, count) pairs through a container via ``std``.

    N seeded pseudo-random pairs are written under the name 'wibble' and
    read back; the result must have the same length and the same items in
    the same order.
    """
    K = 27
    M = (1 << (2 * K)) - 1
    N = 100000
    random.seed(17)
    written = [(random.randint(0, M), pois(10)) for _ in xrange(N)]
    path = tmpfile()
    with container.container(path, 'w') as dst:
        std.writeKmersAndCounts(K, written, dst, 'wibble')
    with container.container(path, 'r') as src:
        loaded = list(std.readKmersAndCounts(src, 'wibble'))

    assert len(loaded) == N
    for idx, expected in enumerate(written):
        assert expected == loaded[idx]

    os.remove(path)
Beispiel #6
0
def test_rw_1():
    """Round-trip a list of integers through ``vecs.write64`` / ``vecs.read64``.

    N seeded pseudo-random values below 2**(2*K) are written under the name
    "wibble" and read back; length and element order must be preserved.
    """
    K = 27
    M = (1 << (2 * K)) - 1
    N = 100000
    random.seed(17)
    written = [random.randint(0, M) for _ in xrange(N)]
    path = tmpfile()
    with container.container(path, 'w') as dst:
        vecs.write64(dst, written, "wibble")
    with container.container(path, 'r') as src:
        loaded = list(vecs.read64(src, 'wibble', N))

    assert len(loaded) == N
    for idx, expected in enumerate(written):
        assert expected == loaded[idx]

    os.remove(path)
Beispiel #7
0
def _require_k(K, found):
    """Return *found*; exit with status 1 if it disagrees with a known K."""
    if K is not None and found != K:
        print >> sys.stderr, "mismatched K"
        sys.exit(1)
    return found


def _store_stats(z, h, acgt):
    """Save the histogram and the normalised four-way tally in ``z.meta``."""
    total = float(sum(acgt))
    z.meta['hist'] = h
    # With no k-mers every tally is zero; store zero frequencies rather than
    # failing with a division by zero.
    z.meta['acgt'] = [c / total if total else 0.0 for c in acgt]


def main(argv):
    """Merge the k-mer/count containers named on the command line.

    The inputs (``<input>``) are grouped by ``pairs``; every group holds one
    or two container names.  All inputs must carry the same ``meta['K']``,
    otherwise "mismatched K" is printed to stderr and the process exits with
    status 1.

    * One group: the input, or the merge of its two inputs, is streamed
      through ``hist`` straight into ``<output>``.
    * Several groups: each group is first merged into a named run inside a
      temporary container, then all runs are merged, passed through ``hist``
      and written to ``<output>``.  The temporary container is removed even
      when the merge fails or exits early.

    In both cases the output's ``meta`` receives 'hist' (the dict filled by
    ``hist``) and 'acgt' (the four tallies filled by ``hist``, each divided
    by their sum).

    :param argv: argument list handed to ``docopt`` together with the
        module docstring.
    """
    opts = docopt.docopt(__doc__, argv)
    out = opts['<output>']
    px = list(pairs(opts['<input>']))

    if len(px) == 1:
        ix = px[0]
        with container(out, 'w') as z:
            h = {}
            acgt = [0, 0, 0, 0]
            with container(ix[0], 'r') as z0:
                K = z0.meta['K']
                xs = readKmersAndCounts(z0)
                if len(ix) == 1:
                    # Write the stream that went through hist(), as the
                    # other branches do; writing ``xs`` directly bypasses
                    # the histogram stream built for it.
                    writeKmersAndCounts(K, hist(xs, h, acgt), z)
                else:
                    with container(ix[1], 'r') as z1:
                        _require_k(K, z1.meta['K'])
                        ys = readKmersAndCounts(z1)
                        zs = hist(merge(xs, ys), h, acgt)
                        writeKmersAndCounts(K, zs, z)
            _store_stats(z, h, acgt)
        return

    K = None
    tmps = []
    tmpnm = tmpfile('.pmc')
    try:
        # Stage 1: reduce every group to a single named run in the temp file.
        with container(tmpnm, 'w') as z:
            for ix in px:
                nm = 'tmp-' + str(len(tmps))
                tmps.append(nm)
                with container(ix[0], 'r') as z0:
                    K = _require_k(K, z0.meta['K'])
                    xs = readKmersAndCounts(z0)
                    if len(ix) == 1:
                        writeKmersAndCounts(K, xs, z, nm)
                    else:
                        with container(ix[1], 'r') as z1:
                            _require_k(K, z1.meta['K'])
                            ys = readKmersAndCounts(z1)
                            writeKmersAndCounts(K, merge(xs, ys), z, nm)

        assert K is not None

        # Stage 2: chain-merge all runs into the output container.
        with container(out, 'w') as z:
            h = {}
            acgt = [0, 0, 0, 0]
            with container(tmpnm, 'r') as z0:
                zs = None
                for fn in tmps:
                    xs = readKmersAndCounts(z0, fn)
                    zs = xs if zs is None else merge(zs, xs)
                writeKmersAndCounts(K, hist(zs, h, acgt), z)
            _store_stats(z, h, acgt)
    finally:
        # Also runs on sys.exit(1) above, so the temp container never leaks.
        if os.path.exists(tmpnm):
            os.remove(tmpnm)
Beispiel #8
0
 def __init__(self, z, zfn, comp):
     """Keep the three arguments and open a fresh '.szf' temp file for writing.

     ``z``, ``zfn`` and ``comp`` are stored unchanged as attributes.  A name
     obtained from ``tmpfile('.szf')`` is stored in ``self.tfn`` and the
     file opened on it (mode 'w') in ``self.tf``.
     NOTE(review): nothing here closes ``self.tf``; presumably another
     method of the class does -- confirm.
     """
     self.z, self.zfn, self.comp = z, zfn, comp
     scratch = tmpfile('.szf')
     self.tfn = scratch
     self.tf = open(scratch, 'w')
Beispiel #9
0
def _spill(K, buf, tmps, tmpnm):
    """Append the accumulator's k-mers to the temp container as a new run."""
    run = 'tmps-%d' % (len(tmps), )
    tmps.append(run)
    with container(tmpnm, 'a') as z:
        writeKmersAndCounts(K, mkPairs(buf.kmers()), z, run)


def _report(nr, count, elapsed):
    """Print the progress line; the rate is 0.0 if the clock did not advance."""
    rate = count / elapsed if elapsed > 0 else 0.0
    print >> sys.stderr, 'reads processed:', nr, rate, 'reads/second'


def main(argv):
    """Count the k-mers of the input reads and write them to ``<output>``.

    Options read from ``docopt``:

    * ``<k>``      -- k-mer length, passed to ``kmersList``.
    * ``-m``       -- buffer budget in units of 1024*1024 (default 32); the
      accumulator is spilled to a temporary container once 8 times the
      number of k-mers added since the last spill reaches that budget.
    * ``-D``/``-S`` -- when ``-D`` is given, only k-mers accepted by
      ``sub(S, d, x)`` are counted (NOTE(review): presumably deterministic
      subsampling with seed S -- confirm against ``sub``).  Decisions are
      cached in two sets that are reset once they exceed 1,000,000 entries.

    Spilled runs are merged pairwise with ``merge2`` until at most two
    remain; the last merge (or the in-memory accumulator if nothing was
    spilled) is written to ``<output>``.  The output's ``meta`` receives
    'hist', 'acgt' (tallies of the low two bits of every counted k-mer,
    divided by their sum) and 'reads' (number of reads processed).  The
    temporary container is removed at the end.

    :param argv: argument list handed to ``docopt`` together with the
        module docstring.
    """
    opts = docopt.docopt(__doc__, argv)

    K = int(opts['<k>'])
    out = opts['<output>']
    Z = 1024 * 1024 * 32
    if opts['-m'] is not None:
        Z = 1024 * 1024 * int(opts['-m'])

    buf = KmerAccumulator()
    n = 0  # k-mers added to ``buf`` since the last spill
    tmps = []
    acgt = [0, 0, 0, 0]

    d = None
    if opts['-D'] is not None:
        d = float(opts['-D'])

        S = 0
        if opts['-S'] is not None:
            S = int(opts['-S'])

        cacheYes = set()
        cacheNo = set()

    # Create the (empty) temp container up front so spills can append to it.
    tmpnm = tmpfile('.pmc')
    with container(tmpnm, 'w') as z:
        pass

    PN = 1024 * 1024  # progress is reported every PN reads

    nr = 0
    t0 = time.time()
    for fn in opts['<input>']:
        for rds in mkParser(fn):
            for (nm, seq) in rds:
                nr += 1
                if nr & (PN - 1) == 0:
                    t1 = time.time()
                    _report(nr, PN, t1 - t0)
                    t0 = t1
                xs = kmersList(K, seq, True)
                if d is None:
                    buf.addList(xs)
                    for x in xs:
                        acgt[x & 3] += 1
                        n += 1
                else:
                    for x in xs:
                        if x in cacheNo:
                            continue
                        if x not in cacheYes:
                            if not sub(S, d, x):
                                cacheNo.add(x)
                                continue
                            cacheYes.add(x)
                        buf.add(x)
                        acgt[x & 3] += 1
                        n += 1
                    # Bound the memory used by the decision caches.
                    if len(cacheYes) > 1000000:
                        cacheYes = set()
                    if len(cacheNo) > 1000000:
                        cacheNo = set()
                if 8 * n >= Z:
                    _spill(K, buf, tmps, tmpnm)
                    buf.clear()
                    n = 0

    _report(nr, nr % PN, time.time() - t0)

    # If anything was spilled, the remainder must become a run as well so
    # that everything is merged from the temp container.
    if len(tmps) and len(buf):
        _spill(K, buf, tmps, tmpnm)
        buf = []

    # Merge runs pairwise into a fresh temp container until <= 2 remain.
    while len(tmps) > 2:
        tmpnm2 = tmpfile('.pmc')
        tmps2 = []
        with container(tmpnm, 'r') as z0, container(tmpnm2, 'w') as z:
            for p in pairs(tmps):
                run = 'tmps-%d' % (len(tmps2), )
                tmps2.append(run)
                if len(p) == 1:
                    writeKmersAndCounts(K, readKmersAndCounts(z0, p[0]), z,
                                        run)
                    continue
                # Histogram of an intermediate merge is not needed.
                h = {}
                merge2(z, K, readKmersAndCounts(z0, p[0]),
                       readKmersAndCounts(z0, p[1]), h, run)
        os.remove(tmpnm)
        tmpnm = tmpnm2
        tmps = tmps2

    with container(out, 'w') as z:
        h = {}
        if len(tmps) == 0:
            zs = hist(mkPairs(buf.kmers()), h)
            writeKmersAndCounts(K, zs, z)
        elif len(tmps) == 1:
            with container(tmpnm, 'r') as z0:
                writeKmersAndCounts(K, hist(readKmersAndCounts(z0, tmps[0]),
                                            h), z)
        else:
            assert len(tmps) == 2
            with container(tmpnm, 'r') as z0:
                merge2(z, K, readKmersAndCounts(z0, tmps[0]),
                       readKmersAndCounts(z0, tmps[1]), h)
        total = float(sum(acgt))
        z.meta['hist'] = h
        # No k-mers counted: store zero frequencies instead of dividing by 0.
        z.meta['acgt'] = [c / total if total else 0.0 for c in acgt]
        z.meta['reads'] = nr

    # The last temp container is no longer needed once the output is closed.
    if os.path.exists(tmpnm):
        os.remove(tmpnm)
Beispiel #10
0
def test_autoremove_1():
    """A name from ``file.tmpfile('wibble')`` can be created and removed by hand."""
    path = file.tmpfile('wibble')
    with open(path, 'w') as handle:
        print >> handle, 'hello world'
    assert os.path.isfile(path)
    # No autoremove context here, so clean up explicitly.
    os.remove(path)
Beispiel #11
0
def test_autoremove_0():
    """``file.tmpfile`` hands back a name without creating the file."""
    path = file.tmpfile('wibble')
    assert not os.path.isfile(path)