def test_splice_metrics_fullsize_prots(bbdb_fullsize_prots):
    """Pin the splice-metric tallies for the full-size protein fixture.

    ``splice_metrics`` is run for two vertex pairings ('_C' -> 'N_' and
    '_N' -> 'C_'); for each, the number of entries with zero clashes,
    with at least ``ncontact_cut`` contacts, and with rms at most
    ``rms_cut`` is compared against fixed reference counts.
    """
    bbs = bbdb_fullsize_prots.query('all')
    ncontact_cut = 10
    rms_cut = 1.5

    def tally(metrics):
        # Reduce the three metric arrays to counts of passing entries.
        clash_free = np.sum(metrics.nclash == 0)
        enough_contacts = np.sum(metrics.ncontact >= ncontact_cut)
        low_rms = np.sum(metrics.rms <= rms_cut)
        return low_rms, enough_contacts, clash_free

    # First pairing: '_C' vertex spliced onto 'N_' vertex.
    src = Vertex(bbs, '_C')
    dst = Vertex(bbs, 'N_')
    nrms, ncontact, nclash = tally(
        splice_metrics(src, bbs, dst, bbs, skip_on_fail=False))
    print(nrms, ncontact, nclash)
    assert nrms == 36
    assert nclash == 1213
    assert ncontact == 1419

    # Second pairing: '_N' vertex spliced onto 'C_' vertex; the reference
    # counts are the same as for the first pairing.
    src = Vertex(bbs, '_N')
    dst = Vertex(bbs, 'C_')
    nrms, ncontact, nclash = tally(
        splice_metrics(src, bbs, dst, bbs, skip_on_fail=False))
    assert nclash == 1213
    assert ncontact == 1419
    assert nrms == 36
def test_edge_fullsize_prots(bbdb_fullsize_prots):
    """Check ``Edge.allowed_entries`` for a default '_C' -> 'N_' edge.

    Indices 0..23 are checked: the ones listed in ``expected`` must yield
    exactly those entries and every other index must yield none.

    The comparison uses ``np.array_equal`` rather than
    ``np.all(got == want)``: with ``==`` an empty result broadcasts against
    a one-element expectation (and a one-element result against ``[]``) to
    an empty array, and ``np.all`` of an empty array is True, so a wrong
    result could pass unnoticed.

    NOTE(review): this file contains another function with this same name;
    if both live in one module the later definition replaces the earlier
    one -- confirm which is intended to run.
    """
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'N_')
    e = Edge(u, bbs, v, bbs)

    # Index -> allowed entries; any index in 0..23 not listed must be empty.
    expected = {
        0: [22],
        18: [40],
        19: [21, 60],
        21: [0, 58],
        22: [1, 57, 59, 60],
        23: [20, 58, 59, 60],
    }
    for i in range(24):
        want = expected.get(i, [])
        got = e.allowed_entries(i)
        assert np.array_equal(got, want), (
            f'allowed_entries({i}): got {got!r}, expected {want!r}')
def test_linear_search_three(bbdb_fullsize_prots):
    """Grow a three-vertex linear search and pin the resulting index table.

    Builds vertices '_C', 'NC', 'N_' and the two edges between them with a
    shared set of splice parameters, runs ``grow_linear`` on the resulting
    ``SearchSpaceDag`` and checks that

    * the first position of every result is the identity transform,
    * the number of results equals ``_num_splices(e) * _num_splices(f)``,
    * the lexicographically sorted, expanded index rows equal the cross
      product of five leading column pairs and eleven trailing column pairs.

    NOTE(review): this file contains another function with this same name;
    if both live in one module the later definition replaces the earlier
    one -- confirm which is intended to run.
    """
    # Local import so the block does not depend on a module-level ``sys``.
    import sys

    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'NC')
    w = Vertex(bbs, 'N_')
    verts = (u, v, w)
    kw = dict(
        splice_max_rms=0.5,
        splice_ncontact_cut=30,
        splice_clash_d2=4.0**2,  # ca only
        splice_contact_d2=8.0**2,
        splice_rms_range=6,
        splice_clash_contact_range=60,
        splice_clash_contact_by_helix=False,
    )
    e = Edge(u, bbs, v, bbs, **kw)
    f = Edge(v, bbs, w, bbs, **kw)
    edges = (e, f)

    ssdag = SearchSpaceDag(None, (bbs, ) * 3, verts, edges)
    result = grow_linear(ssdag, no_duplicate_bases=False)

    assert np.allclose(result.pos[:, 0], np.eye(4))

    idx = _expand_inout_indices(verts, result.idx)
    # Sort rows lexicographically (column 0 is the primary key) so the
    # comparison below does not depend on the order results were produced.
    isort = np.lexsort((idx[:, 3], idx[:, 2], idx[:, 1], idx[:, 0]))
    idx = idx[isort, :]
    assert len(idx) == _num_splices(e) * _num_splices(f)

    # threshold=np.nan is rejected by current NumPy ("threshold must be
    # non-NAN"); sys.maxsize is the documented way to disable summarization.
    np.set_printoptions(threshold=sys.maxsize)
    print(repr(idx))

    # The reference table is the full cross product of these two groups:
    # every leading (col 0, col 1) pair combined with every trailing
    # (col 2, col 3) pair, both already in sorted order.
    leading_pairs = [[0, 19], [17, 17], [18, 18], [22, 18], [23, 19]]
    trailing_pairs = [
        [0, 3], [0, 60], [16, 39], [17, 0], [17, 58], [18, 59],
        [22, 20], [22, 59], [23, 39], [23, 40], [23, 60],
    ]
    expected = [lead + trail for lead in leading_pairs
                for trail in trailing_pairs]
    # array_equal also fails cleanly (returns False) on a shape mismatch.
    assert np.array_equal(idx, expected)
def test_linear_search_two(bbdb_fullsize_prots):
    """Grow a two-vertex linear search and pin the sorted result indices.

    A single '_C' -> 'N_' edge is built with explicit splice parameters and
    wrapped in a ``SearchSpaceDag``. The test checks the vertices' ``inout``
    columns, that every result starts at the identity transform, and that
    the lexicographically sorted index pairs match the reference table.

    NOTE(review): this file contains another function with this same name;
    if both live in one module the later definition replaces the earlier
    one -- confirm which is intended to run.
    """
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'N_')
    verts = (u, v)
    splice_opts = {
        'splice_max_rms': 0.7,
        'splice_ncontact_cut': 30,
        'splice_clash_d2': 4.0**2,  # ca only
        'splice_contact_d2': 8.0**2,
        'splice_rms_range': 6,
        'splice_clash_contact_range': 60,
        'splice_clash_contact_by_helix': False,
    }
    edges = (Edge(u, bbs, v, bbs, **splice_opts), )

    assert np.all(u.inout[:, 1] == np.arange(u.len))
    assert np.all(v.inout[:, 0] == np.arange(v.len))

    ssdag = SearchSpaceDag(None, (bbs, ) * 2, verts, edges)
    result = grow_linear(ssdag, no_duplicate_bases=False)
    assert np.allclose(result.pos[:, 0], np.eye(4))

    # Order rows by column 0, then column 1, before comparing.
    order = np.lexsort((result.idx[:, 1], result.idx[:, 0]))
    sortidx = result.idx[order, :]
    print(repr(sortidx))

    # Reference pairs grouped by their first index.
    grouped = (
        (0, (3, 24, 41, 60)),
        (1, (22, 25)),
        (16, (3, 39, 40, 57, 60)),
        (17, (0, 22, 40, 55, 58)),
        (18, (23, 38, 55, 59)),
        (19, (24, 41, 56, 60)),
        (20, (18, 57)),
        (21, (58, )),
        (22, (20, 23, 38, 39, 59, 60)),
        (23, (24, 39, 40, 41, 54, 60)),
    )
    expected = [[first, second] for first, seconds in grouped
                for second in seconds]
    assert np.all(sortidx == expected)
def perf_grow_2(bbdb, maxbb=10, shuf=0):
    """Time a two-vertex ``grow_linear`` run and print a one-line summary.

    Args:
        bbdb: database object providing ``query(name, max_bblocks=...,
            shuffle=...)``; queried for 'C3_N' and 'C3_C'.
        maxbb: forwarded to ``query`` as ``max_bblocks``.
        shuf: forwarded to ``query`` as ``shuffle``.

    Prints the number of allowed splices on the single edge and then a
    summary line with total, vertex, edge and grow timings. Returns None.
    """
    ttot = time()

    # Database query time is measured but not included in the summary line.
    tdb = time()
    bbs = dict(C3_N=bbdb.query('C3_N', max_bblocks=maxbb, shuffle=shuf),
               C3_C=bbdb.query('C3_C', max_bblocks=maxbb, shuffle=shuf))
    tdb = time() - tdb

    tvertex = time()
    ubbs = bbs['C3_N']
    vbbs = bbs['C3_C']
    u = Vertex(ubbs, '_N', min_seg_len=15, parallel=1)
    v = Vertex(vbbs, 'C_', min_seg_len=15, parallel=1)
    V = (u, v)
    tvertex = time() - tvertex

    tedge = time()
    # Keep a name for the edge: the original printed ``e.total_allowed_splices()``
    # without ever binding ``e``, which raised NameError.
    e = Edge(u, ubbs, v, vbbs, parallel=1)
    E = [e]
    print('e.total_allowed_splices()', e.total_allowed_splices())
    tedge = time() - tedge

    tgrow = time()
    worms = grow_linear(V, E, loss_function=lossfunc_rand_1_in(1), parallel=1)
    tgrow = time() - tgrow

    Nres = len(worms.losses)
    Ntot = u.len * v.len
    ttot = time() - ttot
    factor = np.log10(Ntot / (Nres + 1)) - 3  # every 1000th
    print(f' perf_grow_2 {maxbb:4} {ttot:7.1f}s {Nres:12,} {Ntot:20,} tv'
          f' {tvertex:7.1f}s te {tedge:7.1f}s tg {tgrow:7.1f}s {factor:10.3f}')
def test_splice_metrics_run(bbdb):
    """Smoke test: ``splice_metrics`` runs for every vertex-spec combination.

    Four-character specs are formed as ``pre + mid + post`` with ``pre`` and
    ``post`` drawn from '_NC' and ``mid`` from 'CN'/'NC'. The first two
    characters configure the first vertex and the last two the second.
    Nothing is asserted about the returned metrics; the test only requires
    that no call raises.
    """
    bbs = bbdb.query('all')
    ends = '_NC'
    specs = [
        pre + mid + post
        for pre in ends
        for post in ends
        for mid in ('CN', 'NC')
    ]
    for spec in specs:
        head, tail = spec[:2], spec[2:]
        u = Vertex(bbs, head)
        v = Vertex(bbs, tail)
        splice_metrics(u, bbs, v, bbs)
def test_linear_search_two(bbdb_fullsize_prots):
    """Grow a two-vertex linear search with a default edge and pin the result.

    Checks the vertices' ``inout`` columns, that every result starts at the
    identity transform, and that ``result.indices`` equals the reference
    list of index pairs.

    NOTE(review): this file contains another function with this same name;
    if both live in one module the later definition replaces the earlier
    one -- confirm which is intended to run.
    """
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'N_')
    e = Edge(u, bbs, v, bbs)

    assert np.all(u.inout[:, 1] == np.arange(u.len))
    assert np.all(v.inout[:, 0] == np.arange(v.len))

    result = grow_linear((u, v), (e, ))
    assert np.allclose(result.positions[:, 0], np.eye(4))

    # Reference pairs grouped by their first index.
    grouped = (
        (0, (22, )),
        (18, (40, )),
        (19, (21, 60)),
        (21, (0, 58)),
        (22, (1, 57, 59, 60)),
        (23, (20, 58, 59, 60)),
    )
    expected = [[first, second] for first, seconds in grouped
                for second in seconds]
    assert np.all(result.indices == expected)
def linear_NC_gragh(n, uwv, efg):
    """Build the vertex and edge tuples for an ``n``-vertex linear chain.

    The chain is one '_C' vertex, ``n - 2`` copies of a shared 'NC' vertex
    and one 'N_' vertex, joined by ``n - 1`` edges. For ``n == 2`` the two
    end vertices are connected directly.

    Returns:
        ``(V, E)``: tuple of ``n`` vertices and tuple of ``n - 1`` edges.

    NOTE(review): ``bbs`` is not a parameter or local, so it must come from
    an enclosing/module scope not visible here. ``uwv`` and ``efg`` are
    accepted but never read -- confirm whether they were meant to supply
    the vertices and edges.
    """
    first = Vertex(bbs, '_C')
    middle = Vertex(bbs, 'NC')
    last = Vertex(bbs, 'N_')
    V = (first, *([middle] * (n - 2)), last)
    assert len(V) == n

    # All three edge kinds are always constructed, even when n == 2 and
    # they end up unused.
    first_to_middle = Edge(first, bbs, middle, bbs)
    middle_to_middle = Edge(middle, bbs, middle, bbs)
    middle_to_last = Edge(middle, bbs, last, bbs)
    if n == 2:
        E = (Edge(first, bbs, last, bbs), )
    else:
        E = (first_to_middle, *([middle_to_middle] * (n - 3)), middle_to_last)

    print('foo')
    print(E)
    assert len(E) == n - 1
    return V, E
def test_linear_search_three(bbdb_fullsize_prots):
    """Grow a three-vertex linear search with default edges and pin the result.

    Checks that every result starts at the identity transform, that the
    number of expanded index rows equals
    ``_num_splices(e) * _num_splices(f)``, and that the rows equal the
    reference table. That table is the cross product of eight leading
    column pairs with fourteen trailing column pairs.

    NOTE(review): this file contains another function with this same name;
    if both live in one module the later definition replaces the earlier
    one -- confirm which is intended to run.
    """
    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'NC')
    w = Vertex(bbs, 'N_')
    verts = (u, v, w)
    e = Edge(u, bbs, v, bbs)
    f = Edge(v, bbs, w, bbs)
    edges = (e, f)

    result = grow_linear(verts, edges)
    idx = _expand_inout_indices(verts, result.indices)

    assert np.allclose(result.positions[:, 0], np.eye(4))
    assert len(idx) == _num_splices(e) * _num_splices(f)

    # Reference rows: each (col 0, col 1) pair combined with each
    # (col 2, col 3) pair, in this order.
    leading_pairs = [
        [19, 19], [21, 17], [22, 16], [22, 18],
        [22, 19], [23, 17], [23, 18], [23, 19],
    ]
    trailing_pairs = [
        [0, 22], [18, 40], [19, 21], [19, 60], [21, 0], [21, 58], [22, 1],
        [22, 57], [22, 59], [22, 60], [23, 20], [23, 58], [23, 59], [23, 60],
    ]
    expected = [lead + trail for lead in leading_pairs
                for trail in trailing_pairs]
    assert np.all(idx == expected)
def test_edge_fullsize_prots(bbdb_fullsize_prots):
    """Check ``Edge.allowed_entries`` for five vertex pairings.

    Every edge is built with the same splice parameters. For each pairing a
    table maps an index to the exact entries ``allowed_entries`` must
    return; every other index below the stated bound must return nothing.

    Non-empty expectations are compared with ``np.array_equal`` rather than
    ``np.all(got == want)``: with ``==`` an empty result broadcasts against
    a one-element expectation such as ``[4]`` to an empty array, and
    ``np.all`` of an empty array is True, so a missing entry would pass.

    NOTE(review): this file contains another function with this same name;
    if both live in one module the later definition replaces the earlier
    one -- confirm which is intended to run.
    """
    bbs = bbdb_fullsize_prots.query('all')
    splice_kw = dict(
        splice_max_rms=0.7,
        splice_rms_range=5,
        splice_ncontact_cut=7,
        splice_clash_contact_range=9,
        splice_clash_contact_by_helix=False,
    )

    def check(spec_u, spec_v, expected, n_checked):
        # Build the edge, verify the listed indices exactly, then verify
        # that all remaining indices in range(n_checked) are empty.
        u = Vertex(bbs, spec_u)
        v = Vertex(bbs, spec_v)
        e = Edge(u, bbs, v, bbs, **splice_kw)
        for i, want in expected.items():
            got = e.allowed_entries(i)
            assert np.array_equal(got, want), (
                f'{spec_u}->{spec_v} allowed_entries({i}): '
                f'got {got!r}, expected {want!r}')
        for i in range(n_checked):
            if i in expected:
                continue
            assert len(e.allowed_entries(i)) == 0, (
                f'{spec_u}->{spec_v} allowed_entries({i}) should be empty')

    check('_C', 'N_', {
        0: [1, 4, 26],
        1: [4, 26],
        2: [4, 5, 26, 42],
        3: [5, 42],
        15: [4],
        16: [26, 42],
        17: [4, 26, 42],
        18: [1, 26],
        19: [26, 42],
        20: [26],
        21: [4],
        22: [1],
        23: [26, 42],
    }, 24)

    check('NC', 'NN', {
        0: [1, 4, 26],
        1: [4, 26],
        2: [4, 5, 26],
        3: [5],
        15: [4],
        16: [26],
        17: [4, 26],
        18: [1, 26],
        19: [26],
        20: [26],
        21: [4],
        22: [1],
        23: [26],
    }, 24)

    # The '_N' -> 'CN' and '_N' -> 'C_' pairings share one reference table.
    from_n_expected = {
        1: [0, 18, 22],
        4: [0, 1, 2, 15, 17, 21],
        5: [2, 3],
        26: [0, 1, 2, 16, 17, 18, 19, 20, 23],
        42: [2, 3, 16, 17, 19, 23],
    }
    check('_N', 'CN', from_n_expected, 61)
    check('_N', 'C_', from_n_expected, 61)

    check('NN', 'C_', {
        5: [0, 1, 2, 16, 17, 18, 19, 20, 23],
        21: [0, 18, 22],
        24: [0, 1, 2, 15, 17, 21],
        25: [2, 3],
    }, 41)
def perf_grow_3(bbdb, maxbb=10, shuf=0, parallel=1):
    """Time a three-vertex ``grow_linear`` run, prune clashes, dump results.

    Args:
        bbdb: database object providing ``query(name, max_bblocks=...,
            shuffle=...)``; queried for 'C3_N' and 'Het:CCX'.
        maxbb: forwarded to ``query`` as ``max_bblocks``.
        shuf: forwarded to ``query`` as ``shuffle``.
        parallel: forwarded to ``Vertex``, ``Edge``, ``grow_linear`` and
            ``prune_clashing_results``; also selects a thread pool (truthy)
            or ``InProcessExecutor`` (falsy) for the dump step.

    Prints a timing summary, the clash-pruning outcome and, when any
    results remain, how many structures were dumped. Returns None.
    """
    ttot = time()

    # Query time is measured but not included in the summary line.
    tdb = time()
    bbmap = {
        'C3_N': bbdb.query('C3_N', max_bblocks=maxbb, shuffle=shuf),
        'Het_CCX': bbdb.query('Het:CCX', max_bblocks=maxbb, shuffle=shuf),
    }
    tdb = time() - tdb

    tvertex = time()
    bbs = (bbmap['C3_N'], bbmap['Het_CCX'], bbmap['C3_N'])
    vertex_specs = ('_N', 'CC', 'N_')
    V = tuple(
        Vertex(blocks, spec, min_seg_len=15, parallel=parallel)
        for blocks, spec in zip(bbs, vertex_specs))
    tvertex = time() - tvertex

    tedge = time()
    # One edge per consecutive (vertex, bblocks) pair along the chain.
    chain = list(zip(V, bbs))
    E = [
        Edge(v_a, bb_a, v_b, bb_b, parallel=parallel, verbosity=1)
        for (v_a, bb_a), (v_b, bb_b) in zip(chain, chain[1:])
    ]
    tedge = time() - tedge

    tgrow = time()
    w = grow_linear(V, E, loss_function=lossfunc_rand_1_in(1),
                    parallel=parallel)
    tgrow = time() - tgrow

    Nres = len(w.losses)
    Ntot = np.prod([v.len for v in V])
    ttot = time() - ttot
    factor = np.log10(Ntot / (Nres + 1)) - 3  # every 1000th
    print(f' perf_grow_3 {maxbb:4} {ttot:7.1f}s {Nres:12,} {Ntot:20,} tv'
          f' {tvertex:7.1f}s te {tedge:7.1f}s tg {tgrow:7.1f}s {factor:10.3f}')
    sys.stdout.flush()

    graph = Graph(bbs, V, E)
    tclash = time()
    norig = len(w.indices)
    w = prune_clashing_results(graph, w, parallel=parallel)
    print('pruned clashes, %i of %i remain,' % (len(w.indices), norig),
          'took', time() - tclash, 'seconds')

    if len(w.indices) > 0:
        tpdb = time()
        if parallel:
            exe = cf.ThreadPoolExecutor
        else:
            exe = InProcessExecutor
        with exe(max_workers=3) as pool:
            futures = [
                pool.submit(_dump_pdb, bbdb, graph, i, w.indices[i],
                            w.positions[i])
                for i in range(len(w.indices))
            ]
            # Wait for every dump and surface any exception it raised.
            for fut in futures:
                fut.result()
        print('dumped %i structures' % len(w.indices), 'time', time() - tpdb)