def test_linear_search_three(bbdb_fullsize_prots):
    """Check grow_linear on a three-vertex, two-edge SearchSpaceDag.

    All three vertices are built from the same ``query('all')`` bblocks with
    directions ``'_C'``, ``'NC'`` and ``'N_'``.  After expanding the result
    indices with ``_expand_inout_indices`` and sorting them
    lexicographically, they must equal the fixed table below, whose row
    count equals ``_num_splices(e) * _num_splices(f)``.
    """
    # Local import: sys is only needed for the print threshold below.
    import sys

    bbs = bbdb_fullsize_prots.query('all')
    u = Vertex(bbs, '_C')
    v = Vertex(bbs, 'NC')
    w = Vertex(bbs, 'N_')
    verts = (u, v, w)
    kw = dict(
        splice_max_rms=0.5,
        splice_ncontact_cut=30,
        splice_clash_d2=4.0**2,  # ca only
        splice_contact_d2=8.0**2,
        splice_rms_range=6,
        splice_clash_contact_range=60,
        splice_clash_contact_by_helix=False)
    e = Edge(u, bbs, v, bbs, **kw)
    f = Edge(v, bbs, w, bbs, **kw)
    edges = (e, f)

    ssdag = SearchSpaceDag(None, (bbs, ) * 3, verts, edges)
    result = grow_linear(ssdag, no_duplicate_bases=False)

    # The first position of every result is expected to be the identity.
    assert np.allclose(result.pos[:, 0], np.eye(4))

    idx = _expand_inout_indices(verts, result.idx)
    # lexsort treats its LAST key as primary, so this orders rows by
    # column 0, then 1, then 2, then 3.
    isort = np.lexsort((idx[:, 3], idx[:, 2], idx[:, 1], idx[:, 0]))
    idx = idx[isort, :]
    assert len(idx) == _num_splices(e) * _num_splices(f)

    # threshold=np.nan is rejected by NumPy with a ValueError; sys.maxsize
    # is the supported way to request untruncated output.
    np.set_printoptions(threshold=sys.maxsize)
    print(repr(idx))

    assert np.all(idx == [
        [0, 19, 0, 3], [0, 19, 0, 60], [0, 19, 16, 39], [0, 19, 17, 0],
        [0, 19, 17, 58], [0, 19, 18, 59], [0, 19, 22, 20], [0, 19, 22, 59],
        [0, 19, 23, 39], [0, 19, 23, 40], [0, 19, 23, 60],
        [17, 17, 0, 3], [17, 17, 0, 60], [17, 17, 16, 39], [17, 17, 17, 0],
        [17, 17, 17, 58], [17, 17, 18, 59], [17, 17, 22, 20], [17, 17, 22, 59],
        [17, 17, 23, 39], [17, 17, 23, 40], [17, 17, 23, 60],
        [18, 18, 0, 3], [18, 18, 0, 60], [18, 18, 16, 39], [18, 18, 17, 0],
        [18, 18, 17, 58], [18, 18, 18, 59], [18, 18, 22, 20], [18, 18, 22, 59],
        [18, 18, 23, 39], [18, 18, 23, 40], [18, 18, 23, 60],
        [22, 18, 0, 3], [22, 18, 0, 60], [22, 18, 16, 39], [22, 18, 17, 0],
        [22, 18, 17, 58], [22, 18, 18, 59], [22, 18, 22, 20], [22, 18, 22, 59],
        [22, 18, 23, 39], [22, 18, 23, 40], [22, 18, 23, 60],
        [23, 19, 0, 3], [23, 19, 0, 60], [23, 19, 16, 39], [23, 19, 17, 0],
        [23, 19, 17, 58], [23, 19, 18, 59], [23, 19, 22, 20], [23, 19, 22, 59],
        [23, 19, 23, 39], [23, 19, 23, 40], [23, 19, 23, 60],
    ])
def test_linear_search_two(bbdb_fullsize_prots):
    """Check grow_linear on a two-vertex, single-edge SearchSpaceDag.

    Both vertices come from the same ``query('all')`` bblocks (directions
    ``'_C'`` and ``'N_'``).  The result index pairs, sorted by first then
    second column, must match the fixed table at the bottom.
    """
    bbs = bbdb_fullsize_prots.query('all')
    head = Vertex(bbs, '_C')
    tail = Vertex(bbs, 'N_')
    verts = (head, tail)
    splice_opts = {
        'splice_max_rms': 0.7,
        'splice_ncontact_cut': 30,
        'splice_clash_d2': 4.0**2,  # ca only
        'splice_contact_d2': 8.0**2,
        'splice_rms_range': 6,
        'splice_clash_contact_range': 60,
        'splice_clash_contact_by_helix': False,
    }
    edges = (Edge(head, bbs, tail, bbs, **splice_opts), )

    # One column of each vertex's inout table is expected to be 0..len-1.
    assert np.all(head.inout[:, 1] == np.arange(head.len))
    assert np.all(tail.inout[:, 0] == np.arange(tail.len))

    ssdag = SearchSpaceDag(None, (bbs, ) * 2, verts, edges)
    result = grow_linear(ssdag, no_duplicate_bases=False)
    assert np.allclose(result.pos[:, 0], np.eye(4))

    # lexsort uses its last key as the primary one: column 0, then column 1.
    order = np.lexsort((result.idx[:, 1], result.idx[:, 0]))
    sortidx = result.idx[order, :]
    print(repr(sortidx))

    expected = [
        [0, 3], [0, 24], [0, 41], [0, 60],
        [1, 22], [1, 25],
        [16, 3], [16, 39], [16, 40], [16, 57], [16, 60],
        [17, 0], [17, 22], [17, 40], [17, 55], [17, 58],
        [18, 23], [18, 38], [18, 55], [18, 59],
        [19, 24], [19, 41], [19, 56], [19, 60],
        [20, 18], [20, 57],
        [21, 58],
        [22, 20], [22, 23], [22, 38], [22, 39], [22, 59], [22, 60],
        [23, 24], [23, 39], [23, 40], [23, 41], [23, 54], [23, 60],
    ]
    assert np.all(sortidx == expected)
def test_linear_search_two(bbdb_fullsize_prots):
    """Check grow_linear called directly with (verts, edges) for two vertices.

    Uses an Edge built with default settings; the result indices are
    compared, unsorted, against the fixed table below.
    """
    bbs = bbdb_fullsize_prots.query('all')
    head = Vertex(bbs, '_C')
    tail = Vertex(bbs, 'N_')
    link = Edge(head, bbs, tail, bbs)

    # One column of each vertex's inout table is expected to be 0..len-1.
    assert np.all(head.inout[:, 1] == np.arange(head.len))
    assert np.all(tail.inout[:, 0] == np.arange(tail.len))

    result = grow_linear((head, tail), (link, ))
    assert np.allclose(result.positions[:, 0], np.eye(4))

    expected = [
        [0, 22], [18, 40], [19, 21], [19, 60], [21, 0], [21, 58], [22, 1],
        [22, 57], [22, 59], [22, 60], [23, 20], [23, 58], [23, 59], [23, 60],
    ]  # yapf: disable
    assert np.all(result.indices == expected)
def test_linear_search_three(bbdb_fullsize_prots):
    """Check grow_linear called directly with (verts, edges) for three vertices.

    The expected table is the ordered cross product of eight leading
    (column 0-1) pairs with fourteen trailing (column 2-3) pairs, which is
    also why its length must equal ``_num_splices(e) * _num_splices(f)``.
    """
    bbs = bbdb_fullsize_prots.query('all')
    first = Vertex(bbs, '_C')
    middle = Vertex(bbs, 'NC')
    last = Vertex(bbs, 'N_')
    verts = (first, middle, last)
    edge_in = Edge(first, bbs, middle, bbs)
    edge_out = Edge(middle, bbs, last, bbs)
    edges = (edge_in, edge_out)

    result = grow_linear(verts, edges)

    idx = _expand_inout_indices(verts, result.indices)
    assert np.allclose(result.positions[:, 0], np.eye(4))
    assert len(idx) == _num_splices(edge_in) * _num_splices(edge_out)

    leading = [
        [19, 19], [21, 17], [22, 16], [22, 18],
        [22, 19], [23, 17], [23, 18], [23, 19],
    ]  # yapf: disable
    trailing = [
        [0, 22], [18, 40], [19, 21], [19, 60], [21, 0], [21, 58], [22, 1],
        [22, 57], [22, 59], [22, 60], [23, 20], [23, 58], [23, 59], [23, 60],
    ]  # yapf: disable
    # Outer loop over the leading pairs keeps the rows in the same order as
    # a fully spelled-out table: all trailing pairs for the first leading
    # pair, then all for the second, and so on.
    expected = [lead + trail for lead in leading for trail in trailing]
    assert np.all(idx == expected)
def simple_search_dag(criteria,
                      db=None,
                      nbblocks=100,
                      min_seg_len=15,
                      parallel=False,
                      verbosity=0,
                      timing=0,
                      modbbs=None,
                      make_edges=True,
                      merge_bblock=None,
                      precache_splices=False,
                      precache_only=False,
                      bbs=None,
                      only_seg=None,
                      source=None,
                      print_edge_summary=False,
                      no_duplicate_bases=False,
                      shuffle_bblocks=False,
                      use_saved_bblocks=False,
                      output_prefix='./worms',
                      **kw):
    """Collect bblocks, build vertices and edges, and wrap them in a SearchSpaceDag.

    Args:
        criteria: object exposing ``bbspec`` (a sequence of
            ``(query, direction)`` pairs), ``which_mergeseg()``,
            ``is_cyclic`` and, when cyclic, ``from_seg`` / ``to_seg``.
        db: pair ``(bbdb, spdb)``.  It is unpacked unconditionally, so the
            ``None`` default fails with ``TypeError``.
        nbblocks: passed to ``bbdb.query`` as ``max_bblocks``.
        min_seg_len: forwarded to ``Vertex``.
        parallel: falsy runs vertex construction on an ``InProcessExecutor``;
            otherwise it is the ``max_workers`` of a thread pool.  It is also
            forwarded to ``bbdb.query`` and ``precompute_splicedb``.
        verbosity: forwarded to ``precompute_splicedb`` and ``Edge``.
        timing: if truthy, return ``(ssdag, tdb, tvertex, tedge)``.
        modbbs: optional callable invoked with the list of bblock lists.
        make_edges: if false, no edges are built and an empty edge list is
            handed to ``SearchSpaceDag``.
        merge_bblock: if not ``None`` and non-negative, every segment listed
            by ``criteria.which_mergeseg()`` is reduced to that one bblock.
        precache_splices: run ``precompute_splicedb`` on the adjacent bblock
            pairs; also forwarded to ``Edge``.
        precache_only: return the bblock lists before any vertex is built.
        bbs: pre-selected bblock lists; shallow-copied instead of querying.
        only_seg: build a vertex for this segment only; every other vertex
            slot is ``None``.
        source: earlier dag (``bbs``, ``verts``, ``edges``) whose vertices
            and edges are reused where directions and file hashes match.
        print_edge_summary: call ``_print_edge_summary`` on the edges.
        no_duplicate_bases: accepted but not used in this function.
        shuffle_bblocks: forwarded to ``bbdb.query``.
        use_saved_bblocks: load bblock names from, and save them to,
            ``output_prefix + '_bblocks.pickle'``.
        output_prefix: prefix of that pickle file.
        **kw: forwarded to ``precompute_splicedb`` and ``Edge``.

    Returns:
        The bblock lists if ``precache_only``; otherwise the
        ``SearchSpaceDag``, or the 4-tuple described under ``timing``.
    """
    bbdb, spdb = db
    queries, directions = zip(*criteria.bbspec)
    # Must exist even when no edges are built: it is part of the timing tuple.
    tedge = 0.0
    tdb = time()
    if bbs is None:
        bbs = list()
        savename = output_prefix + '_bblocks.pickle'
        if use_saved_bblocks and os.path.exists(savename):
            # NOTE: unpickling executes whatever the file contains; only
            # point output_prefix at files this program wrote itself.
            with open(savename, 'rb') as inp:
                bbnames_list = _pickle.load(inp)
            for bbnames in bbnames_list:
                bbs.append([bbdb.bblock(n) for n in bbnames])
        else:
            for iquery, query in enumerate(queries):
                # Normalise negative segment indices to absolute ones.
                msegs = [
                    i + len(queries) if i < 0 else i
                    for i in criteria.which_mergeseg()
                ]
                if iquery in msegs[1:]:
                    # Later merge segments share the very same list object
                    # as the first one.
                    print('seg', iquery, 'repeating bblocks from', msegs[0])
                    bbs.append(bbs[msegs[0]])
                    continue
                bbs0 = bbdb.query(
                    query,
                    max_bblocks=nbblocks,
                    shuffle_bblocks=shuffle_bblocks,
                    parallel=parallel,
                )
                bbs.append(bbs0)

        # NOTE(review): indentation was lost in the reviewed copy; the
        # summary, cyclic aliasing and save steps below are assumed to run
        # for loaded and freshly queried bblocks alike -- confirm.
        bases = [
            Counter(bytes(b.base).decode('utf-8') for b in bbs0)
            for bbs0 in bbs
        ]
        assert len(bbs) == len(queries)
        for i, v in enumerate(bbs):
            assert len(v) > 0, 'no bblocks for query: "' + queries[i] + '"'
        print('bblock queries:', str(queries))
        print('bblock numbers:', [len(b) for b in bbs])
        print('bblocks id:', [id(b) for b in bbs])
        print('bblock0 id ', [id(b[0]) for b in bbs])
        print('base_counts:')
        for query, basecount in zip(queries, bases):
            counts = ' '.join(f'{k}: {c}' for k, c in basecount.items())
            print(f' {query:10}', counts)

        if criteria.is_cyclic:
            for a, b in zip(bbs[criteria.from_seg], bbs[criteria.to_seg]):
                assert a is b
            bbs[criteria.to_seg] = bbs[criteria.from_seg]

        if use_saved_bblocks:
            bbnames = [[bytes(b.file).decode('utf-8') for b in bb]
                       for bb in bbs]
            with open(savename, 'wb') as out:
                _pickle.dump(bbnames, out)
    else:
        # Shallow copy so the reassignments below leave the caller's list
        # untouched.
        bbs = bbs.copy()

    assert len(bbs) == len(criteria.bbspec)
    if modbbs:
        modbbs(bbs)

    if merge_bblock is not None and merge_bblock >= 0:
        for i in criteria.which_mergeseg():
            bbs[i] = (bbs[i][merge_bblock], )

    tdb = time() - tdb

    if precache_splices:
        bbnames = [[bytes(bb.file) for bb in bbtup] for bbtup in bbs]
        bbpairs = set()
        for i in range(len(bbnames) - 1):
            bb1 = bbnames[i]
            bb2 = bbnames[i + 1]
            dirn1 = directions[i]
            # An 'N' exit direction flips the order of the stored pair.
            rev = dirn1[1] == 'N'
            if bbs[i] is bbs[i + 1]:
                # Same list object on both sides: only self-pairs are added.
                bbpairs.update((a, a) for a in bb1)
            else:
                bbpairs.update(
                    (b, a) if rev else (a, b) for a in bb1 for b in bb2)
        precompute_splicedb(db, bbpairs, verbosity=verbosity,
                            parallel=parallel, **kw)
    # NOTE(review): assumed to apply whether or not precache_splices is set.
    if precache_only:
        return bbs

    verts = [None] * len(queries)
    edges = [None] * len(queries[1:])
    if source:
        srcdirn = [''.join('NC_'[d] for d in source.verts[i].dirn)
                   for i in range(len(source.verts))]  # yapf: disable
        srcverts, srcedges = list(), list()
        # Reuse a source vertex when direction and bblock file hashes match.
        for i, bb in enumerate(bbs):
            for isrc, bbsrc in enumerate(source.bbs):
                if directions[i] != srcdirn[isrc]:
                    continue
                if [b.filehash for b in bb] == [b.filehash for b in bbsrc]:
                    verts[i] = source.verts[isrc]
                    srcverts.append(isrc)
        # Reuse a source edge when both of its endpoints match.
        for i, bb in enumerate(zip(bbs, bbs[1:])):
            bb0, bb1 = bb
            for isrc, bbsrc in enumerate(zip(source.bbs, source.bbs[1:])):
                bbsrc0, bbsrc1 = bbsrc
                if directions[i] != srcdirn[isrc]:
                    continue
                if directions[i + 1] != srcdirn[isrc + 1]:
                    continue
                he = [b.filehash for b in bb0] == [b.filehash for b in bbsrc0]
                he &= [b.filehash for b in bb1] == [b.filehash for b in bbsrc1]
                if not he:
                    continue
                edges[i] = source.edges[isrc]
                srcedges.append(isrc)

    if not make_edges:
        edges = []

    tvertex = time()
    exe = InProcessExecutor()
    if parallel:
        exe = cf.ThreadPoolExecutor(max_workers=parallel)
    with exe as pool:
        if only_seg is not None:
            # Temporarily narrow everything to the single requested segment.
            save = bbs, directions
            bbs = [bbs[only_seg]]
            directions = [directions[only_seg]]
            verts = [verts[only_seg]]
        futures = list()
        for i, bb in enumerate(bbs):
            dirn = directions[i]
            if verts[i] is None:
                futures.append(
                    pool.submit(Vertex, bb, dirn, min_seg_len=min_seg_len))
        verts_new = [f.result() for f in futures]
        # Futures were submitted in slot order, so the new vertices fill the
        # empty slots in that same order.
        isnone = [i for i in range(len(verts)) if verts[i] is None]
        for i, inone in enumerate(isnone):
            verts[inone] = verts_new[i]
        if isnone:
            assert len(isnone) == len(verts_new)
        assert all(v for v in verts)
        if only_seg is not None:
            # Pad back to one slot per query, None everywhere but only_seg.
            verts = ([None] * only_seg + verts + [None] *
                     (len(queries) - only_seg - 1))
            bbs, directions = save
    tvertex = time() - tvertex

    if make_edges:
        tedge = time()
        for i, e in enumerate(edges):
            if e is None:
                edges[i] = Edge(verts[i], bbs[i], verts[i + 1], bbs[i + 1],
                                splicedb=spdb, verbosity=verbosity,
                                precache_splices=precache_splices, **kw)
        tedge = time() - tedge
        if print_edge_summary:
            _print_edge_summary(edges)
        # NOTE(review): assumed to belong to the edge-building branch.
        spdb.sync_to_disk()

    toret = SearchSpaceDag(criteria.bbspec, bbs, verts, edges)
    if timing:
        toret = toret, tdb, tvertex, tedge
    return toret
def simple_search_dag(
        criteria,
        db=None,
        nbblocks=[64],
        min_seg_len=15,
        parallel=False,
        verbosity=0,
        timing=0,
        modbbs=None,
        make_edges=True,
        merge_bblock=None,
        merge_segment=None,
        precache_splices=False,
        precache_only=False,
        bbs=None,
        bblock_ranges=[],
        only_seg=None,
        source=None,
        print_edge_summary=False,
        no_duplicate_bases=False,
        shuffle_bblocks=False,
        use_saved_bblocks=False,
        output_prefix="./worms",
        only_ivertex=[],
        **kw,
):
    """Collect bblocks, build vertices and edges, and wrap them in a SearchSpaceDag.

    Args:
        criteria: object exposing ``bbspec`` (a sequence of
            ``(query, direction)`` pairs), ``is_cyclic`` and, when cyclic,
            ``from_seg`` / ``to_seg``; optionally ``cloned_segments()``.
        db: pair ``(bbdb, spdb)``.  It is unpacked unconditionally, so the
            ``None`` default fails with ``TypeError``.
        nbblocks: per-query bblock limit, indexed by query position.  An int
            or a single-entry sequence is applied to every query.
        min_seg_len: forwarded to ``Vertex``.
        parallel: falsy runs vertex construction on an ``InProcessExecutor``;
            otherwise it is the ``max_workers`` of a thread pool.  It is also
            forwarded to ``bbdb.query`` and ``precompute_splicedb``.
        verbosity: forwarded to ``precompute_splicedb`` and ``Edge``.
        timing: if truthy, return ``(ssdag, tdb, tvertex, tedge)``.
        modbbs: optional callable invoked with the list of bblock lists.
        make_edges: if false, no edges are built and an empty edge list is
            handed to ``SearchSpaceDag``.
        merge_bblock: if not ``None`` and non-negative, reduce segment(s) to
            that one bblock: every ``criteria.cloned_segments()`` entry when
            available and ``merge_segment`` is ``None``, else
            ``merge_segment`` (defaulting to 0).
        merge_segment: see ``merge_bblock``.
        precache_splices: run ``precompute_splicedb`` on the adjacent bblock
            pairs; also forwarded to ``Edge``.
        precache_only: return the bblock lists before any vertex is built.
        bbs: pre-selected bblock lists; shallow-copied instead of querying.
        bblock_ranges: flat ``[lb0, ub0, lb1, ub1, ...]`` slice bounds, two
            per segment, applied to freshly queried bblocks.
        only_seg: build a vertex for this segment only; every other vertex
            slot is ``None``.
        source: earlier dag (``bbs``, ``verts``, ``edges``) whose vertices
            and edges are reused where directions and file hashes match.
        print_edge_summary: call ``_print_edge_summary`` on the edges.
        no_duplicate_bases: accepted but not used in this function.
        shuffle_bblocks: forwarded to ``bbdb.query``.
        use_saved_bblocks: load bblock names from
            ``output_prefix + "_bblocks.pickle"`` if it exists, otherwise
            create it.
        output_prefix: prefix of that pickle file.
        only_ivertex: one index per vertex; when its length matches the
            vertex count, each vertex with ``len > 1`` is reduced in place to
            that single entry.
        **kw: forwarded to ``precompute_splicedb`` and ``Edge``.

    Returns:
        The bblock lists if ``precache_only``; otherwise the
        ``SearchSpaceDag``, or the 4-tuple described under ``timing``.

    Raises:
        AssertionError: among other consistency checks, when a newly built
            edge has no allowed splices (after printing a failure report).
    """
    bbdb, spdb = db
    queries, directions = zip(*criteria.bbspec)
    # Must exist even when no edges are built: it is part of the timing tuple.
    tedge = 0.0
    tdb = time()
    if bbs is None:
        # The default is a single-entry list but the limit is indexed per
        # query, so broadcast a lone value.  A new list is built; the
        # (shared) default object is never mutated.
        if isinstance(nbblocks, int):
            nbblocks = [nbblocks]
        if len(nbblocks) == 1:
            nbblocks = list(nbblocks) * len(queries)

        bbs = list()
        savename = output_prefix + "_bblocks.pickle"
        if use_saved_bblocks and os.path.exists(savename):
            # NOTE: unpickling executes whatever the file contains; only
            # point output_prefix at files this program wrote itself.
            with open(savename, "rb") as inp:
                bbnames_list = _pickle.load(inp)
            for i, bbnames in enumerate(bbnames_list):
                bbs.append([bbdb.bblock(n) for n in bbnames[:nbblocks[i]]])
        else:
            for iquery, query in enumerate(queries):
                if hasattr(criteria, "cloned_segments"):
                    # Normalise negative segment indices to absolute ones.
                    msegs = [
                        i + len(queries) if i < 0 else i
                        for i in criteria.cloned_segments()
                    ]
                    if iquery in msegs[1:]:
                        # Later cloned segments share the very same list
                        # object as the first one.
                        print("seg", iquery, "repeating bblocks from",
                              msegs[0])
                        bbs.append(bbs[msegs[0]])
                        continue
                bbs0 = bbdb.query(
                    query,
                    max_bblocks=nbblocks[iquery],
                    shuffle_bblocks=shuffle_bblocks,
                    parallel=parallel,
                )
                bbs.append(bbs0)

            # NOTE(review): indentation was lost in the reviewed copy; the
            # range slicing is assumed to apply to fresh queries only --
            # confirm.
            if bblock_ranges:
                bbs_sliced = list()
                assert len(bblock_ranges) == 2 * len(bbs)
                for ibb, bb in enumerate(bbs):
                    lb, ub = bblock_ranges[2 * ibb:2 * ibb + 2]
                    bbs_sliced.append(bb[lb:ub])
                bbs = bbs_sliced

        # NOTE(review): from here to the save step is assumed to run for
        # loaded and freshly queried bblocks alike -- confirm.
        for ibb, bb in enumerate(bbs):
            print("bblocks", ibb)
            for b in bb:
                print(" ", bytes(b.file).decode("utf-8"))

        bases = [
            Counter(bytes(b.base).decode("utf-8") for b in bbs0)
            for bbs0 in bbs
        ]
        assert len(bbs) == len(queries)
        for i, v in enumerate(bbs):
            assert len(v) > 0, 'no bblocks for query: "' + queries[i] + '"'
        print("bblock queries:", str(queries))
        print("bblock numbers:", [len(b) for b in bbs])
        print("bblocks id:", [id(b) for b in bbs])
        print("bblock0 id ", [id(b[0]) for b in bbs])
        print("base_counts:")
        for query, basecount in zip(queries, bases):
            counts = " ".join(f"{k}: {c}" for k, c in basecount.items())
            print(f" {query:10}", counts)

        if criteria.is_cyclic:
            bbs[criteria.to_seg] = bbs[criteria.from_seg]

        # Only write the cache when it does not exist yet, so a (possibly
        # truncated) reload never overwrites it.
        if use_saved_bblocks and not os.path.exists(savename):
            bbnames = [[bytes(b.file).decode("utf-8") for b in bb]
                       for bb in bbs]
            with open(savename, "wb") as out:
                _pickle.dump(bbnames, out)
    else:
        # Shallow copy so the reassignments below leave the caller's list
        # untouched.
        bbs = bbs.copy()

    assert len(bbs) == len(criteria.bbspec)
    if modbbs:
        modbbs(bbs)

    if merge_bblock is not None and merge_bblock >= 0:
        if hasattr(criteria, "cloned_segments") and merge_segment is None:
            for i in criteria.cloned_segments():
                bbs[i] = (bbs[i][merge_bblock], )
        else:
            if merge_segment is None:
                merge_segment = 0
            bbs[merge_segment] = (bbs[merge_segment][merge_bblock], )

    tdb = time() - tdb

    if precache_splices:
        bbnames = [[bytes(bb.file) for bb in bbtup] for bbtup in bbs]
        bbpairs = set()
        for i in range(len(bbnames) - 1):
            bb1 = bbnames[i]
            bb2 = bbnames[i + 1]
            dirn1 = directions[i]
            # An 'N' exit direction flips the order of the stored pair.
            rev = dirn1[1] == "N"
            if bbs[i] is bbs[i + 1]:
                # Same list object on both sides: only self-pairs are added.
                bbpairs.update((a, a) for a in bb1)
            else:
                bbpairs.update(
                    (b, a) if rev else (a, b) for a in bb1 for b in bb2)
        precompute_splicedb(db, bbpairs, verbosity=verbosity,
                            parallel=parallel, **kw)
    # NOTE(review): assumed to apply whether or not precache_splices is set.
    if precache_only:
        return bbs

    verts = [None] * len(queries)
    edges = [None] * len(queries[1:])
    if source:
        srcdirn = [
            "".join("NC_"[d] for d in source.verts[i].dirn)
            for i in range(len(source.verts))
        ]  # yapf: disable
        srcverts, srcedges = list(), list()
        for i, bb in enumerate(bbs):
            for isrc, bbsrc in enumerate(source.bbs):
                # fragile code... detecting this way can be wrong
                if directions[i] != srcdirn[isrc]:
                    continue
                if [b.filehash for b in bb] == [b.filehash for b in bbsrc]:
                    # super hacky fix, really need to be passed info on
                    # what's what
                    if srcverts and srcverts[-1] + 1 != isrc:
                        continue
                    verts[i] = source.verts[isrc]
                    srcverts.append(isrc)
        # Reuse a source edge when both of its endpoints match.
        for i, bb in enumerate(zip(bbs, bbs[1:])):
            bb0, bb1 = bb
            for isrc, bbsrc in enumerate(zip(source.bbs, source.bbs[1:])):
                bbsrc0, bbsrc1 = bbsrc
                if directions[i] != srcdirn[isrc]:
                    continue
                if directions[i + 1] != srcdirn[isrc + 1]:
                    continue
                he = [b.filehash for b in bb0] == [b.filehash for b in bbsrc0]
                he &= [b.filehash for b in bb1] == [b.filehash for b in bbsrc1]
                if not he:
                    continue
                edges[i] = source.edges[isrc]
                srcedges.append(isrc)

    if not make_edges:
        edges = []

    tvertex = time()
    exe = InProcessExecutor()
    if parallel:
        exe = cf.ThreadPoolExecutor(max_workers=parallel)
    with exe as pool:
        if only_seg is not None:
            # Temporarily narrow everything to the single requested segment.
            save = bbs, directions
            bbs = [bbs[only_seg]]
            directions = [directions[only_seg]]
            verts = [verts[only_seg]]
        futures = list()
        for i, bb in enumerate(bbs):
            dirn = directions[i]
            if verts[i] is None:
                futures.append(
                    pool.submit(Vertex, bb, dirn, min_seg_len=min_seg_len))
        verts_new = [f.result() for f in futures]
        # Futures were submitted in slot order, so the new vertices fill the
        # empty slots in that same order.
        isnone = [i for i in range(len(verts)) if verts[i] is None]
        for inew, inone in enumerate(isnone):
            verts[inone] = verts_new[inew]
            if source:
                print('use new vertex', inone)

        if only_ivertex:
            print("!!!!!!! using one ivertex !!!!!", only_ivertex,
                  len(verts), [v.len for v in verts])
            if len(only_ivertex) != len(verts):
                print(
                    "NOT altering verts, len(only_ivertex)!=len(verts) continuing...",
                    "this is ok if part of a sub-protocol")
            else:
                for ivert, v in enumerate(verts):
                    if v.len > 1:  # could already have been "trimmed"
                        assert only_ivertex[ivert] < v.len
                        v.reduce_to_only_one_inplace(only_ivertex[ivert])

        # Compare the counts directly instead of relying on a leftover loop
        # index, which the only_ivertex loop above would otherwise overwrite.
        if isnone:
            assert len(isnone) == len(verts_new)
        assert all(v for v in verts)
        if only_seg is not None:
            # Pad back to one slot per query, None everywhere but only_seg.
            verts = ([None] * only_seg + verts + [None] *
                     (len(queries) - only_seg - 1))
            bbs, directions = save
    tvertex = time() - tvertex

    if make_edges:
        tedge = time()
        for i, e in enumerate(edges):
            if e is not None:
                continue
            edges[i], edge_analysis = Edge(
                verts[i],
                bbs[i],
                verts[i + 1],
                bbs[i + 1],
                splicedb=spdb,
                verbosity=verbosity,
                precache_splices=precache_splices,
                **kw,
            )
            # Element 6 of every analysis tuple is its "ok" flag.
            allok = all(x[6] for x in edge_analysis)
            if allok:
                continue

            # At least one bblock pair failed: print a per-pair report and
            # an aggregate summary before the final assertion.
            print("=" * 80)
            print("info for edges with no valid splices",
                  edges[i].total_allowed_splices())
            for tup in edge_analysis:
                iblk0, iblk1, ofst0, ofst1, ires0, ires1 = tup[:6]
                ok, f_clash, f_rms, f_ncontact, f_ncnh, f_nhc = tup[6:12]
                m_rms, m_ncontact, m_ncnh, m_nhc = tup[12:]
                if ok:
                    continue
                assert len(bbs[i + 0]) > iblk0
                assert len(bbs[i + 1]) > iblk1
                print("=" * 80)
                print("egde Bblock A", bytes(bbs[i][iblk0].file))
                print("egde Bblock B", bytes(bbs[i + 1][iblk1].file))
                print(
                    f"bb {iblk0:3} {iblk1:3}",
                    f"ofst {ofst0:4} {ofst1:4}",
                    f"resi {ires0.shape} {ires1.shape}",
                )
                print(
                    f"clash_ok {int(f_clash*100):3}%",
                    f"rms_ok {int(f_rms*100):3}%",
                    f"ncontact_ok {int(f_ncontact*100):3}%",
                    f"ncnh_ok {int(f_ncnh*100):3}%",
                    f"nhc_ok {int(f_nhc*100):3}%",
                )
                print(
                    f"min_rms {m_rms:7.3f}",
                    f"max_ncontact {m_ncontact:7.3f}",
                    f"max_ncnh {m_ncnh:7.3f}",
                    f"max_nhc {m_nhc:7.3f}",
                )
            # NOTE(review): this separator is assumed to close the whole
            # per-pair listing rather than each entry -- confirm.
            print("=" * 80)
            # Tuple elements 7-11 are averaged over all pairs; element 12 is
            # minimised and elements 13+ are maximised.
            fok = np.stack([x[7:12] for x in edge_analysis]).mean(axis=0)
            rmsmin = np.array([x[12] for x in edge_analysis]).min()
            fmx = np.stack([x[13:] for x in edge_analysis]).max(axis=0)
            print(f"{' SPLICE FAIL SUMMARY ':=^80}")
            print(f"splice clash ok {int(fok[0]*100):3}%")
            print(f"splice rms ok {int(fok[1]*100):3}%")
            print(f"splice ncontacts ok {int(fok[2]*100):3}%")
            print(f"splice ncontacts_no_helix ok {int(fok[3]*100):3}%")
            print(f"splice nhelixcontacted ok {int(fok[4]*100):3}%")
            print(f"min rms of any failing {rmsmin}")
            print(
                f"max ncontact of any failing {fmx[0]} (maybe large for non-5-helix splice)"
            )
            print(
                f"max ncontact_no_helix {fmx[1]} (will be 999 for non-5-helix splice)"
            )
            print(
                f"max nhelix_contacted {fmx[2]} (will be 999 for non-5-helix splice)"
            )
            print("=" * 80)
            assert edges[i].total_allowed_splices() > 0, "invalid splice"
        tedge = time() - tedge
        if print_edge_summary:
            _print_edge_summary(edges)
        # NOTE(review): assumed to belong to the edge-building branch.
        spdb.sync_to_disk()

    toret = SearchSpaceDag(criteria.bbspec, bbs, verts, edges)
    if timing:
        toret = toret, tdb, tvertex, tedge
    return toret