def threshold_tanimoto_search_fp(query_fp, target_reader, threshold):
    """Search a target reader for fingerprints similar to one query fingerprint.

    A target matches when its Tanimoto similarity to `query_fp` is at
    least `threshold`.

    Returns a single FPSSearchResult built from the matching ids and their
    scores, in the order the search routine reported them.

    Raises the exception built by _chemfp_error() when the underlying
    search routine reports a non-zero error code.
    """
    # Assumes query_fp is a byte string, i.e. 8 bits per element.
    num_bytes = len(query_fp)
    num_bits = num_bytes * 8

    # Scratch buffer handed to the search routine; allocated once and
    # reused for every call.
    NUM_CELLS = 1000
    hit_buffer = (TanimotoCell * NUM_CELLS)()

    matched_ids = []
    matched_scores = []

    # Running line number, used only when reporting an error.
    current_lineno = target_reader._first_fp_lineno

    for block in target_reader.iter_blocks():
        offset = 0
        block_end = len(block)
        # The search is called at least once per block and then repeated
        # until the offset it returns reaches the end of the block.
        block_done = False
        while not block_done:
            err, offset, lines_read, hit_count = _chemfp.fps_threshold_tanimoto_search(
                num_bits, 0, 0, num_bytes, query_fp, 0, -1,
                block, offset, block_end,
                threshold, hit_buffer)
            current_lineno += lines_read
            if err:
                raise _chemfp_error(err, current_lineno, target_reader._filename)

            # Only the first hit_count cells are read back from the buffer.
            for hit in itertools.islice(hit_buffer, hit_count):
                # Each cell gives the id as a pair of offsets into the block.
                matched_ids.append(block[hit.id_start:hit.id_end])
                matched_scores.append(hit.score)

            block_done = (offset == block_end)

    return FPSSearchResult(matched_ids, matched_scores)
def threshold_tanimoto_search_fp(query_fp, target_reader, threshold):
    """Return the targets whose Tanimoto score against `query_fp` is >= `threshold`.

    The target reader is consumed block by block. Every hit contributes
    its id (sliced out of the block) and its score to the result.

    The return value is one FPSSearchResult constructed from the collected
    ids and scores. If the search routine returns a non-zero error code,
    the exception produced by _chemfp_error() is raised.
    """
    collected_ids = []
    collected_scores = []
    remember_id = collected_ids.append
    remember_score = collected_scores.append

    # Assumes query_fp is a byte string, so its bit count is 8 * length.
    byte_count = len(query_fp)
    bit_count = byte_count * 8

    # One buffer of result cells, shared by every call to the search routine.
    NUM_CELLS = 1000
    result_cells = (TanimotoCell * NUM_CELLS)()

    # Tracks the input line number so errors can point at a location.
    line_number = target_reader._first_fp_lineno

    for block in target_reader.iter_blocks():
        position = 0
        limit = len(block)
        while True:
            err, position, consumed_lines, filled = _chemfp.fps_threshold_tanimoto_search(
                bit_count, 0, 0, byte_count, query_fp, 0, -1,
                block, position, limit,
                threshold, result_cells)
            line_number += consumed_lines
            if err:
                raise _chemfp_error(err, line_number, target_reader._filename)

            # Read back just the first `filled` cells.
            for cell in itertools.islice(result_cells, filled):
                remember_id(block[cell.id_start:cell.id_end])
                remember_score(cell.score)

            # Stop once the returned position equals the block length;
            # otherwise call again from that position.
            if position == limit:
                break

    return FPSSearchResult(collected_ids, collected_scores)
def threshold_tanimoto_search_arena(query_arena, target_reader, threshold):
    """Search a target reader for fingerprints similar to each query in an arena.

    A target matches a query when their Tanimoto similarity is at least
    `threshold`.

    Returns an FPSSearchResults wrapping one FPSSearchResult per query
    fingerprint, in the same order as the fingerprints in `query_arena`.
    An empty `query_arena` gives FPSSearchResults([]) without iterating
    over the target reader.

    Raises the exception built by _chemfp_error() when the underlying
    search routine reports a non-zero error code. The size check done by
    require_matching_sizes() runs first, before anything else.
    """
    require_matching_sizes(query_arena, target_reader)
    if not query_arena:
        return FPSSearchResults([])

    # One initially empty result per query, addressed by query index.
    per_query = [FPSSearchResult([], []) for _ in xrange(len(query_arena))]

    # Size the cell buffer at 100 cells per query, with a floor of
    # 10,000 cells. (The original note estimated about 200K of memory
    # for 10,000 cells.)
    NUM_CELLS = max(10000, len(query_arena) * 100)
    hit_buffer = (TanimotoCell * NUM_CELLS)()

    # Running line number, used only when reporting an error.
    current_lineno = target_reader._first_fp_lineno

    for block in target_reader.iter_blocks():
        offset = 0
        block_end = len(block)
        # The search is called at least once per block and then repeated
        # until the offset it returns reaches the end of the block.
        block_done = False
        while not block_done:
            err, offset, lines_read, hit_count = _chemfp.fps_threshold_tanimoto_search(
                query_arena.metadata.num_bits,
                query_arena.start_padding, query_arena.end_padding,
                query_arena.storage_size, query_arena.arena, 0, -1,
                block, offset, block_end,
                threshold, hit_buffer)
            current_lineno += lines_read
            if err:
                raise _chemfp_error(err, current_lineno, target_reader._filename)

            # Only the first hit_count cells are read back from the buffer.
            for hit in itertools.islice(hit_buffer, hit_count):
                target_id = block[hit.id_start:hit.id_end]
                # query_index selects which query's result this hit joins.
                bucket = per_query[hit.query_index]
                bucket.ids.append(target_id)
                bucket.scores.append(hit.score)

            block_done = (offset == block_end)

    return FPSSearchResults(per_query)
def threshold_tanimoto_search_arena(query_arena, target_reader, threshold):
    """Return, for every query in `query_arena`, the targets scoring >= `threshold`.

    Scores are Tanimoto similarities. The target reader is consumed block
    by block, and each hit is appended to the result of the query named by
    the hit's query index.

    The return value is an FPSSearchResults holding one FPSSearchResult
    per query fingerprint, in the same order as the fingerprints in
    `query_arena`. When `query_arena` is empty the function returns
    FPSSearchResults([]) right after the require_matching_sizes() check.

    If the search routine returns a non-zero error code, the exception
    produced by _chemfp_error() is raised.
    """
    require_matching_sizes(query_arena, target_reader)
    if not query_arena:
        return FPSSearchResults([])

    query_count = len(query_arena)
    results_by_query = [FPSSearchResult([], []) for _ in xrange(query_count)]

    # The buffer holds 100 cells per query, and never fewer than 10,000
    # cells in total. (The original note estimated about 200K of memory
    # for 10,000 cells.)
    NUM_CELLS = max(10000, len(query_arena) * 100)
    result_cells = (TanimotoCell * NUM_CELLS)()

    # Tracks the input line number so errors can point at a location.
    line_number = target_reader._first_fp_lineno

    for block in target_reader.iter_blocks():
        position = 0
        limit = len(block)
        while True:
            err, position, consumed_lines, filled = _chemfp.fps_threshold_tanimoto_search(
                query_arena.metadata.num_bits,
                query_arena.start_padding, query_arena.end_padding,
                query_arena.storage_size, query_arena.arena, 0, -1,
                block, position, limit,
                threshold, result_cells)
            line_number += consumed_lines
            if err:
                raise _chemfp_error(err, line_number, target_reader._filename)

            # Read back just the first `filled` cells.
            for cell in itertools.islice(result_cells, filled):
                matched_id = block[cell.id_start:cell.id_end]
                query_result = results_by_query[cell.query_index]
                query_result.ids.append(matched_id)
                query_result.scores.append(cell.score)

            # Stop once the returned position equals the block length;
            # otherwise call again from that position.
            if position == limit:
                break

    return FPSSearchResults(results_by_query)