def threshold_tanimoto_search_fp(query_fp, target_arena, threshold=0.7):
    """Find the fingerprints in `target_arena` which are at least `threshold` similar to `query_fp`

    The hits in the returned `SearchResult` are in arbitrary order.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print list(chemfp.search.threshold_tanimoto_search_fp(query_fp, targets, threshold=0.15))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    """
    _require_matching_fp_size(query_fp, target_arena)

    # Re-align the query bytes to the arena's alignment and storage size
    # so the faster algorithms can be used.
    start_pad, end_pad, aligned_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    # One result slot, because there is exactly one query fingerprint.
    results = SearchResults(1, target_arena.arena_ids)

    call_args = (
        threshold, target_arena.num_bits,
        # Query side: the aligned fingerprint is described with the target's
        # storage size; the literal 0, 1 fill the query start/end slots
        # (presumably selecting the single query fingerprint).
        start_pad, end_pad, target_arena.storage_size, aligned_fp, 0, 1,
        # Target side: the arena's byte layout and the range to search.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container, followed by a trailing 0 whose meaning is
        # defined by the C extension (NOTE(review): not visible here).
        results, 0,
    )
    _chemfp.threshold_tanimoto_arena(*call_args)
    return results[0]
def threshold_tanimoto_search_fp(query_fp, target_arena, threshold=0.7):
    """Search `target_arena` for fingerprints at least `threshold` similar to `query_fp`

    `threshold` defaults to 0.7 so that the two-argument call form
    ``threshold_tanimoto_search_fp(query_fp, target_arena)`` works; passing
    it explicitly (positionally or by keyword) behaves as before.

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float (presumably between 0.0 and 1.0 -- the range
        is enforced, if at all, outside this function)
    :returns: the first (and only) entry of the SearchResults filled in
        by the C extension
    """
    require_matching_fp_size(query_fp, target_arena)

    # Improve the alignment so the faster algorithms can be used
    query_start_padding, query_end_padding, query_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    # NOTE(review): SearchResults is built without an id list here; confirm
    # that callers of this function do not need to map hits back to ids.
    results = SearchResults(1)

    _chemfp.threshold_tanimoto_arena(
        threshold, target_arena.num_bits,
        # Query side: the aligned fingerprint uses the target's storage
        # size; 0, 1 fill the query start/end slots.
        query_start_padding, query_end_padding,
        target_arena.storage_size, query_fp, 0, 1,
        # Target side: the arena's byte layout and the range to search.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        results, 0)

    return results[0]
def threshold_tanimoto_search(query_arena, target_arena, threshold):
    """Run a threshold Tanimoto search of `query_arena` against `target_arena`

    The work is done by `_chemfp.threshold_tanimoto_arena`, which fills in
    the returned container. With an empty `query_arena` the C extension is
    not called and the empty container is returned as-is.

    :param query_arena: The query fingerprints.
    :param target_arena: The target fingerprints.
    :param threshold: The minimum score threshold, passed through unchanged.
    :returns: a SearchResults sized to ``len(query_arena)`` and created
        with ``target_arena.ids``
    """
    require_matching_sizes(query_arena, target_arena)

    query_count = len(query_arena)
    results = SearchResults(query_count, target_arena.ids)
    if not query_count:
        # Nothing to search with; hand back the empty container.
        return results

    call_args = (
        threshold, target_arena.num_bits,
        # Query side: byte layout and the range of queries to use.
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
        # Target side: byte layout and the range to search.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container, followed by a trailing 0 whose meaning is
        # defined by the C extension (NOTE(review): not visible here).
        results, 0,
    )
    _chemfp.threshold_tanimoto_arena(*call_args)
    return results
def threshold_tanimoto_search_arena(query_arena, target_arena, threshold=0.7):
    """Find the hits in `target_arena` at least `threshold` similar to each fingerprint in `query_arena`

    The hits in the returned `SearchResults` are in arbitrary order.

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        results = chemfp.search.threshold_tanimoto_search_arena(queries, targets, threshold=0.5)
        for query_id, query_hits in zip(queries.ids, results):
            if len(query_hits) > 0:
                print query_id, "->", ", ".join(query_hits.get_ids())

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResults instance
    """
    _require_matching_sizes(query_arena, target_arena)

    query_count = len(query_arena)
    results = SearchResults(query_count, target_arena.arena_ids)
    if not query_count:
        # No queries: skip the C extension and return the empty container.
        return results

    call_args = (
        threshold, target_arena.num_bits,
        # Query side: byte layout and the range of queries to use.
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
        # Target side: byte layout and the range to search.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container, followed by a trailing 0 whose meaning is
        # defined by the C extension (NOTE(review): not visible here).
        results, 0,
    )
    _chemfp.threshold_tanimoto_arena(*call_args)
    return results