def knearest_tanimoto_search(query_arena, target_arena, k, threshold):
    """Run a k-nearest Tanimoto search of `query_arena` against `target_arena`.

    :param query_arena: the arena holding the query fingerprints
    :param target_arena: the arena holding the target fingerprints
    :param k: the number of nearest neighbors to find; must not be negative
    :param threshold: the minimum score threshold
    :returns: a SearchResults created for ``len(query_arena)`` queries and
        the target arena's ids
    :raises ValueError: if `k` is negative
    """
    # Reject a negative k with the same error that knearest_tanimoto_search_fp
    # raises, and do it before touching the arenas or allocating any results.
    if k < 0:
        raise ValueError("k must be non-negative")

    require_matching_sizes(query_arena, target_arena)
    num_queries = len(query_arena)

    results = SearchResults(num_queries, target_arena.ids)

    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        # Layout and range of the query fingerprints.
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
        # Layout and range of the target fingerprints.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        results, 0)

    # The extension needs an explicit finalize pass over the filled rows
    # (presumably to put the hits in their final order -- confirm in _chemfp).
    _chemfp.knearest_results_finalize(results, 0, num_queries)

    return results
def knearest_tanimoto_search_fp(query_fp, target_arena, k, threshold):
    """Run a k-nearest Tanimoto search of one fingerprint against `target_arena`.

    :param query_fp: the query fingerprint
    :param target_arena: the arena holding the target fingerprints
    :param k: the number of nearest neighbors to find; must not be negative
    :param threshold: the minimum score threshold
    :returns: the single search result for `query_fp`
    :raises ValueError: if `k` is negative
    """
    require_matching_fp_size(query_fp, target_arena)

    # Re-pack the query so it uses the target arena's alignment and storage
    # size; the search call below is handed the returned padding values.
    query_start_padding, query_end_padding, query_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    if k < 0:
        raise ValueError("k must be non-negative")

    # Pass the target ids along, as the arena-vs-arena search does, so the
    # returned hits can be mapped back to target identifiers.
    results = SearchResults(1, target_arena.ids)

    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        # The aligned query is searched as a one-fingerprint range: [0, 1).
        query_start_padding, query_end_padding,
        target_arena.storage_size, query_fp, 0, 1,
        # Layout and range of the target fingerprints.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        results, 0)

    _chemfp.knearest_results_finalize(results, 0, 1)

    # Only one query was searched, so return its result directly.
    return results[0]
def knearest_tanimoto_search_arena(query_arena, target_arena, k=3, threshold=0.7):
    """Search for the `k` nearest hits in the `target_arena` at least `threshold` similar to the fingerprints in `query_arena`

    The hits in the `SearchResults` are ordered by decreasing similarity score.

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        results = chemfp.search.knearest_tanimoto_search_arena(queries, targets, k=3, threshold=0.5)
        for query_id, query_hits in zip(queries.ids, results):
            if len(query_hits) >= 2:
                print("%s -> %s" % (query_id, ", ".join(query_hits.get_ids())))

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResults instance
    :raises ValueError: if `k` is negative
    """
    # Reject a negative k with the same error that knearest_tanimoto_search_fp
    # raises, and do it before touching the arenas or allocating any results.
    if k < 0:
        raise ValueError("k must be non-negative")

    _require_matching_sizes(query_arena, target_arena)
    num_queries = len(query_arena)

    results = SearchResults(num_queries, target_arena.arena_ids)

    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        # Layout and range of the query fingerprints.
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
        # Layout and range of the target fingerprints.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        results, 0)

    # The extension needs an explicit finalize pass over the filled rows
    # (presumably what establishes the documented ordering -- confirm in _chemfp).
    _chemfp.knearest_results_finalize(results, 0, num_queries)

    return results
def knearest_tanimoto_search_fp(query_fp, target_arena, k=3, threshold=0.7):
    """Find the `k` nearest neighbors of `query_fp` in `target_arena`, keeping only hits at least `threshold` similar

    The hits in the returned result are ordered by decreasing similarity score.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print(list(chemfp.search.knearest_tanimoto_search_fp(query_fp, targets, k=3, threshold=0.0)))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    :raises ValueError: if `k` is negative
    """
    _require_matching_fp_size(query_fp, target_arena)

    # Re-pack the query to the target arena's alignment and storage size.
    start_pad, end_pad, aligned_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    if k < 0:
        raise ValueError("k must be non-negative")

    hits = SearchResults(1, target_arena.arena_ids)

    # Build the positional argument list in the order the extension expects:
    # search settings, then the query side, then the target side, then output.
    search_args = (k, threshold, target_arena.num_bits)
    query_args = (
        start_pad, end_pad,
        target_arena.storage_size, aligned_fp,
        0, 1,  # the aligned query is a one-fingerprint range
    )
    target_args = (
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
    )
    output_args = (hits, 0)
    _chemfp.knearest_tanimoto_arena(
        *(search_args + query_args + target_args + output_args))

    _chemfp.knearest_results_finalize(hits, 0, 1)

    # Exactly one query was searched; return its result directly.
    only_result = hits[0]
    return only_result