def count_tanimoto_hits_arena(query_arena, target_arena, threshold=0.7):
    """Count the sufficiently similar targets for every query fingerprint

    For each fingerprint in `query_arena`, count the number of fingerprints
    in `target_arena` which are at least `threshold` similar to it.

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        counts = chemfp.search.count_tanimoto_hits_arena(queries, targets, threshold=0.1)
        print counts[:10]

    The result type is implementation specific. You can always take its
    length and index into it to get an integer count. It is currently a
    ctypes array of C ints, but it could become an array.array or a
    Python list in the future.

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an array of counts, one entry per query fingerprint
    """
    # Compatibility checking is delegated to the module's helper.
    _require_matching_sizes(query_arena, target_arena)

    # One C int slot per query fingerprint.
    CountArray = ctypes.c_int * len(query_arena)
    hit_counts = CountArray()

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # query arena: padding, storage layout and the range to search with
        query_arena.start_padding,
        query_arena.end_padding,
        query_arena.storage_size,
        query_arena.arena,
        query_arena.start,
        query_arena.end,
        # target arena: padding, storage layout and the range to search in
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        # output buffer handed to the extension, then returned to the caller
        hit_counts)
    return hit_counts
def count_tanimoto_hits_fp(query_fp, target_arena, threshold=0.7):
    """Count the targets which are sufficiently similar to one fingerprint

    Count the number of fingerprints in `target_arena` which are at least
    `threshold` similar to the `query_fp`.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print chemfp.search.count_tanimoto_hits_fp(query_fp, targets, threshold=0.1)

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an integer count
    """
    # Compatibility checking is delegated to the module's helper.
    _require_matching_fp_size(query_fp, target_arena)

    # Improve the alignment so the faster algorithms can be used
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_padding, end_padding, aligned_fp = aligned

    # The buffer has one int per byte of the aligned fingerprint, although
    # only slot 0 is read back below.
    # NOTE(review): presumably a single slot would be enough for the one
    # query searched here - confirm against the extension's buffer check.
    hit_counts = array.array("i", [0] * len(aligned_fp))

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # the aligned query, passed with the range [0, 1)
        start_padding,
        end_padding,
        target_arena.storage_size,
        aligned_fp,
        0,
        1,
        # target arena: padding, storage layout and the range to search in
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        # output buffer handed to the extension
        hit_counts)
    return hit_counts[0]
def count_tanimoto_hits_arena(query_arena, target_arena, threshold=0.7):
    """Count the sufficiently similar targets for every query fingerprint

    For each fingerprint in `query_arena`, count the number of fingerprints
    in `target_arena` which are at least `threshold` similar to it.

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        counts = chemfp.search.count_tanimoto_hits_arena(queries, targets, threshold=0.1)
        print counts[:10]

    The result type is implementation specific. You can always take its
    length and index into it to get an integer count. It is currently a
    ctypes array of C ints, but it could become an array.array or a
    Python list in the future.

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an array of counts, one entry per query fingerprint
    """
    # Compatibility checking is delegated to the module's helper.
    _require_matching_sizes(query_arena, target_arena)

    # One C int slot per query fingerprint.
    CountArray = ctypes.c_int * len(query_arena)
    hit_counts = CountArray()

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # query arena: padding, storage layout and the range to search with
        query_arena.start_padding,
        query_arena.end_padding,
        query_arena.storage_size,
        query_arena.arena,
        query_arena.start,
        query_arena.end,
        # target arena: padding, storage layout and the range to search in
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        # output buffer handed to the extension, then returned to the caller
        hit_counts)
    return hit_counts
def count_tanimoto_hits_fp(query_fp, target_arena, threshold=0.7):
    """Count the targets which are sufficiently similar to one fingerprint

    Count the number of fingerprints in `target_arena` which are at least
    `threshold` similar to the `query_fp`.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print chemfp.search.count_tanimoto_hits_fp(query_fp, targets, threshold=0.1)

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an integer count
    """
    # Compatibility checking is delegated to the module's helper.
    _require_matching_fp_size(query_fp, target_arena)

    # Improve the alignment so the faster algorithms can be used
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_padding, end_padding, aligned_fp = aligned

    # The buffer has one int per byte of the aligned fingerprint, although
    # only slot 0 is read back below.
    # NOTE(review): presumably a single slot would be enough for the one
    # query searched here - confirm against the extension's buffer check.
    hit_counts = array.array("i", [0] * len(aligned_fp))

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # the aligned query, passed with the range [0, 1)
        start_padding,
        end_padding,
        target_arena.storage_size,
        aligned_fp,
        0,
        1,
        # target arena: padding, storage layout and the range to search in
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        # output buffer handed to the extension
        hit_counts)
    return hit_counts[0]
def count_tanimoto_hits(query_arena, target_arena, threshold):
    """Count Tanimoto hits in `target_arena` for every query fingerprint

    The counting itself is done by `_chemfp.count_tanimoto_arena`, which is
    given `threshold`, the layout of both arenas and an output buffer with
    one C int slot per entry of `query_arena`.

    :param query_arena: The query fingerprints.
    :param target_arena: The target fingerprints.
    :param threshold: The score threshold passed to the extension
        (presumably the minimum similarity for a hit - confirm against
        the extension).
    :returns: a ctypes array of C ints with `len(query_arena)` entries
    """
    # Compatibility checking is delegated to the module's helper.
    require_matching_sizes(query_arena, target_arena)

    # One C int slot per query fingerprint.
    CountArray = ctypes.c_int * len(query_arena)
    hit_counts = CountArray()

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # query arena: padding, storage layout and the range to search with
        query_arena.start_padding,
        query_arena.end_padding,
        query_arena.storage_size,
        query_arena.arena,
        query_arena.start,
        query_arena.end,
        # target arena: padding, storage layout and the range to search in
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        # output buffer handed to the extension, then returned to the caller
        hit_counts)
    return hit_counts
def count_tanimoto_hits_fp(query_fp, target_arena, threshold):
    """Count Tanimoto hits in `target_arena` for a single query fingerprint

    The fingerprint is first re-aligned to match the target arena's
    alignment and storage size, then `_chemfp.count_tanimoto_arena` is
    called with it as a one-element query range.

    :param query_fp: the query fingerprint
    :param target_arena: the target arena
    :param threshold: The score threshold passed to the extension
        (presumably the minimum similarity for a hit - confirm against
        the extension).
    :returns: the first entry of the counts buffer
    """
    # Compatibility checking is delegated to the module's helper.
    require_matching_fp_size(query_fp, target_arena)

    # Improve the alignment so the faster algorithms can be used
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_padding, end_padding, aligned_fp = aligned

    # The buffer has one int per byte of the aligned fingerprint, although
    # only slot 0 is read back below.
    # NOTE(review): presumably a single slot would be enough for the one
    # query searched here - confirm against the extension's buffer check.
    hit_counts = array.array("i", [0] * len(aligned_fp))

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # the aligned query, passed with the range [0, 1)
        start_padding,
        end_padding,
        target_arena.storage_size,
        aligned_fp,
        0,
        1,
        # target arena: padding, storage layout and the range to search in
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        # output buffer handed to the extension
        hit_counts)
    return hit_counts[0]