Example #1
0
def count_tanimoto_hits_arena(query_arena, target_arena, threshold=0.7):
    """Count, for every fingerprint in `query_arena`, the hits in `target_arena` at least `threshold` similar to it

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        counts = chemfp.search.count_tanimoto_hits_arena(queries, targets, threshold=0.1)
        print(counts[:10])

    The concrete return type is an implementation detail. The only
    guaranteed operations are ``len()`` and integer indexing, each
    index giving the count for the corresponding query. At present
    the result is a ctypes array of C ints; it may become an
    array.array or a Python list in the future.

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an array of counts, one per query fingerprint
    """
    _require_matching_sizes(query_arena, target_arena)

    # One zero-initialised C int per query fingerprint. The buffer is
    # handed to the C search routine as its last argument.
    num_queries = len(query_arena)
    hit_counts_type = ctypes.c_int * num_queries
    hit_counts = hit_counts_type()

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # Query side: padding, record size, raw bytes and index range
        query_arena.start_padding,
        query_arena.end_padding,
        query_arena.storage_size,
        query_arena.arena,
        query_arena.start,
        query_arena.end,
        # Target side: same layout, plus the popcount index
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        hit_counts,
    )
    return hit_counts
Example #2
0
def count_tanimoto_hits_fp(query_fp, target_arena, threshold=0.7):
    """Count the number of hits in `target_arena` at least `threshold` similar to the `query_fp`

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print(chemfp.search.count_tanimoto_hits_fp(query_fp, targets, threshold=0.1))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an integer count
    """
    _require_matching_fp_size(query_fp, target_arena)
    # Improve the alignment so the faster algorithms can be used
    query_start_padding, query_end_padding, query_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    # The aligned query is searched as a one-record arena (start=0, end=1)
    # and only counts[0] is read back, so a single counter suffices.
    # Sizing the buffer by len(query_fp) allocated one slot per *byte* of
    # the fingerprint and needed the Python-2-only xrange.
    counts = array.array("i", [0])
    _chemfp.count_tanimoto_arena(
        threshold, target_arena.num_bits, query_start_padding,
        query_end_padding, target_arena.storage_size, query_fp, 0, 1,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena, target_arena.start,
        target_arena.end, target_arena.popcount_indices, counts)
    return counts[0]
Example #3
0
def count_tanimoto_hits_arena(query_arena, target_arena, threshold=0.7):
    """Return per-query counts of `target_arena` fingerprints at least `threshold` similar to each `query_arena` fingerprint

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        counts = chemfp.search.count_tanimoto_hits_arena(queries, targets, threshold=0.1)
        print(counts[:10])

    Do not rely on the exact type of the result. It always supports
    ``len()`` and index lookup, where each lookup yields an integer
    count. Today it is a ctypes array of C ints, but a later version
    may return an array.array or a Python list instead.

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an array of counts
    """
    _require_matching_sizes(query_arena, target_arena)

    # Allocate the output buffer: a zeroed C int for each query.
    result_type = ctypes.c_int * len(query_arena)
    per_query_counts = result_type()

    _chemfp.count_tanimoto_arena(
        threshold, target_arena.num_bits,
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        per_query_counts)
    return per_query_counts
Example #4
0
def count_tanimoto_hits_fp(query_fp, target_arena, threshold=0.7):
    """Count the number of hits in `target_arena` at least `threshold` similar to the `query_fp`

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print(chemfp.search.count_tanimoto_hits_fp(query_fp, targets, threshold=0.1))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an integer count
    """
    _require_matching_fp_size(query_fp, target_arena)
    # Improve the alignment so the faster algorithms can be used
    query_start_padding, query_end_padding, query_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    # Exactly one query is searched (query range 0..1 below) and only the
    # first counter is returned, so allocate a single slot rather than one
    # per byte of the aligned fingerprint. This also avoids xrange, which
    # does not exist on Python 3.
    counts = array.array("i", [0])
    _chemfp.count_tanimoto_arena(threshold, target_arena.num_bits,
                                 query_start_padding, query_end_padding,
                                 target_arena.storage_size, query_fp, 0, 1,
                                 target_arena.start_padding, target_arena.end_padding,
                                 target_arena.storage_size, target_arena.arena,
                                 target_arena.start, target_arena.end,
                                 target_arena.popcount_indices,
                                 counts)
    return counts[0]
Example #5
0
def count_tanimoto_hits(query_arena, target_arena, threshold):
    """Run the arena-vs-arena Tanimoto count search and return the per-query counts

    After checking that the two arenas have matching sizes, a ctypes
    array holding one C int per entry of `query_arena` is allocated,
    passed to ``_chemfp.count_tanimoto_arena`` together with both
    arenas' layout information, and then returned.

    :param query_arena: The query fingerprints.
    :param target_arena: The target fingerprints.
    :param threshold: The minimum score threshold.
    :returns: a ctypes array of ints with ``len(query_arena)`` entries
    """
    require_matching_sizes(query_arena, target_arena)

    # Zero-initialised output buffer, one slot per query.
    num_queries = len(query_arena)
    hit_counts = (ctypes.c_int * num_queries)()

    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        query_arena.start_padding,
        query_arena.end_padding,
        query_arena.storage_size,
        query_arena.arena,
        query_arena.start,
        query_arena.end,
        target_arena.start_padding,
        target_arena.end_padding,
        target_arena.storage_size,
        target_arena.arena,
        target_arena.start,
        target_arena.end,
        target_arena.popcount_indices,
        hit_counts,
    )
    return hit_counts
Example #6
0
def count_tanimoto_hits_fp(query_fp, target_arena, threshold):
    """Count the fingerprints in `target_arena` at least `threshold` similar to `query_fp`

    The query is first re-aligned to the target arena's alignment and
    storage size, then searched as a one-fingerprint arena.

    :param query_fp: the query fingerprint
    :param target_arena: the target arena
    :param threshold: The minimum score threshold.
    :returns: an integer count
    """
    require_matching_fp_size(query_fp, target_arena)
    # Improve the alignment so the faster algorithms can be used
    query_start_padding, query_end_padding, query_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    # Only one query is searched (range 0..1) and only counts[0] is
    # returned, so one counter is enough. The old buffer had one slot per
    # byte of the fingerprint and relied on the Python-2-only xrange.
    counts = array.array("i", [0])
    _chemfp.count_tanimoto_arena(
        threshold, target_arena.num_bits, query_start_padding,
        query_end_padding, target_arena.storage_size, query_fp, 0, 1,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena, target_arena.start,
        target_arena.end, target_arena.popcount_indices, counts)
    return counts[0]