Beispiel #1
0
def knearest_tanimoto_search(query_arena, target_arena, k, threshold):
    """Run a k-nearest Tanimoto search of each query fingerprint against `target_arena`

    One result slot is allocated per fingerprint in `query_arena`. The search
    itself is delegated to ``_chemfp.knearest_tanimoto_arena`` and the results
    are then passed through ``_chemfp.knearest_results_finalize``.

    :param query_arena: the query fingerprints; must support ``len()``
    :param target_arena: the target fingerprints
    :param k: the number of nearest neighbors to find; must not be negative
    :param threshold: the score threshold handed to the underlying search
    :returns: a SearchResults instance with ``len(query_arena)`` entries
    :raises ValueError: if `k` is negative
    """
    # Reject a bad `k` up front, with the same message used by the
    # single-fingerprint search, before any results storage is allocated.
    if k < 0:
        raise ValueError("k must be non-negative")

    require_matching_sizes(query_arena, target_arena)

    num_queries = len(query_arena)
    results = SearchResults(num_queries, target_arena.ids)

    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        # Query-side layout and fingerprint range.
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
        # Target-side layout and fingerprint range.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container; the trailing 0 is presumably the offset of the
        # first result slot to fill -- TODO confirm against the C extension.
        results, 0)

    _chemfp.knearest_results_finalize(results, 0, num_queries)

    return results
Beispiel #2
0
def knearest_tanimoto_search_fp(query_fp, target_arena, k, threshold):
    """Run a k-nearest Tanimoto search of a single fingerprint against `target_arena`

    The query fingerprint is aligned to the target arena's alignment and
    storage size, searched with ``_chemfp.knearest_tanimoto_arena``, and the
    single result is finalized and returned.

    :param query_fp: the query fingerprint
    :param target_arena: the target fingerprints
    :param k: the number of nearest neighbors to find; must not be negative
    :param threshold: the score threshold handed to the underlying search
    :returns: the first (and only) entry of a one-element SearchResults
    :raises ValueError: if `k` is negative
    """
    # Validate the cheap argument first so a bad `k` fails before the
    # fingerprint is size-checked and aligned.
    if k < 0:
        raise ValueError("k must be non-negative")

    require_matching_fp_size(query_fp, target_arena)
    query_start_padding, query_end_padding, aligned_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    # NOTE(review): no target ids are passed to SearchResults here -- confirm
    # that callers of the returned result do not need id lookups.
    results = SearchResults(1)
    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        # The query was aligned to the target's storage size above, so that
        # size is passed for the query side as well. The "0, 1" pair is
        # presumably the range covering the single query -- TODO confirm.
        query_start_padding, query_end_padding,
        target_arena.storage_size, aligned_fp, 0, 1,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        results, 0)
    _chemfp.knearest_results_finalize(results, 0, 1)

    return results[0]
Beispiel #3
0
def knearest_tanimoto_search_arena(query_arena,
                                   target_arena,
                                   k=3,
                                   threshold=0.7):
    """Search `target_arena` for the `k` nearest hits to each fingerprint in `query_arena`

    Only hits which are at least `threshold` similar are reported.
    The hits in the `SearchResults` are ordered by decreasing similarity score.

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        results = chemfp.search.knearest_tanimoto_search_arena(queries, targets, k=3, threshold=0.5)
        for query_id, query_hits in zip(queries.ids, results):
            if len(query_hits) >= 2:
                print("%s -> %s" % (query_id, ", ".join(query_hits.get_ids())))

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResults instance
    :raises ValueError: if `k` is negative
    """
    # Reject a bad `k` up front, with the same message used by the
    # single-fingerprint search, before any results storage is allocated.
    if k < 0:
        raise ValueError("k must be non-negative")

    _require_matching_sizes(query_arena, target_arena)

    num_queries = len(query_arena)

    # One result slot per query fingerprint.
    results = SearchResults(num_queries, target_arena.arena_ids)

    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        # Query-side layout and fingerprint range.
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
        # Target-side layout and fingerprint range.
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container; the trailing 0 is presumably the offset of the
        # first result slot to fill -- TODO confirm against the C extension.
        results, 0)

    _chemfp.knearest_results_finalize(results, 0, num_queries)

    return results
Beispiel #4
0
def knearest_tanimoto_search_arena(query_arena, target_arena, k=3, threshold=0.7):
    """Find the `k` nearest `target_arena` hits for every fingerprint in `query_arena`

    A hit is only reported if it is at least `threshold` similar to the query.
    The hits in the `SearchResults` are ordered by decreasing similarity score.

    Example::

        queries = chemfp.load_fingerprints("queries.fps")
        targets = chemfp.load_fingerprints("targets.fps")
        results = chemfp.search.knearest_tanimoto_search_arena(queries, targets, k=3, threshold=0.5)
        for query_id, query_hits in zip(queries.ids, results):
            if len(query_hits) >= 2:
                print("%s -> %s" % (query_id, ", ".join(query_hits.get_ids())))

    :param query_arena: The query fingerprints.
    :type query_arena: a FingerprintArena
    :param target_arena: The target fingerprints.
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResults instance
    :raises ValueError: if `k` is negative
    """
    # Same guard (and message) as the single-fingerprint search, applied
    # before any other work is done.
    if k < 0:
        raise ValueError("k must be non-negative")

    _require_matching_sizes(query_arena, target_arena)

    query_count = len(query_arena)

    # One result slot per query fingerprint.
    results = SearchResults(query_count, target_arena.arena_ids)

    query_args = (
        query_arena.start_padding, query_arena.end_padding,
        query_arena.storage_size, query_arena.arena,
        query_arena.start, query_arena.end,
    )
    target_args = (
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
    )
    # The trailing 0 is presumably the offset of the first result slot to
    # fill -- TODO confirm against the C extension.
    _chemfp.knearest_tanimoto_arena(
        *((k, threshold, target_arena.num_bits) + query_args + target_args + (results, 0)))

    _chemfp.knearest_results_finalize(results, 0, query_count)

    return results
Beispiel #5
0
def knearest_tanimoto_search_fp(query_fp, target_arena, k=3, threshold=0.7):
    """Search `target_arena` for the `k` nearest hits which are at least `threshold` similar to `query_fp`

    The hits in the `SearchResult` are ordered by decreasing similarity score.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print(list(chemfp.search.knearest_tanimoto_search_fp(query_fp, targets, k=3, threshold=0.0)))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    :raises ValueError: if `k` is negative
    """
    # Validate the cheap argument first so a bad `k` fails before the
    # fingerprint is size-checked and aligned.
    if k < 0:
        raise ValueError("k must be non-negative")

    _require_matching_fp_size(query_fp, target_arena)
    query_start_padding, query_end_padding, aligned_fp = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)

    # A single query fingerprint needs a single result slot.
    results = SearchResults(1, target_arena.arena_ids)
    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        # The query was aligned to the target's storage size above, so that
        # size is passed for the query side as well. The "0, 1" pair is
        # presumably the range covering the single query -- TODO confirm.
        query_start_padding, query_end_padding,
        target_arena.storage_size, aligned_fp, 0, 1,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        results, 0)
    _chemfp.knearest_results_finalize(results, 0, 1)

    return results[0]
Beispiel #6
0
def knearest_tanimoto_search_fp(query_fp, target_arena, k=3, threshold=0.7):
    """Find the `k` nearest hits in `target_arena` which are at least `threshold` similar to `query_fp`

    The hits in the `SearchResult` are ordered by decreasing similarity score.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print(list(chemfp.search.knearest_tanimoto_search_fp(query_fp, targets, k=3, threshold=0.0)))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    :raises ValueError: if `k` is negative
    """
    # Fail fast: a negative `k` is rejected before the fingerprint is
    # size-checked and aligned.
    if k < 0:
        raise ValueError("k must be non-negative")

    _require_matching_fp_size(query_fp, target_arena)
    alignment_info = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    query_start_padding, query_end_padding, aligned_fp = alignment_info

    # A single query fingerprint needs a single result slot.
    results = SearchResults(1, target_arena.arena_ids)

    # The query was aligned to the target's storage size above, so that size
    # is passed for the query side as well. The "0, 1" pair is presumably the
    # range covering the single query -- TODO confirm.
    query_args = (
        query_start_padding, query_end_padding,
        target_arena.storage_size, aligned_fp, 0, 1,
    )
    target_args = (
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
    )
    _chemfp.knearest_tanimoto_arena(
        *((k, threshold, target_arena.num_bits) + query_args + target_args + (results, 0)))
    _chemfp.knearest_results_finalize(results, 0, 1)

    return results[0]