Example #1
0
def fill_lower_triangle(results):
    """Mirror every hit in `results` across the diagonal, in place

    A symmetric threshold search may record only the upper-triangle
    hits. This passes `results`, together with its length as the
    matrix size, to `_chemfp.fill_lower_triangle`, which copies each
    entry to its transposed position so `results` describes the full
    matrix.

    :param results: the upper-triangle search results to complete
    :returns: None; `results` is modified
    """
    num_rows = len(results)
    _chemfp.fill_lower_triangle(results, num_rows)
Example #2
0
def fill_lower_triangle(results):
    """Copy the upper-triangle entries of `results` into the lower triangle

    Intended for use after a symmetric threshold search that only
    computed the upper triangle. The length of `results` is used as
    the matrix size and the work is done by
    `_chemfp.fill_lower_triangle`, which updates `results` itself.

    :param results: the search results holding the upper-triangle hits
    :returns: None; `results` is modified
    """
    matrix_size = len(results)
    _chemfp.fill_lower_triangle(results, matrix_size)
Example #3
0
def threshold_tanimoto_search_symmetric(arena,
                                        threshold=0.7,
                                        include_lower_triangle=True,
                                        batch_size=100):
    """Search for the hits in the `arena` at least `threshold` similar to the fingerprints in the arena

    When `include_lower_triangle` is True, compute the upper-triangle
    similarities, then copy the results to get the full set of
    results. When `include_lower_triangle` is False, only compute the
    upper triangle.

    The computation can take a long time. Python won't check for
    a ^C until the underlying search call finishes. This can be
    irritating. Instead, process only `batch_size` rows at a time
    before checking for a ^C.

    The hits in the returned `SearchResults` are in arbitrary order.

    Example::

        arena = chemfp.load_fingerprints("queries.fps")
        full_result = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=0.2)
        upper_triangle = chemfp.search.threshold_tanimoto_search_symmetric(
                  arena, threshold=0.2, include_lower_triangle=False)
        assert sum(map(len, full_result)) == sum(map(len, upper_triangle))*2

    :param arena: the set of fingerprints
    :type arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :param include_lower_triangle:
        if False, compute only the upper triangle, otherwise use symmetry to compute the full matrix
    :type include_lower_triangle: boolean
    :param batch_size: the number of rows to process before checking for a ^C
    :type batch_size: integer
    :returns: a SearchResults instance
    :raises ValueError: if `batch_size` is zero or negative
    """
    # Validate before touching the arena so a bad batch_size fails fast.
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")
    N = len(arena)
    results = SearchResults(N, arena.arena_ids)

    if not N:
        # Nothing to search and nothing to mirror.
        return results

    # Break the rows up into batch_size groups in order to let Python's
    # interrupt handler check for a ^C, which is otherwise suppressed
    # until the search call finishes.
    #
    # A plain counting loop is used rather than xrange()/range() so the
    # same code runs on both Python 2 and Python 3 and never builds a
    # list of start offsets.
    query_start = 0
    while query_start < N:
        # The final batch may be shorter than batch_size.
        query_end = min(query_start + batch_size, N)
        _chemfp.threshold_tanimoto_arena_symmetric(
            threshold, arena.num_bits, arena.start_padding,
            arena.end_padding, arena.storage_size, arena.arena,
            query_start, query_end, 0, N, arena.popcount_indices, results)
        query_start = query_end

    if include_lower_triangle:
        _chemfp.fill_lower_triangle(results, N)

    return results
Example #4
0
def threshold_tanimoto_search_symmetric(arena, threshold=0.7, include_lower_triangle=True, batch_size=100):
    """Search for the hits in the `arena` at least `threshold` similar to the fingerprints in the arena

    When `include_lower_triangle` is True, compute the upper-triangle
    similarities, then copy the results to get the full set of
    results. When `include_lower_triangle` is False, only compute the
    upper triangle.

    The computation can take a long time. Python won't check for
    a ^C until the underlying search call finishes. This can be
    irritating. Instead, process only `batch_size` rows at a time
    before checking for a ^C.

    The hits in the returned `SearchResults` are in arbitrary order.

    Example::

        arena = chemfp.load_fingerprints("queries.fps")
        full_result = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=0.2)
        upper_triangle = chemfp.search.threshold_tanimoto_search_symmetric(
                  arena, threshold=0.2, include_lower_triangle=False)
        assert sum(map(len, full_result)) == sum(map(len, upper_triangle))*2

    :param arena: the set of fingerprints
    :type arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :param include_lower_triangle:
        if False, compute only the upper triangle, otherwise use symmetry to compute the full matrix
    :type include_lower_triangle: boolean
    :param batch_size: the number of rows to process before checking for a ^C
    :type batch_size: integer
    :returns: a SearchResults instance
    :raises ValueError: if `batch_size` is zero or negative
    """
    # Reject an unusable batch size before doing any work on the arena.
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")
    N = len(arena)
    results = SearchResults(N, arena.arena_ids)

    if not N:
        # An empty arena has no rows to search and no triangle to fill.
        return results

    # Break it up into batch_size groups in order to let Python's
    # interrupt handler check for a ^C, which is otherwise
    # suppressed until the search call finishes.
    #
    # The offsets are advanced by hand instead of iterating over
    # xrange(), which only exists on Python 2; this form works the same
    # on Python 2 and Python 3.
    query_start = 0
    while query_start < N:
        # Clamp so the last batch stops at the end of the arena.
        query_end = min(query_start + batch_size, N)
        _chemfp.threshold_tanimoto_arena_symmetric(
            threshold, arena.num_bits,
            arena.start_padding, arena.end_padding, arena.storage_size, arena.arena,
            query_start, query_end, 0, N,
            arena.popcount_indices,
            results)
        query_start = query_end

    if include_lower_triangle:
        _chemfp.fill_lower_triangle(results, N)

    return results
Example #5
0
def threshold_tanimoto_search_symmetric(arena,
                                        threshold,
                                        include_lower_triangle=True):
    """Run a symmetric threshold Tanimoto search of `arena` against itself

    The whole arena is handed to
    `_chemfp.threshold_tanimoto_arena_symmetric` in a single call, with
    `0, N` passed for both row ranges, and the hits are collected in a
    new `SearchResults` built from the arena's size and ids. When
    `include_lower_triangle` is true and the arena is not empty,
    `_chemfp.fill_lower_triangle` is then applied to the results.

    The arena must have a non-empty `popcount_indices`; this is
    checked with an ``assert``.

    :param arena: the fingerprints to compare with each other
    :param threshold: the minimum score threshold passed to the search
    :param include_lower_triangle:
        if true, also fill in the lower triangle of the results
    :returns: a SearchResults instance
    """
    assert arena.popcount_indices
    num_fingerprints = len(arena)
    hits = SearchResults(num_fingerprints, arena.ids)

    # An empty arena skips both the search and the triangle fill.
    if not num_fingerprints:
        return hits

    _chemfp.threshold_tanimoto_arena_symmetric(
        threshold,
        arena.num_bits,
        arena.start_padding,
        arena.end_padding,
        arena.storage_size,
        arena.arena,
        0, num_fingerprints,
        0, num_fingerprints,
        arena.popcount_indices,
        hits)

    if include_lower_triangle:
        _chemfp.fill_lower_triangle(hits, num_fingerprints)

    return hits
Example #6
0
def fill_lower_triangle(results):
    """Fill in the lower triangle of `results` from its upper triangle

    Passes `results` and its length to `_chemfp.fill_lower_triangle`,
    which does the work on `results` itself.

    :param results: the search results to complete
    :returns: None
    """
    size = len(results)
    _chemfp.fill_lower_triangle(results, size)