예제 #1
0
    def test_errors(self):
        """Each invalid `align_fingerprint` call must raise a matching ValueError.

        The calls are made in the listed order; each entry pairs the
        positional arguments with the pattern the error message must match.
        """
        bad_calls = (
            ((1, 4, 4), "must be a character buffer"),
            (("too long", 4, 4), "storage size is too small"),
            (("", 1, 0), "storage size must be positive"),
            (("X", 1, -12), "storage size must be positive"),
            (("1234", 3, 4), "alignment must be a positive power of two"),
        )
        for call_args, message_pattern in bad_calls:
            with self.assertRaisesRegexp(ValueError, message_pattern):
                _chemfp.align_fingerprint(*call_args)
예제 #2
0
def threshold_tanimoto_search_fp(query_fp, target_arena, threshold=0.7):
    """Find the hits in `target_arena` which are at least `threshold` similar to `query_fp`

    The query is checked against the arena with `_require_matching_fp_size`
    and then re-aligned to the arena's alignment and storage size before
    the search is run.

    The hits in the returned `SearchResult` are in arbitrary order.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print list(chemfp.search.threshold_tanimoto_search_fp(query_fp, targets, threshold=0.15))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    """
    _require_matching_fp_size(query_fp, target_arena)

    # Re-align the query so the faster algorithms can be used
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_pad, end_pad, aligned_fp = aligned

    hits = SearchResults(1, target_arena.arena_ids)
    _chemfp.threshold_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # Query side. NOTE(review): the trailing 0, 1 presumably select the
        # single query row - confirm against _chemfp.
        start_pad, end_pad, target_arena.storage_size, aligned_fp, 0, 1,
        # Target side
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container; only its first row is returned below
        hits, 0)
    return hits[0]
예제 #3
0
def count_tanimoto_hits_fp(query_fp, target_arena, threshold=0.7):
    """Count the hits in `target_arena` which are at least `threshold` similar to `query_fp`

    The query is checked against the arena with `_require_matching_fp_size`
    and then re-aligned to the arena's alignment and storage size before
    the count is made.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print chemfp.search.count_tanimoto_hits_fp(query_fp, targets, threshold=0.1)

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an integer count
    """
    _require_matching_fp_size(query_fp, target_arena)

    # Re-align the query so the faster algorithms can be used
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_pad, end_pad, aligned_fp = aligned

    # NOTE(review): the buffer gets one slot per byte of the aligned query,
    # yet only slot 0 is read back - presumably a single slot would do;
    # confirm against _chemfp.count_tanimoto_arena before shrinking it.
    hit_counts = array.array("i", [0] * len(aligned_fp))
    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # Query side
        start_pad, end_pad, target_arena.storage_size, aligned_fp, 0, 1,
        # Target side
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output buffer
        hit_counts)
    return hit_counts[0]
예제 #4
0
 def test_identical(self):
     """An aligned fingerprint comes back as the same object with no padding."""
     # This fingerprint is aligned; no need to create a new one
     original = "blah"
     result = _chemfp.align_fingerprint(original, 4, 4)
     start_padding, end_padding, aligned = result
     self.assertEquals(start_padding, 0)
     self.assertEquals(end_padding, 0)
     # Identity, not just equality: the input string itself must be returned
     self.assertIs(original, aligned)
예제 #5
0
 def test_identical(self):
     """Aligning an aligned fingerprint returns the input object unpadded."""
     # This fingerprint is aligned; no need to create a new one
     fingerprint = "blah"
     aligned_result = _chemfp.align_fingerprint(fingerprint, 4, 4)
     start_padding, end_padding, returned = aligned_result
     self.assertEquals(start_padding, 0)
     self.assertEquals(end_padding, 0)
     # The very same string object is expected back, not a copy
     self.assertIs(fingerprint, returned)
예제 #6
0
def count_tanimoto_hits_fp(query_fp, target_arena, threshold=0.7):
    """Count how many fingerprints in `target_arena` are at least `threshold` similar to `query_fp`

    The query is first checked against the arena with
    `_require_matching_fp_size`, then re-aligned to the arena's alignment
    and storage size.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print chemfp.search.count_tanimoto_hits_fp(query_fp, targets, threshold=0.1)

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: an integer count
    """
    _require_matching_fp_size(query_fp, target_arena)

    # Re-align the query so the faster algorithms can be used
    padded_query = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    query_start_padding, query_end_padding, aligned_query = padded_query

    # NOTE(review): one slot is allocated per byte of the aligned query but
    # only the first slot is returned - presumably one slot is enough;
    # confirm against _chemfp.count_tanimoto_arena before changing the size.
    counts = array.array("i", [0] * len(aligned_query))
    _chemfp.count_tanimoto_arena(
        threshold, target_arena.num_bits,
        query_start_padding, query_end_padding,
        target_arena.storage_size, aligned_query, 0, 1,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        counts)
    return counts[0]
예제 #7
0
    def test_errors(self):
        """Invalid arguments to `align_fingerprint` must raise ValueError.

        Every case lists the (fingerprint, alignment, storage_size)
        arguments and the pattern the error message has to match. Cases
        run in order and the first failing one fails the test.
        """
        error_cases = [
            ("must be a character buffer", (1, 4, 4)),
            ("storage size is too small", ("too long", 4, 4)),
            ("storage size must be positive", ("", 1, 0)),
            ("storage size must be positive", ("X", 1, -12)),
            ("alignment must be a positive power of two", ("1234", 3, 4)),
        ]
        for expected_message, arguments in error_cases:
            with self.assertRaisesRegexp(ValueError, expected_message):
                _chemfp.align_fingerprint(*arguments)
예제 #8
0
def count_tanimoto_hits_fp(query_fp, target_arena, threshold):
    """Return the count computed by `_chemfp.count_tanimoto_arena` for one query

    The query is checked against the arena with `require_matching_fp_size`,
    re-aligned to the arena's alignment and storage size, and then passed
    with `threshold` and the arena's fields to the C-level counter.

    :param query_fp: the query fingerprint
    :param target_arena: the target arena
    :param threshold: the score threshold handed to the counter
    :returns: the first entry of the integer counts buffer
    """
    require_matching_fp_size(query_fp, target_arena)

    # Re-align the query so the faster algorithms can be used
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_pad, end_pad, aligned_fp = aligned

    # NOTE(review): sized by the aligned query's length although only slot 0
    # is read back - presumably one slot would suffice; confirm first.
    hit_counts = array.array("i", [0] * len(aligned_fp))
    _chemfp.count_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # Query side
        start_pad, end_pad, target_arena.storage_size, aligned_fp, 0, 1,
        # Target side
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output buffer
        hit_counts)
    return hit_counts[0]
예제 #9
0
def threshold_tanimoto_search_fp(query_fp, target_arena, threshold):
    """Run `_chemfp.threshold_tanimoto_arena` for one query and return its result row

    The query is checked against the arena with `require_matching_fp_size`,
    re-aligned to the arena's alignment and storage size, and searched
    against the arena with the given `threshold`.

    :param query_fp: the query fingerprint
    :param target_arena: the target arena
    :param threshold: the score threshold handed to the search
    :returns: the first row of a one-row `SearchResults`
    """
    require_matching_fp_size(query_fp, target_arena)

    # Re-align the query so the faster algorithms can be used
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_pad, end_pad, aligned_fp = aligned

    hits = SearchResults(1)
    _chemfp.threshold_tanimoto_arena(
        threshold,
        target_arena.num_bits,
        # Query side
        start_pad, end_pad, target_arena.storage_size, aligned_fp, 0, 1,
        # Target side
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container; only its first row is returned below
        hits, 0)
    return hits[0]
예제 #10
0
def knearest_tanimoto_search_fp(query_fp, target_arena, k, threshold):
    """Run `_chemfp.knearest_tanimoto_arena` for one query and return its result row

    The query is checked against the arena with `require_matching_fp_size`
    and re-aligned to the arena's alignment and storage size. After the
    search, `_chemfp.knearest_results_finalize` is applied to the result
    row before it is returned.

    :param query_fp: the query fingerprint
    :param target_arena: the target arena
    :param k: the number of nearest neighbors; must not be negative
    :param threshold: the score threshold handed to the search
    :returns: the first row of a one-row `SearchResults`
    :raises ValueError: if `k` is negative
    """
    require_matching_fp_size(query_fp, target_arena)
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_pad, end_pad, aligned_fp = aligned

    if k < 0:
        raise ValueError("k must be non-negative")

    hits = SearchResults(1)
    _chemfp.knearest_tanimoto_arena(
        k,
        threshold,
        target_arena.num_bits,
        # Query side
        start_pad, end_pad, target_arena.storage_size, aligned_fp, 0, 1,
        # Target side
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container
        hits, 0)
    _chemfp.knearest_results_finalize(hits, 0, 1)

    return hits[0]
예제 #11
0
    def test_different_cases(self):
        """Check the layout of fingerprints returned by `align_fingerprint`.

        For each (fingerprint, alignment, storage_size) case, the payload
        between the two paddings must start on an `alignment` boundary and
        equal the fingerprint NUL-extended to `storage_size` bytes, and
        both padding regions must contain only NUL bytes.
        """
        cases = (
            ("1", 4, 8),
            ("12", 8, 8),
            ("123", 16, 16),
            ("abcd", 4, 8),
            ("abcd", 8, 8),
            ("abcd", 16, 16),
        )
        for query in cases:
            fp, alignment, storage_size = query

            result = _chemfp.align_fingerprint(*query)
            start_padding, end_padding, s = result

            # The payload, not the string object, has to sit on the boundary
            payload_address = _addressof(s) + start_padding
            self.assertEqual(payload_address % alignment, 0, (query, result))

            # Use an explicit end index rather than a negative one: with
            # end_padding == 0, s[a:-0] is empty and s[-0:] is all of s,
            # which would make the checks below fail spuriously.
            payload_end = len(s) - end_padding

            expected = fp + "\0" * (storage_size - len(fp))
            self.assertEqual(s[start_padding:payload_end], expected,
                             (query, expected, result))

            self.assertEqual(s[:start_padding], "\0" * start_padding)
            self.assertEqual(s[payload_end:], "\0" * end_padding)
예제 #12
0
def knearest_tanimoto_search_fp(query_fp, target_arena, k=3, threshold=0.7):
    """Find the `k`-nearest hits in `target_arena` which are at least `threshold` similar to `query_fp`

    The query is checked against the arena with `_require_matching_fp_size`
    and re-aligned to the arena's alignment and storage size before the
    search is run.

    The hits in the `SearchResults` are ordered by decreasing similarity score.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print list(chemfp.search.knearest_tanimoto_search_fp(query_fp, targets, k=3, threshold=0.0))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    :raises ValueError: if `k` is negative
    """
    _require_matching_fp_size(query_fp, target_arena)
    aligned = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    start_pad, end_pad, aligned_fp = aligned

    if k < 0:
        raise ValueError("k must be non-negative")

    hits = SearchResults(1, target_arena.arena_ids)
    _chemfp.knearest_tanimoto_arena(
        k,
        threshold,
        target_arena.num_bits,
        # Query side
        start_pad, end_pad, target_arena.storage_size, aligned_fp, 0, 1,
        # Target side
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        # Output container
        hits, 0)
    _chemfp.knearest_results_finalize(hits, 0, 1)

    return hits[0]
예제 #13
0
    def test_different_cases(self):
        """Verify alignment, payload and padding of re-aligned fingerprints.

        Each case is a (fingerprint, alignment, storage_size) triple passed
        straight to `align_fingerprint`. The returned string must hold the
        NUL-extended fingerprint on an `alignment` boundary, with nothing
        but NUL bytes in the start and end padding.
        """
        queries = (
            ("1", 4, 8),
            ("12", 8, 8),
            ("123", 16, 16),
            ("abcd", 4, 8),
            ("abcd", 8, 8),
            ("abcd", 16, 16),
        )
        for query in queries:
            fp, alignment, storage_size = query

            result = _chemfp.align_fingerprint(*query)
            start_padding, end_padding, s = result

            # Alignment is measured at the first payload byte
            first_byte = _addressof(s) + start_padding
            self.assertEqual(first_byte % alignment, 0, (query, result))

            # A negative end index misbehaves when end_padding is 0
            # (s[a:-0] is empty, s[-0:] is the whole string), so compute
            # the end of the payload as a non-negative offset instead.
            end_of_payload = len(s) - end_padding

            expected = fp + "\0" * (storage_size - len(fp))
            self.assertEqual(s[start_padding:end_of_payload], expected,
                             (query, expected, result))

            self.assertEqual(s[:start_padding], "\0" * start_padding)
            self.assertEqual(s[end_of_payload:], "\0" * end_padding)
예제 #14
0
def knearest_tanimoto_search_fp(query_fp, target_arena, k=3, threshold=0.7):
    """Search `target_arena` for the `k`-nearest hits which are at least `threshold` similar to `query_fp`

    The query is first checked against the arena with
    `_require_matching_fp_size`, then re-aligned to the arena's alignment
    and storage size.

    The hits in the `SearchResults` are ordered by decreasing similarity score.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print list(chemfp.search.knearest_tanimoto_search_fp(query_fp, targets, k=3, threshold=0.0))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param k: the number of nearest neighbors to find.
    :type k: non-negative integer
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    :raises ValueError: if `k` is negative
    """
    _require_matching_fp_size(query_fp, target_arena)
    padded_query = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    query_start_padding, query_end_padding, aligned_query = padded_query

    if k < 0:
        raise ValueError("k must be non-negative")

    nearest = SearchResults(1, target_arena.arena_ids)
    _chemfp.knearest_tanimoto_arena(
        k, threshold, target_arena.num_bits,
        query_start_padding, query_end_padding,
        target_arena.storage_size, aligned_query, 0, 1,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        nearest, 0)
    # Finalize the single result row before handing it back
    _chemfp.knearest_results_finalize(nearest, 0, 1)

    return nearest[0]
예제 #15
0
def threshold_tanimoto_search_fp(query_fp, target_arena, threshold=0.7):
    """Search `target_arena` for fingerprint hits which are at least `threshold` similar to `query_fp`

    The query is first checked against the arena with
    `_require_matching_fp_size`, then re-aligned to the arena's alignment
    and storage size.

    The hits in the returned `SearchResult` are in arbitrary order.

    Example::

        query_id, query_fp = chemfp.load_fingerprints("queries.fps")[0]
        targets = chemfp.load_fingerprints("targets.fps")
        print list(chemfp.search.threshold_tanimoto_search_fp(query_fp, targets, threshold=0.15))

    :param query_fp: the query fingerprint
    :type query_fp: a byte string
    :param target_arena: the target arena
    :type target_arena: a FingerprintArena
    :param threshold: The minimum score threshold.
    :type threshold: float between 0.0 and 1.0, inclusive
    :returns: a SearchResult
    """
    _require_matching_fp_size(query_fp, target_arena)

    # Re-align the query so the faster algorithms can be used
    padded_query = _chemfp.align_fingerprint(
        query_fp, target_arena.alignment, target_arena.storage_size)
    query_start_padding, query_end_padding, aligned_query = padded_query

    matches = SearchResults(1, target_arena.arena_ids)
    _chemfp.threshold_tanimoto_arena(
        threshold, target_arena.num_bits,
        query_start_padding, query_end_padding,
        target_arena.storage_size, aligned_query, 0, 1,
        target_arena.start_padding, target_arena.end_padding,
        target_arena.storage_size, target_arena.arena,
        target_arena.start, target_arena.end,
        target_arena.popcount_indices,
        matches, 0)
    # One query was searched, so only the first result row is meaningful
    return matches[0]