Esempio n. 1
0
def test_distance():
    """Check MHFPEncoder.distance against fixed reference values.

    Three fingerprints are built from SMILES strings and each one is
    compared with ``fp``; every distance must equal its expected constant
    exactly.
    """
    # NOTE(review): `mhfp_encoder` and `fp` are not defined in this function;
    # presumably they are module-level fixtures -- confirm against the file.
    def _encode(smiles):
        # SMILES -> molecular shingling -> MHFP fingerprint.
        shingling = mhfp_encoder.shingling_from_smiles(smiles, sanitize=True)
        return mhfp_encoder.from_molecular_shingling(shingling)

    fp_a = _encode('CCOC1=C(C=C(C=C1)S(=O)(=O)N(C)C)C2=NC(=O)C3=C(N2)C(=NN3C)C(C)(C)C')
    fp_b = _encode('CCCC1=NN(C2=C1NC(=NC2=O)C3=C(C=CC(=C3)S(=O)(=O)N4CCN(CC4)C)OCC)C')
    fp_c = _encode('O=C(OC)C(C1CCCCN1)C2=CC=CC=C2')

    # (fingerprint to compare with `fp`, expected distance), checked in order.
    expectations = (
        (fp_b, 0.0),
        (fp_a, 0.45849609375),
        (fp_c, 0.97216796875),
    )
    for other, expected in expectations:
        assert MHFPEncoder.distance(fp, other) == expected
Esempio n. 2
0
  def _get_knn(query_mhfp, ann, k, data):
    """ Pick the k nearest neighbors out of a list of approximate nearest neighbors by brute force.

    Every candidate in ann is scored as 1.0 - MHFPEncoder.distance(query, candidate)
    and the candidates with the highest scores are kept.

    Keyword arguments:
        query_mhfp {numpy.ndarray} -- The query MHFP fingerprint.
        ann {list} -- A list of indices of approximate nearest neighbors of size k * kc to be brute-force searched
        k {int} -- The number of nearest neighbors to be returned from the approximate nearest neighbors
        data {dict} -- The MHFP values indexed with the same key supplied to add()

    Returns:
        list -- The first k indices from ann after ordering by decreasing score
                (equal scores keep their relative order from ann)
    """

    scored = [
        (candidate, 1.0 - MHFPEncoder.distance(query_mhfp, data[candidate]))
        for candidate in ann
    ]

    # Highest score (i.e. smallest distance) first; the sort is stable.
    ranked = sorted(scored, key=itemgetter(1), reverse=True)
    return [candidate for candidate, _ in ranked[:k]]