Example #1
0
def get_smiles_fp(smiles, ids, filename, num):
    """Fingerprint every parsable SMILES and persist the results.

    Each entry of ``smiles`` is parsed with ``Chem.MolFromSmiles``; entries
    that fail to parse (``None``) are skipped.  For the remaining entries the
    RDK fingerprint (size ``vec_dim``) is rendered as FPS text and collected
    together with the SMILES string and the entry of ``ids`` found at the
    same position.

    Three outputs are written, all named ``filename`` followed by ``num``
    zero-padded to three digits:

    * ``OUT/OUT_NPY/<name>.npy``    -- fingerprints, via ``np.save``
    * ``OUT/OUT_SMILES/<name>.smi`` -- kept SMILES, via ``save_file``
    * ``OUT/OUT_IDS/<name>.txt``    -- kept ids, via ``save_file``

    Args:
        smiles: iterable of SMILES strings.
        ids: sequence indexed in step with ``smiles``.
        filename: base name shared by the three output files.
        num: integer appended to ``filename`` as ``%03d``.
    """
    kept_fps = []
    kept_ids = []
    kept_smiles = []
    for position, smile in enumerate(smiles):
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            # Unparsable entry: skip it; ``position`` still advances so the
            # ids stay aligned with the SMILES.
            continue
        # Fingerprint method: RDK fingerprint is used here; a Morgan
        # fingerprint (AllChem.GetMorganFingerprintAsBitVect) is the
        # alternative.
        bit_vect = Chem.RDKFingerprint(mol, fpSize=vec_dim)
        kept_fps.append(DataStructs.BitVectToFPSText(bit_vect))
        kept_ids.append(ids[position])
        kept_smiles.append(smile)

    kept_fps = np.array(kept_fps)
    stem = filename + "%03d" % num
    np.save(OUT + '/' + OUT_NPY + '/' + stem + '.npy', kept_fps)
    save_file(kept_smiles, OUT + '/' + OUT_SMILES + '/' + stem + '.smi')
    save_file(kept_ids, OUT + '/' + OUT_IDS + '/' + stem + '.txt')

    # Drop the local references and force a collection pass before returning.
    del kept_fps, kept_smiles, kept_ids
    gc.collect()
Example #2
0
def smiles_to_vec(smiles):
    """Return the Morgan fingerprint of ``smiles`` as raw bytes.

    The SMILES string is parsed with ``Chem.MolFromSmiles``, a Morgan
    bit-vector fingerprint of radius 2 and ``VECTOR_DIMENSION`` bits is
    computed, and its FPS text form is decoded from hex into ``bytes``.

    Args:
        smiles: SMILES string to convert.

    Returns:
        bytes: the fingerprint decoded from its hexadecimal FPS text.
    """
    molecule = Chem.MolFromSmiles(smiles)
    bit_vect = AllChem.GetMorganFingerprintAsBitVect(
        molecule, 2, VECTOR_DIMENSION)
    fps_text = DataStructs.BitVectToFPSText(bit_vect)
    return bytes.fromhex(fps_text)
def search_ids_smi_list(table_name, topk, ids, smiles):
    """Search a Milvus collection for the nearest neighbours of one molecule.

    When ``ids`` is truthy, the query SMILES is looked up with
    ``get_smi_in_pg(table_name, ids)`` and replaces the ``smiles`` argument.
    The SMILES is converted to an RDK fingerprint of ``VECTOR_DIMENSION``
    bits, decoded from its hex FPS text into bytes, and sent as the single
    query record to ``MILVUS.search``.  The results are then handed to
    ``save_re_to_file``.  Timings for the search and for the save are printed.

    Args:
        table_name: collection name passed to the search, to the id lookup
            and to ``save_re_to_file``.
        topk: number of hits requested (``top_k``).
        ids: if truthy, used to look up the SMILES instead of ``smiles``.
        smiles: query SMILES, used only when ``ids`` is falsy.
    """
    if ids:
        smiles = get_smi_in_pg(table_name, ids)
    mols = Chem.MolFromSmiles(smiles)
    fp = Chem.RDKFingerprint(mols, fpSize=VECTOR_DIMENSION)
    hex_fp = DataStructs.BitVectToFPSText(fp)
    query_list = [bytes.fromhex(hex_fp)]

    print("table name:", table_name, "query list:", len(query_list), "topk:", topk)
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    time_start = time.perf_counter()
    status, results = MILVUS.search(collection_name=table_name, query_records=query_list, top_k=topk, params={})
    time_cost = time.perf_counter() - time_start
    print("time_search = ", time_cost)
    print(status,results)

    time_start = time.perf_counter()
    save_re_to_file(table_name, results)
    time_cost = time.perf_counter() - time_start
    print("time_save = ", time_cost)
Example #4
0
  def test7FPS(self):
    """Check conversion of a bit vector to FPS text and back."""
    fps_text = "03008280"
    vect = DataStructs.ExplicitBitVect(32)
    for bit in (0, 1, 17, 23, 31):
      vect.SetBit(bit)

    # Forward conversion yields the expected text, and parsing that text
    # gives back an equal bit vector.
    self.assertEqual(DataStructs.BitVectToFPSText(vect), fps_text)
    restored = DataStructs.CreateFromFPSText(fps_text)
    self.assertEqual(vect, restored)

    # The valid text with one extra trailing character must be rejected.
    with self.assertRaises(ValueError):
      DataStructs.CreateFromFPSText("030082801")

    # An empty string parses to a zero-length bit vector.
    empty = DataStructs.CreateFromFPSText("")
    self.assertEqual(empty.GetNumBits(), 0)
Example #5
0
def _getFPSStream(f, mols, type='morgan', radius=2, n_bits=2048):
    """Write fingerprints of ``mols`` to ``f`` in FPS text format.

    A three-line header (``#FPS1``, ``#num_bits``, ``#software``) is written
    first.  Then, for every truthy molecule, one line
    ``<fps-text>\\t<identifier>`` is written.  The identifier is, in order of
    preference: the ``chembl_id`` property when the molecule has one; the
    InChIKey when ``Chem.INCHI_AVAILABLE`` is true and it can be computed;
    otherwise the molecule's position in ``mols``.

    Args:
        f: object with a ``write`` method accepting text.
        mols: iterable of molecules; falsy entries are skipped.
        type: fingerprint kind -- ``'morgan'``, ``'pair'`` or ``'maccs'``.
        radius: Morgan radius (used for ``'morgan'`` only).
        n_bits: bit count written to the header and passed to the Morgan
            fingerprint call.

    Raises:
        ValueError: if ``type`` is not one of the supported kinds and a
            fingerprint has to be computed.
    """
    # NOTE(review): n_bits is only forwarded to the 'morgan' call; for
    # 'pair' and 'maccs' the header value may not match the actual
    # fingerprint length -- confirm.
    f.write("#FPS1\n#num_bits=%s\n#software=RDKit/%s\n" %
            (n_bits, rdBase.rdkitVersion))
    for i, mol in enumerate(mols):
        if not mol:
            continue
        idx = i
        if mol.HasProp('chembl_id'):
            idx = mol.GetProp('chembl_id')
        elif Chem.INCHI_AVAILABLE:
            try:
                Chem.SanitizeMol(mol)
                idx = Chem.InchiToInchiKey(Chem.MolToInchi(mol))
            except Exception:
                # Best effort: fall back to the positional index.  Catching
                # Exception (rather than a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                pass
        if type == 'morgan':
            fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
                mol, radius, nBits=n_bits)
        elif type == 'pair':
            fp = Pairs.GetAtomPairFingerprintAsBitVect(mol)
        elif type == 'maccs':
            fp = MACCSkeys.GenMACCSKeys(mol)
        else:
            # Previously this fell through and failed with an unbound
            # ``fp``; report the real problem instead.
            raise ValueError(
                "unsupported fingerprint type: %r "
                "(expected 'morgan', 'pair' or 'maccs')" % (type,))
        f.write("%s\t%s\n" % (DataStructs.BitVectToFPSText(fp), idx))