Esempio n. 1
0
 def search_by_mols(self, mols, topk=10):
     '''
     Look up the ``topk`` nearest indexed entries for every query molecule.

     Each molecule is encoded as a MACCS keys fingerprint, packed with
     ``self.vec2bytes`` and searched in ``self.index``. Every returned hit is
     re-scored against the query with ``self.calc_similarity``.

     :param mols: a list of molecules (anything accepted by
         ``rdMolDescriptors.GetMACCSKeysFingerprint``)
     :param topk: number of hits requested from ``self.index`` per molecule
     :return: one list per query molecule, in input order, each sorted by
         descending score:
         [[{"id": xx, "smiles": xx, "score": xx}, {}, ...], []]
     '''
     # Nothing to encode or search; also avoids handing an empty matrix to
     # the index.
     if len(mols) == 0:
         return []

     mols_vec = []
     for mol in mols:
         tmp_arr = np.array([])
         DataStructs.ConvertToNumpyArray(
             rdMolDescriptors.GetMACCSKeysFingerprint(mol), tmp_arr)
         mols_vec.append(self.vec2bytes(tmp_arr))

     # The distances reported by the index are not used: hits are re-scored
     # below with calc_similarity.
     _, ret_ids = self.index.search(
         np.array(mols_vec).astype("uint8"), topk)

     rets = []
     for mol, ids in zip(mols, ret_ids):
         ret = []
         for row_id in ids:
             # NOTE(review): if the index pads missing hits with negative
             # ids, iloc would silently pick rows from the end - confirm.
             row = self.df_zinc.iloc[row_id]
             smiles = row["smiles"]
             ret.append({
                 "id": row["zinc_id"],
                 "smiles": smiles,
                 "score": self.calc_similarity(
                     mol, Chem.MolFromSmiles(smiles)),
             })
         # Must run once per query molecule (inside the loop) so that the
         # result holds one ranked list per input molecule.
         rets.append(
             sorted(ret, key=lambda item: item["score"], reverse=True))
     return rets
def GenerateMACCS166KeysFingerprints(Mols):
    """Return a list with the MACCS keys fingerprint of every molecule in Mols.

    A progress message naming the configured fingerprints type
    (``OptionsInfo["SpecifiedFingerprintsType"]``) is emitted through
    ``MiscUtil.PrintInfo`` before any fingerprint is computed.
    """

    MiscUtil.PrintInfo("\nGenerating MACCS166Keys %s fingerprints..." % OptionsInfo["SpecifiedFingerprintsType"])

    # One fingerprint per molecule, kept in input order.
    Fingerprints = []
    for CurrentMol in Mols:
        Fingerprints.append(rdMolDescriptors.GetMACCSKeysFingerprint(CurrentMol))

    return Fingerprints
def get_maccs(molecule):
    """Return the MACCS keys fingerprint of ``molecule``, or NaN on failure.

    Any exception raised while computing the fingerprint is printed,
    followed by an ``"error<molecule>"`` line, and ``np.nan`` is returned
    in place of the fingerprint.
    """
    try:
        fingerprint = rdMolDescriptors.GetMACCSKeysFingerprint(molecule)
    except Exception as err:
        # Best effort: report the problem and hand back a NaN placeholder.
        print(err)
        print("error" + str(molecule))
        return np.nan
    # Note: the fingerprint object does not have a length.
    return fingerprint
Esempio n. 4
0
def maccs_keys(smiles):
    """Return the MACCS keys fingerprint of a SMILES string wrapped by np.array.

    The SMILES string is parsed with ``Chem.MolFromSmiles`` and the
    resulting molecule is passed to
    ``rdMolDescriptors.GetMACCSKeysFingerprint``.
    """
    # NOTE: earlier drafts (left as comments by the author) used
    # fingerprint.CalculateMACCSFingerprint, which "would be used if the
    # result were not a vector", and DataStructs.ConvertToNumpyArray.
    fingerprint = rdMolDescriptors.GetMACCSKeysFingerprint(
        Chem.MolFromSmiles(smiles))
    return np.array(fingerprint)
Esempio n. 5
0
def build_mol_features(in_file, out_file):
    """Compute MACCS keys fingerprints for a zipped CSV and save them.

    Reads ``in_file`` as a zip-compressed CSV, converts every entry of its
    ``"smiles"`` column to a MACCS keys fingerprint array (with a tqdm
    progress bar) and writes the stacked arrays to ``out_file`` via
    ``np.save``.
    """
    frame = pd.read_csv(in_file, compression="zip")
    progress = tqdm.tqdm(frame["smiles"], total=len(frame))

    fingerprints = []
    for smiles in progress:
        # ConvertToNumpyArray fills the array passed as second argument.
        vector = np.array([])
        bit_vect = rdMolDescriptors.GetMACCSKeysFingerprint(
            Chem.MolFromSmiles(smiles))
        DataStructs.ConvertToNumpyArray(bit_vect, vector)
        fingerprints.append(vector)

    np.save(out_file, np.array(fingerprints))
0
    def _encode(smi: str, fingerprint: str, radius: int,
                length: int) -> T_comp:
        """Encode a SMILES string as the requested fingerprint.

        Fingerprint functions must be wrapped in a static function
        so that they may be pickled for parallel processing.

        Parameters
        ----------
        smi : str
            the SMILES string of the molecule to encode
        fingerprint : str
            the type of fingerprint to generate; one of 'morgan', 'pair',
            'rdkit', 'maccs' or 'map4'
        radius : int
            the radius of the fingerprint ('pair' and 'rdkit' use
            1 + radius as their maximum length; 'maccs' ignores it)
        length : int
            the length of the fingerprint ('maccs' ignores it)

        Returns
        -------
        T_comp
            the compressed feature representation of the molecule

        Raises
        ------
        NotImplementedError
            if the fingerprint type is not one of the recognized names
        """
        # The molecule is parsed up front, before the type is inspected.
        mol = Chem.MolFromSmiles(smi)

        if fingerprint == 'morgan':
            encoded = rdmd.GetMorganFingerprintAsBitVect(
                mol, radius=radius, nBits=length, useChirality=True)
        elif fingerprint == 'pair':
            encoded = rdmd.GetHashedAtomPairFingerprintAsBitVect(
                mol, minLength=1, maxLength=1 + radius, nBits=length)
        elif fingerprint == 'rdkit':
            encoded = rdmd.RDKFingerprint(
                mol, minPath=1, maxPath=1 + radius, fpSize=length)
        elif fingerprint == 'maccs':
            encoded = rdmd.GetMACCSKeysFingerprint(mol)
        elif fingerprint == 'map4':
            calculator = map4.MAP4Calculator(
                dimensions=length, radius=radius, is_folded=True)
            encoded = calculator.calculate(mol)
        else:
            raise NotImplementedError(
                f'Unrecognized fingerprint: "{fingerprint}"')

        return encoded
Esempio n. 7
0
 def calc_similarity(self, mol1, mol2):
     '''
     Tanimoto similarity between the MACCS keys fingerprints of two molecules.

     :param mol1: first molecule
     :param mol2: second molecule
     :return: the value of ``DataStructs.TanimotoSimilarity`` for the pair
     '''
     first_fp, second_fp = (
         rdMolDescriptors.GetMACCSKeysFingerprint(mol1),
         rdMolDescriptors.GetMACCSKeysFingerprint(mol2),
     )
     return DataStructs.TanimotoSimilarity(first_fp, second_fp)
Esempio n. 8
0
def MACCS_keys(smiles):
    """Return the MACCS keys fingerprint of a SMILES string as an int32 array.

    The SMILES string is parsed with ``Chem.MolFromSmiles``, fingerprinted
    with ``rdMolDescriptors.GetMACCSKeysFingerprint`` and copied into a
    NumPy array by ``cDataStructs.ConvertToNumpyArray``.
    """
    fingerprint = rdMolDescriptors.GetMACCSKeysFingerprint(
        Chem.MolFromSmiles(smiles))
    # Starts empty; ConvertToNumpyArray fills the array it is handed.
    bits = np.zeros((0,), dtype=np.int32)
    cDataStructs.ConvertToNumpyArray(fingerprint, bits)
    return bits
Esempio n. 9
0
# Structure-based embeddings.
# `fngroups` and `graph` are defined outside this excerpt; their `.values`
# are taken here (presumably pandas objects - TODO confirm).
# NaNs in the fngroups values are replaced via np.nan_to_num.
embed_fn = np.nan_to_num(fngroups.values)
embed_graph = graph.values

# Molecular fingerprints, one entry per element of `smiles`.
# Reference:
#https://www.rdkit.org/UGM/2012/Landrum_RDKit_UGM.Fingerprints.Final.pptx.pdf
finger_mqn = []
# NOTE: finger_morgan is never filled in this excerpt; the only append to it
# in the loop below is commented out.
finger_morgan = []
finger_maccs = []
finger_ap = []

for i in smiles:
    # Parse each SMILES string once and reuse the molecule for every
    # fingerprint type.
    mol = AllChem.MolFromSmiles(i)

    finger_mqn.append(np.array(Descriptors.MQNs_(mol)))
    finger_maccs.append(np.array(Descriptors.GetMACCSKeysFingerprint((mol))))
    #finger_morgan.append(np.array(Descriptors.GetMorganFingerprint((mol))))
    finger_ap.append(np.array(Descriptors.GetAtomPairFingerprint((mol))))

###
# `names` and `data` list the same seven representations in the same order.
# `vec_spec` and `vec_smiles` are defined outside this excerpt.
names = 'vec_spec,vec_smiles,embed_fn,finger_mqn,finger_maccs,finger_ap,embed_graph'.split(
    ',')
data = [
    vec_spec, vec_smiles, embed_fn, finger_mqn, finger_maccs, finger_ap,
    embed_graph
]
# Used as the scatter-plot label in the loop over `data` that follows.
counter = 0
for i in data:
    try:
        res = do_pca(i)
        plt.scatter(res[0], res[1], label=counter, alpha=.4)