Beispiel #1
0
def fp_ap_std_mp(mol, mol_can, i, nBits, chiral):
    """Compute hashed atom-pair fingerprints for a molecule and its canonical form.

    The canonical fingerprint is only computed separately when ``mol_can`` is
    a different object than ``mol``; otherwise the very same fingerprint
    object is returned twice.

    Args:
        mol: Molecule passed to ``Pairs.GetHashedAtomPairFingerprint``.
        mol_can: Canonical counterpart of ``mol`` (may be the same object).
        i: Caller-supplied identifier, returned unchanged as the first item.
        nBits: Forwarded as the ``nBits`` keyword.
        chiral: Forwarded as the ``includeChirality`` keyword.

    Returns:
        The tuple ``(i, fp_mol, fp_can)``.
    """
    def _hashed_fp(molecule):
        return Pairs.GetHashedAtomPairFingerprint(
            molecule, nBits=nBits, includeChirality=chiral)

    fp_mol = _hashed_fp(mol)
    # Identity check: reuse the fingerprint when both names refer to one object.
    fp_can = fp_mol if mol is mol_can else _hashed_fp(mol_can)
    return (i, fp_mol, fp_can)
    def score_model(self, model_configuration: dict, fragments_file: str,
                    descriptors_file: str, output_file: str):
        """Score molecules by their best atom-pair Tanimoto similarity to the actives.

        Every line of ``fragments_file`` is parsed as a JSON object with the
        keys ``"smiles"`` and ``"name"``. For each one, the maximum Tanimoto
        similarity against the molecules listed under
        ``model_configuration["data"]["active"]`` is written to ``output_file``
        as one JSON object per line (``{"name": ..., "score": ...}``), with no
        trailing newline.

        Args:
            model_configuration: Model description; only ``["data"]["active"]``
                (an iterable of SMILES strings) is read here.
            fragments_file: Path of the JSON-lines input file.
            descriptors_file: Not used by this method.
            output_file: Path of the JSON-lines output file.
        """
        inputoutput_utils.create_parent_directory(output_file)

        # Surrounding double quotes are stripped from each SMILES before parsing.
        reference_fps = [
            Pairs.GetAtomPairFingerprint(
                Chem.MolFromSmiles(raw_smiles.strip("\"")))
            for raw_smiles in model_configuration["data"]["active"]
        ]

        with open(output_file, "w", encoding="utf-8") as sink, \
                open(fragments_file, "r", encoding="utf-8") as source:
            for position, raw_record in enumerate(source):
                record = json.loads(raw_record)
                candidate = Chem.MolFromSmiles(record["smiles"].strip("\""))
                candidate_fp = Pairs.GetAtomPairFingerprint(candidate)
                best = max(
                    DataStructs.TanimotoSimilarity(candidate_fp, reference_fp)
                    for reference_fp in reference_fps)
                result = {"name": record["name"], "score": best}
                # Newlines separate records; none is written before the first.
                if position > 0:
                    sink.write("\n")
                json.dump(result, sink)
Beispiel #3
0
def atom_pairs():
    """Demonstrate RDKit atom-pair fingerprints on three small ethers.

    Prints, in order: the non-zero elements of the last molecule's count
    fingerprint, the explanation of pair score 558115, the Dice similarity of
    the first two count fingerprints, and the Dice similarity of the first two
    bit-vector fingerprints.
    """
    example_smiles = ('C1CCC1OCC', 'CC(C)OCC', 'CCOCC')
    molecules = [Chem.MolFromSmiles(smi) for smi in example_smiles]

    # Count-based fingerprints: bit id -> number of occurrences.
    count_fps = [Pairs.GetAtomPairFingerprint(mol) for mol in molecules]
    print(count_fps[-1].GetNonzeroElements())

    # Decode a single pair score into its atom descriptions and distance.
    print(Pairs.ExplainPairScore(558115))

    # Dice is the usual similarity metric for atom-pair fingerprints.
    first_fp, second_fp = count_fps[0], count_fps[1]
    print(DataStructs.DiceSimilarity(first_fp, second_fp))

    # Same comparison on bit vectors, i.e. without occurrence counts.
    bit_fps = [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in molecules]
    print(DataStructs.DiceSimilarity(bit_fps[0], bit_fps[1]))
Beispiel #4
0
def extract_atompair_fragments(molecule: object) -> list:
    """List the atom-pair fragments of a molecule, one entry per occurrence.

    Each non-zero element of the atom-pair count fingerprint is decoded with
    ``Pairs.ExplainPairScore`` and re-encoded into an integer key built from
    the two atom codes and the path length.

    Args:
        molecule: Molecule accepted by ``Pairs.GetAtomPairFingerprint``.

    Returns:
        A list of dicts with the keys ``"smiles"`` (both atom descriptions
        concatenated), ``"index"`` (the recomputed pair key), ``"type"``
        (always ``"AP"``) and ``"size"`` (decoded distance plus one). A pair
        that occurs ``n`` times yields ``n`` separate dicts.
    """

    def _atom_code(atom_info):
        # atom_info is indexed as (smarts, neighbour count, pi-bond count).
        atomic_num = rdkit.Chem.AtomFromSmarts(atom_info[0]).GetAtomicNum()
        return 64 * atomic_num + 16 * atom_info[2] + atom_info[1]

    output = []
    pair_counts = Pairs.GetAtomPairFingerprint(molecule).GetNonzeroElements()
    for pair, count in pair_counts.items():
        # Decode the score once; the result is (atom1, distance, atom2).
        explained = Pairs.ExplainPairScore(pair)
        atom1_info, atom2_info = explained[0], explained[2]
        dist = explained[1] + 1

        code1 = _atom_code(atom1_info)
        code2 = _atom_code(atom2_info)
        # The smaller code goes in the low 10 bits so the key is order-independent.
        atom_pair_key = min(code1, code2) + 1024 * (
            max(code1, code2) + 1024 * dist)
        smiles = atom1_info[0] + atom2_info[0]

        # A fresh dict per occurrence so callers can mutate entries independently.
        output.extend(
            {
                "smiles": smiles,
                "index": atom_pair_key,
                "type": "AP",
                "size": dist,
            }
            for _ in range(count)
        )
    return output
Beispiel #5
0
def fp_ap_std(mols, nBits, chiral):
    """Attach hashed atom-pair fingerprints to every entry of ``mols`` in place.

    For each key, the entry's ``"mol"`` fingerprint is stored under ``"fp"``
    and the ``"mol_can"`` fingerprint under ``"fp_can"``. When both refer to
    the same object the fingerprint is computed once and shared.

    Args:
        mols: Mapping whose values are dicts holding ``"mol"`` and ``"mol_can"``.
        nBits: Forwarded as the ``nBits`` keyword.
        chiral: Forwarded as the ``includeChirality`` keyword.
    """
    for key in mols:
        entry = mols[key]
        molecule_fp = Pairs.GetHashedAtomPairFingerprint(
            entry["mol"], nBits=nBits, includeChirality=chiral)
        entry["fp"] = molecule_fp
        if entry["mol"] is entry["mol_can"]:
            canonical_fp = molecule_fp
        else:
            canonical_fp = Pairs.GetHashedAtomPairFingerprint(
                entry["mol_can"], nBits=nBits, includeChirality=chiral)
        entry["fp_can"] = canonical_fp
Beispiel #6
0
def caculate_similarity_atomPairs(smiles_A, smiles_B):
    """Return the atom-pair Dice similarity of two SMILES, rounded to 4 decimals.

    Args:
        smiles_A: SMILES string of the first molecule.
        smiles_B: SMILES string of the second molecule.

    Returns:
        The rounded Dice similarity, or ``-1`` when parsing, fingerprinting
        or comparison fails for any reason.
    """
    try:
        m1 = Chem.MolFromSmiles(smiles_A)
        m2 = Chem.MolFromSmiles(smiles_B)
        p1 = Pairs.GetAtomPairFingerprint(m1)
        p2 = Pairs.GetAtomPairFingerprint(m2)
        return round(DataStructs.DiceSimilarity(p1, p2), 4)
    except Exception:
        # Best-effort by design, but no longer traps KeyboardInterrupt/SystemExit
        # the way the former bare ``except:`` did.
        return -1
Beispiel #7
0
def sim_rdk_topo_fps(smiA, smisT):
    """Compute atom-pair Dice similarities between one SMILES and many.

    Args:
        smiA: SMILES string of the query molecule.
        smisT: Iterable of SMILES strings to compare against.

    Returns:
        A list with one Dice similarity per entry of ``smisT``, in order.
    """
    query_fp = Pairs.GetAtomPairFingerprint(getMolFromSmiles(smiA))
    # All target fingerprints are built before any similarity is computed.
    target_fps = [
        Pairs.GetAtomPairFingerprint(getMolFromSmiles(target))
        for target in smisT
    ]
    return [
        DataStructs.DiceSimilarity(query_fp, target_fp)
        for target_fp in target_fps
    ]
Beispiel #8
0
    def findCluster(self, smiles):
        """Assign a SMILES string to a Murcko-scaffold cluster.

        Args:
            smiles: SMILES string of the molecule to classify.

        Returns:
            A tuple ``(cluster, fingerprint, is_new)``:

            * ``("", "", False)`` when the SMILES cannot be parsed or no
              scaffold can be derived;
            * ``(scaffold_smiles, fp, False)`` when the scaffold is already a
              known cluster;
            * ``(closest_known_cluster, fp, False)`` when the most similar
              known fingerprint reaches ``self.minsimilarity`` (Tanimoto);
            * ``(scaffold_smiles, fp, True)`` otherwise.
        """
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return "", "", False
        try:
            scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        except Exception:
            # Scaffold extraction is best-effort; interpreter-level signals
            # (KeyboardInterrupt, SystemExit) are no longer swallowed.
            return "", "", False
        if not scaffold:
            return "", "", False

        cluster = Chem.MolToSmiles(scaffold, isomericSmiles=False)
        fp = Pairs.GetAtomPairFingerprint(scaffold)

        # One snapshot of the known fingerprints keeps keys and values aligned.
        known = self.getFingerprints()
        if cluster in known:
            return cluster, fp, False

        names = list(known.keys())
        sims = DataStructs.BulkTanimotoSimilarity(fp, list(known.values()))
        if len(sims) == 0:
            return cluster, fp, True

        closest = np.argmax(sims)
        if sims[closest] >= self.minsimilarity:
            return names[closest], fp, False
        return cluster, fp, True
Beispiel #9
0
def fp_atompairs_taut(query, nBits, chiral):
    """Store a hashed atom-pair fingerprint for every tautomer of every entry.

    For each key of ``query`` the fingerprint of the ``j``-th item of
    ``query[key]["tauts"]`` is written to ``query[key]["fp<j>"]``.

    Args:
        query: Mapping whose values are dicts holding a ``"tauts"`` sequence.
        nBits: Forwarded as the ``nBits`` keyword.
        chiral: Forwarded as the ``includeChirality`` keyword.
    """
    for key in query:
        record = query[key]
        for position, tautomer in enumerate(record["tauts"]):
            record[f"fp{position}"] = Pairs.GetHashedAtomPairFingerprint(
                tautomer, nBits=nBits, includeChirality=chiral)
    def atom_pair_fp(self):
        """Build a deduplicated feature/label array from a CSV of SMILES.

        Reads ``self.csv_path``, reduces the atom-pair int-vector fingerprint
        of every ``Smiles`` entry to its ``GetTotalVal()``, drops the rows
        whose fingerprint could not be computed, label-encodes the ``Label``
        column and removes rows whose feature values are duplicated.

        Returns:
            A ``np.float32`` array whose last column is the encoded label and
            whose preceding column(s) hold the fingerprint value.
        """
        df = pd.read_csv(self.csv_path)
        smiles_list = df['Smiles'].tolist()

        fingerprints = []
        not_found = []  # positions in smiles_list whose fingerprint failed

        for i in tqdm(range(len(smiles_list))):
            try:

                mol = Chem.MolFromSmiles(smiles_list[i])
                fp = Pairs.GetAtomPairFingerprintAsIntVect(mol)
                fp._sumCache = fp.GetTotalVal(
                )  # the full vector would be huge, so only its total value is kept
                # (an earlier variant expanded fp.ToBitString() into a 0/1 array)
                fingerprints.append(fp._sumCache)
                # NOTE(review): prints the whole accumulated list on every
                # iteration — looks like leftover debug output.
                print('fing', fingerprints)
            except:
                # NOTE(review): bare except also traps KeyboardInterrupt and
                # SystemExit — consider narrowing to Exception.

                fingerprints.append(np.nan)
                not_found.append(i)
                pass

        # presumably the CSV index is the default 0..n-1, so list positions
        # double as row labels here — verify if read_csv options ever change
        df.drop(not_found, axis=0, inplace=True)

        print('Number of FPs not found: {}'.format(len(not_found)))

        df.reset_index(drop=True, inplace=True)
        labelencoder = LabelEncoder()
        Y = labelencoder.fit_transform(df['Label'].values)
        Y = Y.reshape(Y.shape[0], 1)  # column vector so it can be concatenated

        print('Output shape: {}'.format(Y.shape))

        fp_array = (np.asarray((fingerprints), dtype=object))
        # Remove the NaN placeholders at the same positions dropped from df.
        X = np.delete(fp_array, not_found, axis=0)
        X = np.vstack(X).astype(np.float32)
        print('Typeof X', type(X))
        print(X)
        print('Input shape: {}'.format(X.shape))

        final_array = np.concatenate((X, Y), axis=1)

        # Removing rows, from final_array, where duplicate FPs are present
        # (uniqueness is judged on every column except the trailing label)
        final_array_slice = final_array[:, 0:(final_array.shape[1] - 1)]
        _, unq_row_indices = np.unique(final_array_slice,
                                       return_index=True,
                                       axis=0)
        final_array_unique = final_array[unq_row_indices]

        print(
            'Number of Duplicate FPs: {}'.format(final_array.shape[0] -
                                                 final_array_unique.shape[0]))

        print('Final Numpy array shape: {}'.format(final_array_unique.shape))
        print('Type of final array: {}'.format(type(final_array_unique)))
        final_numpy_array = np.asarray((final_array_unique), dtype=np.float32)

        return final_numpy_array
Beispiel #11
0
    def compute_pca(self):
        """Project the atom-pair Tanimoto similarity matrix onto two components.

        Builds a symmetric similarity matrix (unit diagonal) from the bit-vector
        atom-pair fingerprints of ``self.Database2.SMILES``, fits a whitened
        two-component PCA on it and stores the results on the instance:

        * ``self.pca_result`` — the projection indexed by ``TIPO``;
        * ``self.a`` / ``self.b`` — explained variance of PC1 / PC2 in percent,
          rounded to two decimals.

        Returns:
            The projection as a DataFrame with the columns ``PC1``, ``PC2``,
            ``LIBRARY``, ``TIPO``, ``SMILES`` and ``NAME`` (default index).
        """
        Database = self.Database2
        molecules = [Chem.MolFromSmiles(x) for x in list(Database.SMILES)]
        fps = [Pairs.GetAtomPairFingerprintAsBitVect(x) for x in molecules]

        # The helper's values fill the strict lower triangle; the upper
        # triangle is then mirrored from it.
        n_mol = len(fps)
        similarity_matrix = np.ones([n_mol, n_mol])
        i_lower = np.tril_indices(n=n_mol, m=n_mol, k=-1)
        i_upper = np.triu_indices(n=n_mol, m=n_mol, k=1)
        similarity_matrix[i_lower] = GetTanimotoSimMat(fps)
        similarity_matrix[i_upper] = similarity_matrix.T[i_upper]

        sklearn_pca = sklearn.decomposition.PCA(n_components=2,
                                                svd_solver="full",
                                                whiten=True)
        sklearn_pca.fit(similarity_matrix)
        pca_result = pd.DataFrame(sklearn_pca.transform(similarity_matrix),
                                  columns=['PC1', 'PC2'])

        # Assign metadata positionally (as compute_tsne does). Assigning the
        # Series directly would align on index labels and yield NaN or shifted
        # rows whenever Database does not carry a default 0..n-1 index.
        pca_result["LIBRARY"] = list(Database.LIBRARY)
        pca_result["TIPO"] = list(Database.LIBRARY)
        pca_result["SMILES"] = list(Database.SMILES)
        pca_result["NAME"] = list(Database.NAME)
        self.pca_result = pca_result.set_index('TIPO')

        variance = list(sklearn_pca.explained_variance_ratio_)
        self.a = round(variance[0] * 100, 2)
        self.b = round(variance[1] * 100, 2)

        return pca_result
Beispiel #12
0
    def computeFP(self, typeFP):
        """Compute the requested RDKit fingerprint(s) for ``self.smiclean``.

        Args:
            typeFP: One of ``"Mol"``, ``"MACCS"``, ``"pairs"``, ``"Torsion"``,
                ``"Morgan"``, or ``"All"`` for every type. Any other value
                produces an empty result.

        Returns:
            ``1`` when the instance has no ``smiclean`` attribute (a message is
            appended to ``self.log``); otherwise ``0`` after storing the parsed
            molecule in ``self.mol`` and the fingerprints, keyed by type name,
            in ``self.FP``.
        """
        from rdkit.Chem.Fingerprints import FingerprintMols
        from rdkit.Chem import MACCSkeys
        from rdkit.Chem.AtomPairs import Pairs, Torsions
        from rdkit.Chem import AllChem

        if "smiclean" not in self.__dict__:
            self.log = self.log + "No smiles prepared\n"
            return 1

        self.mol = Chem.MolFromSmiles(self.smiclean)

        # (name, builder) pairs in the order the fingerprints are generated.
        builders = (
            ("Mol", lambda m: FingerprintMols.FingerprintMol(m)),
            ("MACCS", lambda m: MACCSkeys.GenMACCSKeys(m)),
            ("pairs", lambda m: Pairs.GetAtomPairFingerprint(m)),
            ("Torsion", lambda m: Torsions.GetTopologicalTorsionFingerprint(m)),
            ("Morgan", lambda m: AllChem.GetMorganFingerprint(m, 2)),
        )
        selected = {}
        for name, build in builders:
            if typeFP in (name, "All"):
                selected[name] = build(self.mol)

        self.FP = selected
        return 0
Beispiel #13
0
 def atom_pairs(self):
     """Build a count matrix of atom-pair bits over ``self.data.SMILES``.

     The columns are the sorted union of all atom-pair bit ids found in any
     molecule (hydrogens removed first); each row holds one molecule's
     occurrence counts.

     Returns:
         A list with one integer ``numpy`` array per molecule, all of the same
         length. Empty when there are no molecules.
     """
     molecules = [Chem.MolFromSmiles(smi) for smi in self.data.SMILES]
     # Per molecule: {bit id: occurrence count}.
     counts_per_mol = [
         Pairs.GetAtomPairFingerprint(
             Chem.RemoveHs(mol)).GetNonzeroElements() for mol in molecules
     ]

     # Sorted union of every bit seen, and each bit's column position.
     bits_ap = sorted(set().union(*counts_per_mol))
     column_of = {bit: column for column, bit in enumerate(bits_ap)}

     feature_matrix = []
     for counts in counts_per_mol:
         row = np.zeros(len(bits_ap), dtype=int)
         # Place each count by its bit id, so the result does not depend on
         # the iteration order of the fingerprint dictionary.
         for bit, count in counts.items():
             row[column_of[bit]] = count
         feature_matrix.append(row)
     return feature_matrix
Beispiel #14
0
    def compute_tsne(self):
        """Embed the atom-pair Tanimoto distance matrix in 2-D with t-SNE.

        Builds a symmetric similarity matrix (unit diagonal) from the bit-vector
        atom-pair fingerprints of ``self.Database2["SMILES"]``, converts it to
        distances (``1 - similarity``) and runs t-SNE with
        ``self.perplexity``. The embedding plus the ``LIBRARY``, ``SMILES`` and
        ``NAME`` columns is stored in ``self.tsne_result``, indexed by ``TIPO``.
        """
        Database = self.Database2
        parsed = [Chem.MolFromSmiles(smi) for smi in list(Database["SMILES"])]
        fps = [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in parsed]
        lower_values = GetTanimotoSimMat(fps)

        size = len(fps)
        similarity = np.ones([size, size])
        # Fill the strict lower triangle, then mirror it into the upper one.
        similarity[np.tril_indices(n=size, m=size, k=-1)] = lower_values
        upper = np.triu_indices(n=size, m=size, k=1)
        similarity[upper] = similarity.T[upper]
        distances = 1 - similarity

        embedder = TSNE(
            n_components=2,
            init='pca',
            random_state=1992,
            angle=0.3,
            perplexity=self.perplexity)
        embedding = embedder.fit_transform(distances)

        tsne_result = pd.DataFrame(data=embedding, columns=["PC1", "PC2"])
        # Metadata is attached positionally; TIPO mirrors LIBRARY.
        for column, source in (("LIBRARY", "LIBRARY"), ("TIPO", "LIBRARY"),
                               ("SMILES", "SMILES"), ("NAME", "NAME")):
            tsne_result[column] = list(getattr(Database, source))
        self.tsne_result = tsne_result.set_index('TIPO')
Beispiel #15
0
def _atomsFingerprintsClustering(rdkit_mols):
    """
        Build the pairwise Dice matrix of atom-pair fingerprints.

        Parameters
        ----------
        rdkit_mols: list
            The list of rdkit.Chem.rdchem.Mol objects

        Returns
        -------
        dicematrix: np.array
            Array with one row per molecule, each row being the result of
            ``DiceDistances`` for that molecule against all fingerprints
        """
    from rdkit.Chem.AtomPairs import Pairs  # Atom pairs

    fps = [Pairs.GetAtomPairFingerprint(mol) for mol in tqdm(rdkit_mols)]

    executor = ParallelExecutor(n_jobs=-1)  # _config['ncpus'])
    run = executor(total=len(fps), desc='AtomsFingerprints Distance')
    rows = run(delayed(DiceDistances)(row_fp, fps) for row_fp in fps)

    return np.array(rows)
Beispiel #16
0
def atom_pairs_similarity(active_molecules1, test_molecules):
    """Return each test molecule's best atom-pair Dice similarity to the actives.

    Args:
        active_molecules1: Iterable of reference molecules.
        test_molecules: Iterable of molecules to score.

    Returns:
        A list with one value per test molecule: the largest Dice similarity
        against any active, or ``0`` when no similarity exceeds zero.
    """
    reference_fps = [
        Pairs.GetAtomPairFingerprint(mol) for mol in active_molecules1
    ]
    candidate_fps = [
        Pairs.GetAtomPairFingerprint(mol) for mol in test_molecules
    ]
    # Leading 0 is the floor: it wins ties, so an all-zero row yields int 0.
    return [
        max([0] + [
            DataStructs.DiceSimilarity(candidate_fp, reference_fp)
            for reference_fp in reference_fps
        ])
        for candidate_fp in candidate_fps
    ]
Beispiel #17
0
 def testPairsRegression(self):
   """Check atom-pair fingerprints against the pickled reference set.

   Every molecule in ``self.mols`` must reproduce its stored fingerprint and
   must differ from the stored fingerprint of the preceding entry (for the
   first molecule that is the last entry, via index -1).
   """
   # The context manager closes the gzip handle even if unpickling fails.
   with gzip.open(os.path.join(self.testDataPath, 'mols1000.aps.pkl.gz'), 'rb') as inF:
     atomPairs = cPickle.load(inF, encoding='bytes')
   for i, m in enumerate(self.mols):
     ap = Pairs.GetAtomPairFingerprint(m)
     if ap != atomPairs[i]:  # pragma: nocover
       debugFingerprint(m, ap, atomPairs[i])
     self.assertEqual(ap, atomPairs[i])
     self.assertNotEqual(ap, atomPairs[i - 1])
Beispiel #18
0
def fingerprint_smile(smile, fp_type):
    """Fingerprint the Murcko scaffold of a SMILES string.

    Args:
        smile: SMILES string; it is first reduced with ``get_murcko_smile``.
        fp_type: ``"atom-pair"`` or ``"maccs"``; any other value selects a
            1024-bit Morgan fingerprint using the module-level ``radius``.

    Returns:
        The fingerprint of the scaffold molecule.
    """
    scaffold_mol = Chem.MolFromSmiles(get_murcko_smile(smile))
    if fp_type == "atom-pair":
        return Pairs.GetAtomPairFingerprintAsBitVect(scaffold_mol)
    if fp_type == "maccs":
        return MACCSkeys.GenMACCSKeys(scaffold_mol)
    return AllChem.GetMorganFingerprintAsBitVect(scaffold_mol, radius, nBits=1024)
Beispiel #19
0
def orng_sim_rdk_atompair_fps(smile_active, train_instance):
    """Atom-pair Dice similarity between a SMILES and an Orange data instance.

    Args:
        smile_active: SMILES string of the active molecule.
        train_instance: Data instance; its SMILES attribute is located with
            ``getSMILESAttr``.

    Returns:
        The Dice similarity, or ``None`` when the instance has no SMILES
        attribute or either molecule cannot be built.
    """
    smiles_attr = getSMILESAttr(train_instance)
    if not smiles_attr:
        return None
    train_smiles = str(train_instance[smiles_attr].value)

    mol_active = getMolFromSmiles(smile_active)
    mol_train = getMolFromSmiles(train_smiles)
    if not mol_active or not mol_train:
        return None

    return DataStructs.DiceSimilarity(
        Pairs.GetAtomPairFingerprint(mol_active),
        Pairs.GetAtomPairFingerprint(mol_train))
def fingerprint(mol, fp_type="DL"):
    """Return the fingerprint of ``mol`` for the requested type.

    Args:
        mol: Molecule to fingerprint.
        fp_type: ``"DL"`` (default), ``"circular"``, ``"MACCS"``,
            ``"torsions"`` or ``"pharm"``.

    Returns:
        The fingerprint, or ``None`` for an unrecognised ``fp_type``.
    """
    builders = {
        "DL": lambda: FingerprintMols.FingerprintMol(mol),
        "circular": lambda: AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=1024),
        "MACCS": lambda: MACCSkeys.GenMACCSKeys(mol),
        # NOTE(review): "torsions" yields an atom-pair bit vector, not a
        # topological-torsion fingerprint — confirm this is intended.
        "torsions": lambda: Pairs.GetAtomPairFingerprintAsBitVect(mol),
        "pharm": lambda: Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory),
    }
    build = builders.get(fp_type)
    if build is None:
        return None
    return build()
def get_similarity(): # get similarities on the first molecule in compound group
    """Compare every molecule against the first one with four fingerprints.

    Works on the module-level ``mols`` and ``compounds`` lists (plus ``radius``
    and ``bit_size``). The first entry of each list is taken as the reference
    and REMOVED from the list as a side effect.

    Returns:
        One text line per remaining molecule, concatenated into a single
        string: reference name, compound name (right-stripped), then the Dice
        similarities for atom pairs, Morgan bit vector, Morgan counts and
        feature-Morgan bit vector, separated by spaces.
    """
    # Reference fingerprints, computed before the reference is removed.
    ref_mol = mols[0]
    ref_morgan2 = AllChem.GetMorganFingerprintAsBitVect(ref_mol, radius, bit_size)
    ref_cmorgan2 = AllChem.GetMorganFingerprint(ref_mol, radius)
    ref_fmorgan2 = AllChem.GetMorganFingerprintAsBitVect(ref_mol, radius, bit_size, useFeatures=True)
    ref_ap = Pairs.GetAtomPairFingerprint(ref_mol)

    reference = compounds[0]
    del compounds[0]
    del mols[0]  # remove reference cmp from list

    # Fingerprints of the remaining molecules, one tuple per molecule. The
    # bit-info dictionaries are requested as before but not used afterwards.
    per_molecule = []
    for m in mols:
        fp_morgan2 = AllChem.GetMorganFingerprintAsBitVect(m, radius, bit_size, bitInfo={})
        fp_cmorgan2 = AllChem.GetMorganFingerprint(m, radius, bitInfo={})
        fp_fmorgan2 = AllChem.GetMorganFingerprintAsBitVect(m, radius, bit_size, useFeatures=True, bitInfo={})
        fp_ap = Pairs.GetAtomPairFingerprint(m)
        per_molecule.append((fp_ap, fp_morgan2, fp_cmorgan2, fp_fmorgan2))

    references = (ref_ap, ref_morgan2, ref_cmorgan2, ref_fmorgan2)
    rows = []
    for i, fingerprints in enumerate(per_molecule):
        similarities = [
            DataStructs.DiceSimilarity(ref_fp, fp)
            for ref_fp, fp in zip(references, fingerprints)
        ]
        fields = [str(reference), str(compounds[i].rstrip())]
        fields.extend(str(value) for value in similarities)
        rows.append(' '.join(fields) + '\n')
    return ''.join(rows)
Beispiel #22
0
def atom_fp(Library):
    """Sorted pairwise atom-pair similarities for a 20 % sample of a library.

    The global ``random`` generator is seeded with 43 before sampling, so the
    sample is reproducible (and the global random state is reset).

    Args:
        Library: Sequence of SMILES strings.

    Returns:
        A tuple ``(sim, y)``: the ascending list of pairwise similarities of
        the sampled molecules, and the matching cumulative fractions
        ``1/len(sim) .. 1``.
    """
    random.seed(43)
    sample_size = round(len(Library) * .2)
    sampled_smiles = random.sample(Library, sample_size)

    molecules = [Chem.MolFromSmiles(smi) for smi in sampled_smiles]
    fps_atom = [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in molecules]

    sim = sorted(
        DataStructs.FingerprintSimilarity(second, first)
        for first, second in it.combinations(fps_atom, 2))
    y = np.arange(1, len(sim) + 1) / len(sim)
    return sim, y
Beispiel #23
0
def Atompair_fp(mol, rc_names):
    """Cluster molecules by atom-pair Dice similarity and return a D3 dendrogram.

    Args:
        mol: Sequence of molecules.
        rc_names: Names of the molecules, in the same order; used as row and
            column labels of the similarity table and as leaf labels.

    Returns:
        The nested dict produced by ``add_node`` / ``label_tree`` describing
        the Ward-linkage tree.
    """
    fp = [Pairs.GetAtomPairFingerprint(x) for x in mol]
    tc_df = pd.DataFrame(index=rc_names, columns=rc_names).fillna(0)

    # One column per molecule: its similarity to every molecule.
    for c1, column_fp in enumerate(fp):
        tc_df[rc_names[c1]] = [
            DataStructs.DiceSimilarity(column_fp, row_fp) for row_fp in fp
        ]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and also exists in older releases.
    clusters = linkage(tc_df.values, "ward")
    clust_tree = to_tree(clusters, rd=False)
    d3Dendro = dict(children=[], name=" ")
    add_node(clust_tree, d3Dendro)
    label_tree(d3Dendro["children"][0], rc_names)

    return d3Dendro
def getCountInfo(m, fpType):
    """Return the non-zero elements of a count fingerprint as a dict.

    Args:
        m: Molecule to fingerprint.
        fpType: ``'AtomPair'`` / ``'atom'``, ``'morgan'`` / ``'circular'`` or
            ``'Topological'`` / ``'topo'``. The short forms and the Morgan
            names are matched case-insensitively.

    Returns:
        A ``{bit id: count}`` dict (keys converted to ``int`` for the
        topological type), or ``None`` for an unrecognised ``fpType``.
    """
    lowered = fpType.lower()
    if fpType == 'AtomPair' or lowered == 'atom':
        return Pairs.GetAtomPairFingerprint(m).GetNonzeroElements()
    if lowered in ('morgan', 'circular'):
        return AllChem.GetMorganFingerprint(m, 2).GetNonzeroElements()
    if fpType == 'Topological' or lowered == 'topo':
        raw_counts = Torsions.GetTopologicalTorsionFingerprint(m).GetNonzeroElements()
        return {int(bit): raw_counts[bit] for bit in raw_counts}
    return None
Beispiel #25
0
 def calculate_atom_pair_fp(molecular_df, col):
     """
     Calculates atom pair fingerprint
     :param molecular_df: pandas data frame containing molecules
     :param col: column with molecules present (SMILES strings)
     :return: the same data frame with an added ``atom_pair_fp`` column; rows
         whose fingerprint could not be computed hold the string ``'N/A'``
     """
     fps = []
     for smiles in molecular_df[col]:
         try:
             mol = Chem.MolFromSmiles(smiles)
             fp = Pairs.GetAtomPairFingerprintAsBitVect(mol)
         except Exception:
             # Best-effort per row, without trapping KeyboardInterrupt or
             # SystemExit as the former bare ``except:`` did.
             fp = 'N/A'
         fps.append(fp)
     molecular_df['atom_pair_fp'] = fps
     return molecular_df
Beispiel #26
0
def Fingerprints(mols, fingerprint):
    """Compute the named fingerprint for every molecule.

    The name is looked up in the module-level groups ``indigofps``,
    ``rdkitfps``, ``rdkitnonbitfps`` and ``rdkitestatefps``, in that order.

    Args:
        mols: Iterable of molecules (Indigo molecules for Indigo fingerprints).
        fingerprint: Fingerprint name.

    Returns:
        A list with one fingerprint per molecule, or ``None`` when the name is
        not handled by any group.
    """
    # Indigo fingerprints
    if fingerprint in indigofps:
        return [mol.fingerprint(fingerprint) for mol in mols]

    def _apply(table):
        # Run the per-molecule builder registered for this name, if any.
        build = table.get(fingerprint)
        if build is None:
            return None
        return [build(mol) for mol in mols]

    # RDKit fingerprints
    if fingerprint in rdkitfps:
        result = _apply({
            "atompair": lambda mol: Pairs.GetAtomPairFingerprintAsBitVect(mol),
            "avalon": lambda mol: pyAvalonTools.GetAvalonFP(mol),
            "daylight": lambda mol: Chem.RDKFingerprint(mol, fpSize=2048),
            "maccs": lambda mol: MACCSkeys.GenMACCSKeys(mol),
            "morgan": lambda mol: (AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024)),
            "pharm2d": lambda mol: Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory),
            "topological": lambda mol: FingerprintMols.FingerprintMol(mol),
        })
        if result is not None:
            return result

    # RDKit non-bit (integer or float) fingerprints
    if fingerprint in rdkitnonbitfps:
        result = _apply({
            "sheridan": lambda mol: Sheridan.GetBPFingerprint(mol),
            "topotorsion": lambda mol: Torsions.GetTopologicalTorsionFingerprint(mol),
        })
        if result is not None:
            return result

    # E-state fingerprints
    if fingerprint in rdkitestatefps:
        result = _apply({
            "estate1": lambda mol: Fingerprinter.FingerprintMol(mol)[0],
            "estate2": lambda mol: Fingerprinter.FingerprintMol(mol)[1],
        })
        if result is not None:
            return result

    # unknown fingerprint
    return None
Beispiel #27
0
    def computeFP(self, typeFP):
        """Compute the requested RDKit fingerprint(s) for ``self.mol``.

        When the instance has no ``mol`` attribute, a message is appended to
        ``self.log`` and ``self.err`` is set to 1. Otherwise the fingerprints
        are stored, keyed by type name, in ``self.d_FP``.

        Args:
            typeFP: One of ``"Mol"``, ``"MACCS"``, ``"pairs"``, ``"Torsion"``,
                ``"Morgan"``, or ``"All"`` for every type. Any other value
                produces an empty ``self.d_FP``.
        """
        if "mol" not in self.__dict__:
            self.log = self.log + "No smiles prepared\n"
            self.err = 1
            return

        def _wanted(name):
            return typeFP in (name, "All")

        computed = {}
        if _wanted("Mol"):
            computed["Mol"] = FingerprintMols.FingerprintMol(self.mol)
        if _wanted("MACCS"):
            computed["MACCS"] = MACCSkeys.GenMACCSKeys(self.mol)
        if _wanted("pairs"):
            computed["pairs"] = Pairs.GetAtomPairFingerprint(self.mol)
        if _wanted("Torsion"):
            computed["Torsion"] = Torsions.GetTopologicalTorsionFingerprint(self.mol)
        if _wanted("Morgan"):
            computed["Morgan"] = AllChem.GetMorganFingerprint(self.mol, 2)

        self.d_FP = computed
Beispiel #28
0
def compare_structure(smiles1, smiles2, fp_type="Morgan", sim_type="Dice"):
    """
    Compare the structural similarity of two compounds based on fingerprints.

    Parameters:
        smiles1: str, smiles of the compound 1
        smiles2: str, smiles of the compound 2
        fp_type: str, type of fingerprints; one of "Morgan",
            "MorganWithFeature", "MACCS", "Topological", "AtomPairs"
        sim_type: str, method for calculating similarity; one of "Dice",
            "Tanimoto", "Cosine", "Sokal", "Russel"

    Returns:
        The similarity value, or -1 when the fingerprint type or similarity
        method is unknown, or when fingerprinting/comparison fails.
    """
    fingerprinters = {
        "Morgan": lambda mol: AllChem.GetMorganFingerprint(
            mol, 2, useFeatures=False),
        "MorganWithFeature": lambda mol: AllChem.GetMorganFingerprint(
            mol, 2, useFeatures=True),
        "MACCS": lambda mol: Chem.MACCSkeys.GenMACCSKeys(mol),
        "Topological": lambda mol: FingerprintMols.FingerprintMol(mol),
        "AtomPairs": lambda mol: Pairs.GetAtomPairFingerprint(mol),
    }
    # Names of the DataStructs functions, resolved only when actually used.
    similarity_functions = {
        "Dice": "DiceSimilarity",
        "Tanimoto": "TanimotoSimilarity",
        "Cosine": "CosineSimilarity",
        "Sokal": "SokalSimilarity",
        "Russel": "RusselSimilarity",
    }

    try:
        # Both lookups happen first so an unknown name fails fast with -1.
        # Previously an unknown sim_type left the result unbound and crashed
        # with UnboundLocalError at the return statement.
        make_fp = fingerprinters[fp_type]
        metric_name = similarity_functions[sim_type]
        fp1 = make_fp(Chem.MolFromSmiles(smiles1))
        fp2 = make_fp(Chem.MolFromSmiles(smiles2))
        return getattr(DataStructs, metric_name)(fp1, fp2)
    except Exception:
        return -1
def atom_pairs_fp(SMILES, Library):
    """Summarise the pairwise atom-pair similarities of a set of molecules.

    Args:
        SMILES: Iterable of SMILES strings (at least three are needed so that
            the standard deviation is defined over two or more pairs).
        Library: Library name; stored as a string in the statistics table.

    Returns:
        A dict with ``"sim"`` (ascending list of pairwise similarities),
        ``"y"`` (matching cumulative fractions ``1/len(sim) .. 1``) and
        ``"df"`` (one-row DataFrame with MIN, 1Q, MEDIAN, MEAN, 3Q, MAX, STD —
        all rounded to two decimals — and Library).
    """
    ms = [Chem.MolFromSmiles(i) for i in SMILES]
    fp = [Pairs.GetAtomPairFingerprintAsBitVect(x) for x in ms]
    sim = sorted(
        DataStructs.FingerprintSimilarity(b, a)
        for a, b in it.combinations(fp, 2))
    y = np.arange(1, len(sim) + 1) / len(sim)  # cumulative fraction axis

    # Every statistic is rounded to two decimals. 1Q and MEDIAN used to be
    # rounded to whole numbers, which collapses similarities to 0 or 1, and
    # MAX was left unrounded unlike MIN.
    stat = {
        "MIN": [round(min(sim), 2)],
        "1Q": [round(np.percentile(sim, 25), 2)],
        "MEDIAN": [round(st.median(sim), 2)],
        "MEAN": [round(st.mean(sim), 2)],
        "3Q": [round(np.percentile(sim, 75), 2)],
        "MAX": [round(max(sim), 2)],
        "STD": [round(st.stdev(sim), 2)],
        "Library": [str(Library)],
    }
    df = pd.DataFrame.from_dict(stat)
    fp_result = {"sim": sim, "y": y, "df": df}
    return fp_result
Beispiel #30
0
 def testPairsRegression(self):
   """Check atom-pair fingerprints against the pickled reference set.

   Every molecule in ``self.mols`` must reproduce its stored fingerprint and
   must differ from the stored fingerprint of the preceding entry (for the
   first molecule that is the last entry, via index -1).
   """
   # The context manager closes the gzip handle even if unpickling fails.
   with gzip.open(os.path.join(self.testDataPath,'mols1000.aps.pkl.gz'),'rb') as inF:
     atomPairs = cPickle.load(inF, encoding='bytes')
   for i,m in enumerate(self.mols):
     ap = Pairs.GetAtomPairFingerprint(m)
     # assertEqual/assertNotEqual use the same ==/!= comparisons as before
     # but report both operands when the check fails.
     self.assertEqual(ap, atomPairs[i])
     self.assertNotEqual(ap, atomPairs[i-1])
    fps_fmorgan2.append(AllChem.GetMorganFingerprintAsBitVect(m, radius, bit_size, useFeatures=True, bitInfo=info))
    info_fmorgan2.append(info)
    fps_ap.append(Pairs.GetAtomPairFingerprint(m))

### ATOM PAIRS
# Per-atom weights for the atom-pair similarity maps: for every atom, each
# pair bit that involves it is decremented in a copy of the molecule's
# fingerprint, and the resulting drop in Dice similarity against the
# reference fingerprint (ref_ap) is recorded as that atom's weight.
print "generate atom pairs similarity maps"
# calculate weights
mol_weights = []
for i,m in enumerate(mols):
    weights = []
    # similarity of the unmodified fingerprint to the reference
    orig_simil = DataStructs.DiceSimilarity(ref_ap, fps_ap[i])
    matrix = rdmolops.GetDistanceMatrix(m)
    for at1 in range(m.GetNumAtoms()):
        # work on a copy so the stored fingerprint stays intact for the next atom
        new_fp = copy.deepcopy(fps_ap[i])
        # NOTE(review): the inner loop also visits at2 == at1 — confirm that
        # scoring a pair of an atom with itself is intended here.
        for at2 in range(m.GetNumAtoms()):
            bit = Pairs.pyScorePair(m.GetAtomWithIdx(at1), m.GetAtomWithIdx(at2), matrix[at1][at2])
            new_fp[bit] -= 1
        new_simil = DataStructs.DiceSimilarity(ref_ap, new_fp)
        # positive weight: removing this atom's pairs lowers the similarity
        weights.append(orig_simil - new_simil)
    mol_weights.append(weights)
# normalization
mol_weights = getNormalizedWeights(mol_weights)
# draw similarity maps ('ap' is passed through to the drawing helper)
generateSimilarityMaps(mols, mol_weights, 'ap')

### MORGAN2
print "generate morgan2 similarity maps"
# calculate weights
mol_weights = []
for i,m in enumerate(mols):
    weights = []