Beispiel #1
0
def CalcShapeDescriptors(mol):
    '''
    Compute principal-moment shape descriptors from one generated conformer.

    Explicit hydrogens are added, a single 3D conformer is embedded and
    relaxed with UFF, and the descriptors are read from that conformer.
    All later steps operate on the molecule returned by Chem.AddHs.

    input: RDKit Mol
    output: list of molecule shape descriptors, in the format of

    [PMI1, PMI2, PMI3, NPR1, NPR2]

    raises: ValueError if no 3D conformer could be embedded
    '''
    from rdkit.Chem import Descriptors3D

    mol = Chem.AddHs(mol)
    # Generate a 3D conformer.
    # This can be optimized later for better sampling of conformers.
    conf_id = AllChem.EmbedMolecule(mol,
                                    useExpTorsionAnglePrefs=True,
                                    useBasicKnowledge=True)
    if conf_id < 0:
        # EmbedMolecule reports failure with a negative conformer id. Without
        # this check the failure only shows up later as an opaque
        # "Bad Conformer Id" ValueError raised by the optimizer.
        raise ValueError(
            'CalcShapeDescriptors: could not embed a 3D conformer')
    AllChem.UFFOptimizeMolecule(mol)

    return [
        Descriptors3D.PMI1(mol),
        Descriptors3D.PMI2(mol),
        Descriptors3D.PMI3(mol),
        Descriptors3D.NPR1(mol),
        Descriptors3D.NPR2(mol),
    ]
Beispiel #2
0
def Sphericity(mol):
    """Flag molecules whose spherocity index exceeds ``maxSphericity``.

    Hydrogens are added, a 3D conformer is embedded and UFF-optimized, and
    the spherocity index of that conformer is compared with the name
    ``maxSphericity``, which must be defined outside this function.

    Returns the string ``'maxSphericity <value>'`` when the index is above
    the threshold. Returns ``False`` when it is not, or when embedding or
    optimization raises ``ValueError``.
    """
    from rdkit.Chem import AllChem
    from rdkit.Chem import Descriptors3D
    m = Chem.AddHs(mol)
    try:  # this function can raise a ValueError for unkekulizable molecules
        AllChem.EmbedMolecule(m)
    except ValueError:
        return False
    try:
        AllChem.UFFOptimizeMolecule(m)
    except ValueError:
        # One pre-formatted string prints identically whether ``print`` is
        # the Python 2 statement or the Python 3 function; the original
        # statement form was a SyntaxError on Python 3.
        print("ValueError with UFFOptimize, mol: {} {}".format(
            Chem.MolToSmiles(m), Chem.MolToSmiles(mol)))
        return False
    sphericity = Descriptors3D.SpherocityIndex(m)
    if sphericity > maxSphericity:
        return 'maxSphericity {}'.format(sphericity)
    return False
Beispiel #3
0
def _macrocycle_only(mol, min_ring_size=8):
    """Return ``(ring_atom_indices, stripped_mol)`` for the large rings of ``mol``.

    ``ring_atom_indices`` is the sorted list of atom indices that belong to
    a ring of at least ``min_ring_size`` atoms as reported by
    ``Chem.GetSymmSSSR``. ``stripped_mol`` is built from an editable copy of
    ``mol`` with every other atom removed.
    """
    ring_atoms = set()
    for ring in Chem.GetSymmSSSR(mol):
        if len(ring) >= min_ring_size:
            ring_atoms.update(ring)

    other_atoms = [
        atom.GetIdx() for atom in mol.GetAtoms()
        if atom.GetIdx() not in ring_atoms
    ]
    editable = Chem.RWMol(mol)
    # Remove from the highest index downwards so the indices that are still
    # waiting to be removed are not shifted by earlier removals.
    for idx in sorted(other_atoms, reverse=True):
        editable.RemoveAtom(idx)

    stripped = editable.GetMol()
    stripped.UpdatePropertyCache()
    return sorted(ring_atoms), stripped


def _tfd_or_na(ref_mol, probe_mol):
    """Return the torsion fingerprint deviation of two molecules, or ``'NA'``.

    ``'NA'`` is returned when the two molecules do not yield the same number
    of torsion angles, in which case the deviation is not computed.
    """
    tf = Chem.TorsionFingerprints
    ref_lists = tf.CalculateTorsionLists(ref_mol)
    ref_angles = tf.CalculateTorsionAngles(ref_mol, ref_lists[0],
                                           ref_lists[1])
    probe_lists = tf.CalculateTorsionLists(probe_mol)
    probe_angles = tf.CalculateTorsionAngles(probe_mol, probe_lists[0],
                                             probe_lists[1])
    if len(ref_angles) != len(probe_angles):
        return 'NA'
    return tf.CalculateTFD(ref_angles, probe_angles)


def run_comparison(references=None, conformers=None):
    """Compare conformers with their reference structures and tabulate metrics.

    A conformer belongs to a reference when the text before the first ``_``
    in both ``_Name`` properties is equal. For every such pair the function
    records, for the macrocycle (rings of 8 or more atoms) and for the whole
    molecule: an RMSD, the radius of gyration, the total of the topological
    torsion fingerprint and the torsion fingerprint deviation (``'NA'`` when
    the torsion counts differ).

    Files written:
      * ``Aligment_RMSD.sdf`` - every compared conformer with its RMSD
        properties set.
      * ``<reference _Name>.csv`` - one table per reference, sorted by
        ``RMSD_macrocycle``.
      * ``Lowest_RMSD_Data.csv`` - the best row of every reference.

    Parameters
    ----------
    references : path of the reference SD file. When ``None`` the
        module-level ``args.references`` is used.
    conformers : path of the conformer SD file. When ``None`` the
        module-level ``args.conformers`` is used.

    Relies on the module-level names ``args``, ``Chem``, ``AllChem``,
    ``Descriptors3D``, ``rdMolDescriptors``, ``pd`` and ``cmd`` (presumably
    PyMOL's command module - confirm against the file's imports).
    """
    # Explicit arguments take precedence; previously they were silently
    # ignored in favour of the global command-line namespace.
    references_file = args.references if references is None else references
    conformers_file = args.conformers if conformers is None else conformers

    print(' -- -- -- -- -- LOADING REFERENCES IN {} -- -- -- -- -- '.format(
        references_file))
    print(' -- -- -- -- -- LOADING CONFORMERS IN {} -- -- -- -- -- '.format(
        conformers_file))

    lowest_rmsd = []
    out_RMSD = Chem.SDWriter('Aligment_RMSD.sdf')

    print(' -- -- -- -- -- STARTING ANALYSIS -- -- -- -- -- ')

    try:
        for ref_index, ref in enumerate(Chem.SDMolSupplier(references_file),
                                        start=1):
            if ref is None:
                # The supplier yields None for records it cannot parse.
                print(ref_index, ')', 'unreadable reference record, skipped')
                continue

            ref_name = ref.GetProp('_Name')
            ref_id = ref_name.split('_')[0]
            print(ref_index, ')', ref_id)

            macrocycle_atoms, m_ref = _macrocycle_only(ref)
            print('Initial Num Atoms:', ref.GetNumAtoms())
            print('Macrocycle length:', len(macrocycle_atoms))

            # Unsanitized query molecule describing the macrocycle atoms; it
            # is matched against both the reference and each conformer to
            # obtain the atom map for the macrocycle RMSD.
            macro_smiles = Chem.MolFragmentToSmiles(ref,
                                                    macrocycle_atoms,
                                                    kekuleSmiles=True)
            macro_query = Chem.MolFromSmiles(macro_smiles, sanitize=False)

            mol_index = 0
            table = pd.DataFrame()

            for mol in Chem.SDMolSupplier(conformers_file):
                if mol is None:
                    # Unparsable conformer record: nothing to compare.
                    continue
                if ref_id != mol.GetProp('_Name').split('_')[0]:
                    continue

                table.loc[mol_index, 'Conformer'] = [mol.GetProp('_Name')]

                # --- macrocycle metrics ---------------------------------
                ref_atoms = ref.GetSubstructMatch(macro_query)
                mol_atoms = mol.GetSubstructMatch(macro_query)
                rms_macrocycle = AllChem.GetBestRMS(
                    mol, ref, map=[list(zip(mol_atoms, ref_atoms))])

                mol.SetProp('RMSD_macrocycle', str(rms_macrocycle))
                table.loc[mol_index, 'RMSD_macrocycle'] = [rms_macrocycle]

                _, m_mol = _macrocycle_only(mol)

                radious_macro = Descriptors3D.RadiusOfGyration(m_mol)
                table.loc[mol_index, 'RoG_macrocycle'] = radious_macro

                tt_macro = rdMolDescriptors.GetTopologicalTorsionFingerprint(
                    m_mol)
                table.loc[mol_index,
                          'TF_macrocycle'] = [tt_macro.GetTotalVal()]

                table.loc[mol_index,
                          'TFD_macrocycle'] = [_tfd_or_na(m_ref, m_mol)]

                # --- whole-molecule metrics -----------------------------
                # Both structures are loaded into ``cmd`` under temporary
                # names, compared, and removed again before the next pair.
                cmd.read_molstr(Chem.MolToMolBlock(ref), 'ref')
                cmd.read_molstr(Chem.MolToMolBlock(mol), 'mol')
                rmsd = cmd.rms_cur('ref', 'mol')
                cmd.deselect()
                cmd.delete('all')

                mol.SetProp('RMSD_heavy_atoms', str(rmsd))
                table.loc[mol_index, 'RMSD_heavy_atoms'] = [rmsd]

                out_RMSD.write(mol)

                radious = Descriptors3D.RadiusOfGyration(mol)
                table.loc[mol_index, 'RoG_heavy_atoms'] = radious

                tt = rdMolDescriptors.GetTopologicalTorsionFingerprint(mol)
                table.loc[mol_index, 'TF_heavy_atoms'] = [tt.GetTotalVal()]

                table.loc[mol_index,
                          'TFD_heavy_atoms'] = [_tfd_or_na(ref, mol)]

                mol_index = mol_index + 1

            if len(table.index) > 0:
                sort = table.sort_values('RMSD_macrocycle', ascending=True)
                sort = sort.reset_index(drop=True)
                # The per-reference CSV is written before the summary-only
                # columns (Nconf, Span_*) are added below.
                sort.to_csv(ref_name + '.csv')

                sort['Nconf'] = len(sort.index)
                print('Number of conformers analyzed:', len(sort.index))
                print('data in file:', ref_name + '.csv')
                print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- \n')

                sort['Span_Rog_macrocycle'] = float(
                    max(sort['RoG_macrocycle']) - min(sort['RoG_macrocycle']))
                sort['Span_Rog_heavy_atoms'] = float(
                    max(sort['RoG_heavy_atoms']) -
                    min(sort['RoG_heavy_atoms']))

                # Row 0 is the conformer with the lowest macrocycle RMSD.
                lowest_rmsd.append(sort.loc[0])
            else:
                print(
                    'No reference or conformers found in input files for {}'.
                    format(ref_name))
                print(' ************************************ \n')
    finally:
        # Close the writer even when a comparison fails part-way through so
        # the structures written so far are not left in an open file.
        out_RMSD.close()

    print('SAVING DATA OF LOWEST RMSD OF CONFORMERS')

    summary = pd.DataFrame(lowest_rmsd)
    summary = summary.reset_index(drop=True)
    summary.to_csv('Lowest_RMSD_Data.csv')

    print('Lowest RMSD Data in file: Lowest_RMSD_Data.csv')
    print('***************************************************\n')
    # Report the file name that is actually written above.
    print('Structures in files: Aligment_RMSD.sdf')
    print('***************************************************\n')
    print(
        'CALCULATION OF {} OUT OF {} REFERENCES DONE, FILES SAVED. THANK YOU FOR USING THIS SCRIPT \n'
        .format(len(summary.index), len(Chem.SDMolSupplier(references_file))))
def _mcs_atom_map(probe, reference):
    """Return ``[(probe_idx, reference_idx), ...]`` from the maximum common substructure.

    The common substructure of the two molecules is found with
    ``rdFMCS.FindMCS`` and matched against each of them; the two matches are
    paired position by position.
    """
    mcs = rdFMCS.FindMCS([probe, reference])
    core = Chem.MolFromSmarts(mcs.smartsString)
    reference_match = reference.GetSubstructMatch(core)
    probe_match = probe.GetSubstructMatch(core)
    return list(zip(probe_match, reference_match))


def run_comparison(references=None, conformers=None):
    """Align conformers to their references and tabulate comparison metrics.

    A conformer belongs to a reference when the text before the first ``_``
    in both ``_Name`` properties is equal. For each pair the function
    records the MCS-based alignment RMSD, the O3A alignment result, the
    torsion fingerprint deviation, the radius of gyration, the MMFF energy
    before and after re-embedding plus MMFF94 optimization, and the RMSD of
    the optimized structure.

    Files written:
      * ``<reference id>.csv`` - one table per reference, sorted by RMSD.
      * ``Lowest_RMSD_Data.csv`` - the lowest-RMSD row of every reference.
      * ``Aligned_Refrences.sdf``, ``RMSD_alignment.sdf``,
        ``O3A_alignment.sdf``, ``Minimization.sdf`` - the structures.

    A reference whose processing raises is reported and omitted from the
    CSV output; structures appended before the failure are still written.

    Parameters
    ----------
    references : path of the reference SD file. When ``None`` the
        module-level ``args.references`` is used.
    conformers : path of the conformer SD file. When ``None`` the
        module-level ``args.conformers`` is used.

    Relies on the module-level names ``args``, ``Chem``, ``AllChem``,
    ``rdFMCS``, ``rdMolAlign``, ``Descriptors3D`` and ``pd``.
    """
    # Explicit arguments take precedence; previously they were overwritten
    # unconditionally by the global command-line namespace.
    if references is None:
        references = args.references
    if conformers is None:
        conformers = args.conformers

    templates = []
    lowest_rmsd = []

    for reference in AllChem.SDMolSupplier(references):
        # The supplier yields None for records it cannot parse.
        if reference is not None and reference.HasProp('_Name'):
            ref_id = reference.GetProp('_Name').split('_')[0]
            templates.append([ref_id, reference])

    mol_RMSD = []
    mol_references = []
    mol_O3A = []
    mol_minimized = []

    for ref_id, ref_mol in templates:

        try:

            print('Processing:', ref_id)
            conformer = []
            rmsd = []
            O3A_result = []
            t_angles = []
            r_gyration = []
            i_energy = []
            f_energy = []
            rmsd_minimized = []
            reference_recorded = False

            for mol in AllChem.SDMolSupplier(conformers):

                if mol is None:
                    # Unparsable conformer record: skip it instead of
                    # discarding the whole reference.
                    continue
                if ref_id != mol.GetProp('_Name').split('_')[0]:
                    continue

                # A real copy of the input pose for the O3A alignment. A
                # plain assignment would alias ``mol``, so the alignment and
                # re-embedding below would overwrite each other's
                # coordinates in the structures queued for output.
                mol_copy = Chem.Mol(mol)
                conformer.append(str(mol.GetProp('_Name')))

                # Alignment and RMSD based on the maximum common substructure
                rms = rdMolAlign.AlignMol(mol,
                                          ref_mol,
                                          atomMap=_mcs_atom_map(mol, ref_mol))
                rmsd.append(rms)
                mol.SetProp('RMSD', str(rms))
                mol_RMSD.append(mol)
                if not reference_recorded:
                    # Record each reference once, in processing order, so
                    # the reference SD file has a deterministic order.
                    mol_references.append(ref_mol)
                    reference_recorded = True

                # Torsion fingerprint deviation
                r_list = Chem.TorsionFingerprints.CalculateTorsionLists(
                    ref_mol)
                r_angles = Chem.TorsionFingerprints.CalculateTorsionAngles(
                    ref_mol, r_list[0], r_list[1])
                c_list = Chem.TorsionFingerprints.CalculateTorsionLists(mol)
                c_angles = Chem.TorsionFingerprints.CalculateTorsionAngles(
                    mol, c_list[0], c_list[1])
                torsion = Chem.TorsionFingerprints.CalculateTFD(
                    r_angles, c_angles)
                t_angles.append(torsion)

                # Radius of gyration and energy of the input conformer
                radious = Descriptors3D.RadiusOfGyration(mol)
                r_gyration.append(radious)
                mp = AllChem.MMFFGetMoleculeProperties(mol)
                mmff = AllChem.MMFFGetMoleculeForceField(mol, mp)
                i_energy.append(mmff.CalcEnergy())

                # Energy after re-embedding and minimization, done on a copy
                # so the aligned pose stored in ``mol_RMSD`` is preserved.
                m2 = Chem.Mol(mol)
                AllChem.EmbedMolecule(m2)
                AllChem.MMFFOptimizeMolecule(m2, mmffVariant='MMFF94')
                mp = AllChem.MMFFGetMoleculeProperties(m2)
                mmff = AllChem.MMFFGetMoleculeForceField(m2, mp)
                f_energy.append(mmff.CalcEnergy())

                m3 = Chem.RemoveHs(m2)
                rms_2 = rdMolAlign.AlignMol(m3,
                                            ref_mol,
                                            atomMap=_mcs_atom_map(m3, ref_mol))
                rmsd_minimized.append(rms_2)
                m3.SetProp('RMSD', str(rms_2))
                mol_minimized.append(m3)

                O3A = rdMolAlign.GetO3A(mol_copy, ref_mol)
                align = O3A.Align()
                O3A_result.append(align)
                mol_copy.SetProp('O3A', str(align))
                mol_O3A.append(mol_copy)

            d = {
                'conformer': pd.Series(conformer),
                'RMSD': pd.Series(rmsd),
                'O3A_value': pd.Series(O3A_result),
                'Torsional_Fingerprint': pd.Series(t_angles),
                'Radius_of_Gyration': pd.Series(r_gyration),
                'Initial_Energy': pd.Series(i_energy),
                'Minimization_Energy': pd.Series(f_energy),
                'RMSD_after_minimization': pd.Series(rmsd_minimized)
            }

            table = pd.DataFrame(d)
            sort = table.sort_values('RMSD', ascending=True)
            sort = sort.reset_index(drop=True)
            sort.to_csv(ref_id + '.csv')
            print('data in file:', ref_id + '.csv')
            print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- ')

            # Largest radius of gyration minus that of the best conformer.
            rog_diff = (float(
                max(sort['Radius_of_Gyration']) -
                sort['Radius_of_Gyration'][0]))

            # Row 0 is the lowest-RMSD conformer; with no matching conformer
            # this lookup raises and the reference is reported below.
            lowest_rmsd.append(
                (sort['conformer'][0], sort['RMSD'][0], sort['O3A_value'][0],
                 sort['Torsional_Fingerprint'][0],
                 sort['Radius_of_Gyration'][0], sort['Initial_Energy'][0],
                 sort['Minimization_Energy'][0],
                 sort['RMSD_after_minimization'][0], rog_diff))

        except Exception as exc:
            # Best effort: one bad reference must not abort the whole run,
            # but say what went wrong instead of hiding it.
            print('Something wrong with this reference or conformer')
            print('Omitting')
            print('Reason:', repr(exc))

    print(
        'SAVING DATA OF LOWEST RMSD OF CONFORMERS ... ... ... ... ... ... ... ...'
    )
    summary = pd.DataFrame(data=lowest_rmsd,
                           columns=[
                               'Conformer', 'RMSD', 'O3A_value',
                               'Torsional_Fingerprint', 'Radius_of_Gyration',
                               'Initial_Energy', 'Minimization Energy',
                               'RMSD_after_minimization',
                               'Dif_Radious_of_Gyration'
                           ])
    summary.to_csv('Lowest_RMSD_Data.csv')
    print('Lowest RMSD Data in file: Lowest_RMSD_Data.csv')
    print('***************************************************')

    print(
        'SAVING STRUCTURES (RMSD, O3A, and MINIMIZATION) ... ... ... ... ... ... ... ... ...'
    )
    outputs = (
        ('Aligned_Refrences.sdf', mol_references),
        ('RMSD_alignment.sdf', mol_RMSD),
        ('O3A_alignment.sdf', mol_O3A),
        ('Minimization.sdf', mol_minimized),
    )
    for filename, structures in outputs:
        writer = Chem.SDWriter(filename)
        try:
            for element in structures:
                writer.write(element)
        finally:
            writer.close()

    print(
        'Structures in files: Aligned_Refrences.sdf, RMSD_alignment.sdf, O3A_alignment.sdf, and Minimization.sdf '
    )

    print(
        'ALL THE CALCULATIONS DONE, FILES SAVED. THANK YOU FOR USING THIS SCRIPT'
    )
Beispiel #5
0
def getSpherocityIndex(mol3D):
    """Return ``Descriptors3D.SpherocityIndex(mol3D)``, or ``"NA"`` on failure.

    ``mol3D`` is presumably an RDKit molecule carrying a 3D conformer
    (confirm against the caller). Any ordinary exception raised while
    computing the descriptor yields the string ``"NA"``.
    """
    try:
        return Descriptors3D.SpherocityIndex(mol3D)
    except Exception:
        # Best-effort descriptor: report "NA" on failure, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
        return "NA"
Beispiel #6
0
def getPMI3(mol3D):
    """Return ``Descriptors3D.PMI3(mol3D)``, or ``"NA"`` on failure.

    ``mol3D`` is presumably an RDKit molecule carrying a 3D conformer
    (confirm against the caller). Any ordinary exception raised while
    computing the descriptor yields the string ``"NA"``.
    """
    try:
        return Descriptors3D.PMI3(mol3D)
    except Exception:
        # Best-effort descriptor: report "NA" on failure, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
        return "NA"
Beispiel #7
0
def getNPR2(mol3D):
    """Return ``Descriptors3D.NPR2(mol3D)``, or ``"NA"`` on failure.

    ``mol3D`` is presumably an RDKit molecule carrying a 3D conformer
    (confirm against the caller). Any ordinary exception raised while
    computing the descriptor yields the string ``"NA"``.
    """
    try:
        return Descriptors3D.NPR2(mol3D)
    except Exception:
        # Best-effort descriptor: report "NA" on failure, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
        return "NA"
Beispiel #8
0
def getInertialShapeFactor(mol3D):
    """Return ``Descriptors3D.InertialShapeFactor(mol3D)``, or ``"NA"`` on failure.

    ``mol3D`` is presumably an RDKit molecule carrying a 3D conformer
    (confirm against the caller). Any ordinary exception raised while
    computing the descriptor yields the string ``"NA"``.
    """
    try:
        return Descriptors3D.InertialShapeFactor(mol3D)
    except Exception:
        # Best-effort descriptor: report "NA" on failure, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
        return "NA"
Beispiel #9
0
def getAsphericity(mol3D):
    """Return ``Descriptors3D.Asphericity(mol3D)``, or ``"NA"`` on failure.

    ``mol3D`` is presumably an RDKit molecule carrying a 3D conformer
    (confirm against the caller). Any ordinary exception raised while
    computing the descriptor yields the string ``"NA"``.
    """
    try:
        return Descriptors3D.Asphericity(mol3D)
    except Exception:
        # Best-effort descriptor: report "NA" on failure, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except hid them).
        return "NA"
Beispiel #10
0
def get_descriptors(smiles):
    """
    Compute a fixed set of RDKit descriptors for one SMILES string.

    The SMILES is parsed, explicit hydrogens are added and one conformer is
    embedded with the ETKDG parameters before any descriptor is evaluated,
    so the 3-D descriptors are read from that embedded conformer.

    Parameters
    ----------
    smiles : str
        SMILES string of the chemical of interest

    Returns
    -------
    dict
        Descriptor name -> value, in the order the descriptors are computed

    Notes: Developed with RDKit 2019.03.4, although doc pages listed 2019.03.1
    """

    mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
    Chem.EmbedMolecule(mol, Chem.ETKDG())

    # The literal is evaluated top to bottom, so descriptors are computed and
    # inserted in exactly the order they are listed.
    return {
        # Simple descriptors:
        # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html
        # -- molecular weight
        'molwt': Descriptors.ExactMolWt(mol),
        # -- partial charge metrics
        'max_abs_partial_charge': Descriptors.MaxAbsPartialCharge(mol),
        'max_partial_charge': Descriptors.MaxPartialCharge(mol),
        'min_abs_partial_charge': Descriptors.MinAbsPartialCharge(mol),
        'min_partial_charge': Descriptors.MinPartialCharge(mol),
        # -- basic electron counts
        'num_radical_electrons': Descriptors.NumRadicalElectrons(mol),
        'num_valence_electrons': Descriptors.NumValenceElectrons(mol),

        # 3-D descriptors:
        # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors3D.html
        # (rdMolDescriptors.CalcAsphericity / CalcEccentricity gave the same
        # values in some basic tests, per the original author.)
        'asphericity': Descriptors3D.Asphericity(mol),
        'eccentricity': Descriptors3D.Eccentricity(mol),
        'inertial_shape_factor': Descriptors3D.InertialShapeFactor(mol),
        'radius_of_gyration': Descriptors3D.RadiusOfGyration(mol),
        'spherocity_index': Descriptors3D.SpherocityIndex(mol),

        # Graph descriptors:
        # https://www.rdkit.org/docs/source/rdkit.Chem.GraphDescriptors.html
        'balaban_j': GraphDescriptors.BalabanJ(mol),
        'bertz_ct': GraphDescriptors.BertzCT(mol),
        'chi0': GraphDescriptors.Chi0(mol),
        'chi0n': GraphDescriptors.Chi0n(mol),
        'chi0v': GraphDescriptors.Chi0v(mol),
        'chi1': GraphDescriptors.Chi1(mol),
        'chi1n': GraphDescriptors.Chi1n(mol),
        'chi1v': GraphDescriptors.Chi1v(mol),
        'chi2n': GraphDescriptors.Chi2n(mol),
        'chi2v': GraphDescriptors.Chi2v(mol),
        'chi3n': GraphDescriptors.Chi3n(mol),
        'chi3v': GraphDescriptors.Chi3v(mol),
        'chi4n': GraphDescriptors.Chi4n(mol),
        'chi4v': GraphDescriptors.Chi4v(mol),
        'hall_kier_alpha': GraphDescriptors.HallKierAlpha(mol),
        'kappa1': GraphDescriptors.Kappa1(mol),
        'kappa2': GraphDescriptors.Kappa2(mol),
        'kappa3': GraphDescriptors.Kappa3(mol),

        # Predicted properties from Wildman and Crippen
        'log_p': Descriptors.MolLogP(mol),
        'refractivity': Descriptors.MolMR(mol),
    }