Example #1
0
 def rmsd(self, ligand, reference):
     """
     RMSD between the ligand (after ``self._clean_mol``) and a reference.

     :param ligand: molecule that is passed through ``self._clean_mol`` first
     :param reference: molecule handed to the RMSD calculation unchanged
     :return: whatever ``MolecularDescriptors.rmsd`` returns for the pair
     """
     cleaned_ligand = self._clean_mol(ligand)
     return MolecularDescriptors.rmsd(cleaned_ligand, reference)
Example #2
0
 def rdkit_rmsd(self, conformer, expected):
     """
     RMSD between a conformer and the expected molecule, computed with RDKit.

     Both molecules are converted with ``etkdg_generator.ccdc_to_rdkit`` and
     compared with ``AllChem.GetBestRMS``. If any step of that path raises an
     ordinary exception, the CCDC overlay (``rotate_torsions=False``) is used
     as a fallback instead.

     :param conformer: molecule to evaluate
     :param expected: molecule to compare against
     :return: ``(conformer_molecule, rmsd)`` from the RDKit path, or the
         return value of
         ``MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto`` from the
         fallback path; callers in this file read the RMSD from index 1
     """
     try:
         rd_conformer = etkdg_generator.ccdc_to_rdkit(conformer)
         rd_expected = etkdg_generator.ccdc_to_rdkit(expected)
         # NOTE(review): RDKit's GetBestRMS transforms its first (probe)
         # argument, which here is the expected molecule - confirm that the
         # conformer returned below is really meant to be the "aligned" one.
         rmsd = AllChem.GetBestRMS(rd_expected, rd_conformer)
         aligned_conf = etkdg_generator.rdkit_conformer_to_ccdc(
             rd_conformer, -1)
         return (aligned_conf, rmsd)
     except Exception:
         # Deliberate best-effort fallback. ``Exception`` rather than a bare
         # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
         return MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
             conformer, expected, rotate_torsions=False)
 def docks_to_ref_rmsd(self):
     """
     RMSD of every docked ligand against the prepared reference ligand.

     The reference is the first entry read from
     ``self.prepared_ligand_path``. Hydrogens are excluded and the atom
     pairing for each pose is taken from ``self.match_heavy_atoms``.

     :return: list of RMSD values, ordered like ``self.docking_result.ligands``
     """
     # Only calculate for complete docking results!
     poses = [ligand.molecule for ligand in self.docking_result.ligands]
     reference = MoleculeReader(self.prepared_ligand_path)[0]

     rmsds = []
     for pose in poses:
         rmsds.append(
             MolecularDescriptors.rmsd(
                 reference,
                 pose,
                 exclude_hydrogens=True,
                 atoms=self.match_heavy_atoms(reference, pose)))
     return rmsds
Example #4
0
    def generate_ccdc_start_points_from_expected(self, expected_folder,
                                                 output_folder, results_stream,
                                                 add_hydrogens,
                                                 no_assign_bond_types):
        """
            Some simple code to use our 1D->3D methods to generate start points

            Every ``.mol2`` file in ``expected_folder`` is read with
            ``self._molecule``, passed through ``Molecule.from_molecule`` and
            overlaid (``rotate_torsions=True``) on a fresh read of the same
            file. One CSV row per molecule is written to ``results_stream``,
            with a header row before the first one.

            :param expected_folder: folder scanned for ``.mol2`` files
            :param output_folder: folder receiving the generated molecules; it
                is created when missing. With ``None`` nothing is written to
                disk (same convention as ``generate_ccdc_start_points``).
            :param results_stream: writable text stream for the CSV rows
            :param add_hydrogens: when true, ``add_hydrogens()`` is called on
                each input molecule before it is converted
            :param no_assign_bond_types: forwarded to ``self._molecule``
            :return: None
        """
        if output_folder is not None and not os.path.exists(output_folder):
            os.mkdir(output_folder)

        header_written = False
        for filename in os.listdir(expected_folder):
            if os.path.splitext(filename)[1] != ".mol2":
                continue

            expected_mol_filename = os.path.join(expected_folder, filename)

            input_molecule = self._molecule(expected_mol_filename,
                                            no_assign_bond_types)
            if add_hydrogens:
                input_molecule.add_hydrogens()
            molid = input_molecule.identifier

            mol = Molecule.from_molecule(input_molecule)

            # Only touch the disk when an output folder was requested; the
            # path used to be built unconditionally and failed on None.
            if output_folder is not None:
                output_mol_filename = os.path.join(
                    output_folder, os.path.basename(filename))
                with MoleculeWriter(output_mol_filename) as mol_writer:
                    mol_writer.write(mol)

            # Re-read the expected molecule so the comparison target is not
            # the object that may have had hydrogens added above.
            expected_molecule = self._molecule(expected_mol_filename,
                                               no_assign_bond_types)
            overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
                mol, expected_molecule, rotate_torsions=True)

            if not header_written:
                results_stream.write(
                    'id,rotated torsion rmsd,rotated torsion rmsd tanimoto\n'
                )
                header_written = True

            # str.join works on Python 2 and 3; string.join is Python 2 only.
            results_stream.write(
                ','.join([molid, str(overlay[1]),
                          str(overlay[2])]) + '\n')
Example #5
0
def simpleRelabelling(stringIn, crystal):
    ''' Print stringIn relabelled once for every molecular component of crystal.

        Simple relabelling assumes that you can just add nAtomType to each label
        i.e. if 2 O atoms you can assume O1 will be O3, O5, O7 in other molecules etc

        stringIn is split on whitespace and every token on "_"; each resulting
        atom label is mapped through labelIncludingNumberMols with the index of
        the component, the pieces are re-joined ("_" inside a token, " +"
        between tokens) and one line is printed per component.
        Nothing is returned.

        NOTE(review): labelIncludingNumberMols is not defined inside this
        function; it is assumed to be available at module scope - confirm. '''
    # Element -> atom count (as strings) parsed from the formula of the first
    # component, e.g. "C6 H12" -> {'C': '6', 'H': '12'}.
    atomsPerMol = dict([re.split(r'(\d+)', x)[:2] for x in crystal.molecule.components[0].formula.split()])

    # range() and print(...) with a single argument behave the same on
    # Python 2 and Python 3, unlike the xrange / print statement used before.
    for i2 in range(len(crystal.molecule.components)):
        print(" +".join(["_".join([labelIncludingNumberMols(y, atomsPerMol, i2) for y in x.split("_")]) for x in stringIn.split()]))
Example #6
0
    def generate_ccdc_start_points(self, id_smilescode_pairs, expected_folder,
                                   output_folder, results_stream,
                                   no_assign_bond_types):
        """
            Some simple code to use our 1D->3D methods to generate start points

            A molecule is built from every SMILES string with
            ``Molecule.from_smiles``. It is optionally written to
            ``output_folder`` as ``<id>.mol2`` and optionally overlaid
            (``rotate_torsions=True``) on ``<expected_folder>/<id>.mol2``, in
            which case one CSV row is written to ``results_stream`` (with a
            header row before the first one).

            :param id_smilescode_pairs: iterable of sequences whose element 0
                is the molecule id and element 1 the SMILES string
            :param expected_folder: folder holding the expected ``.mol2``
                files, or ``None`` to skip the comparison
            :param output_folder: folder receiving the generated molecules
                (created when missing), or ``None`` to skip writing
            :param results_stream: writable text stream for the CSV rows
            :param no_assign_bond_types: forwarded to ``self._molecule``
            :return: None
        """
        if output_folder is not None and not os.path.exists(output_folder):
            os.mkdir(output_folder)

        header_written = False
        for pair in id_smilescode_pairs:
            molid = pair[0]
            smiles = pair[1]
            mol = Molecule.from_smiles(smiles, molid, True)

            if output_folder is not None:
                output_file = os.path.join(output_folder, '%s.mol2' % molid)
                with MoleculeWriter(output_file) as mol_writer:
                    mol_writer.write(mol)

            if expected_folder is None:
                continue

            expected_mol_filename = os.path.join(expected_folder,
                                                 "%s.mol2" % molid)
            expected_molecule = self._molecule(expected_mol_filename,
                                               no_assign_bond_types)
            overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
                mol, expected_molecule, rotate_torsions=True)

            if not header_written:
                results_stream.write(
                    'id,rotated torsion rmsd,rotated torsion rmsd tanimoto\n'
                )
                header_written = True

            # str.join works on Python 2 and 3; string.join is Python 2 only.
            results_stream.write(
                ','.join([molid, str(overlay[1]),
                          str(overlay[2])]) + '\n')
Example #7
0
def replacementDict(crystal1, crystal2):
    ''' Map each atom label of crystal2 to a relabelled atom label of crystal1.

    For every component of crystal2 the first maximum-common-substructure
    match against the single component of crystal1 is taken. The crystal2
    atom label becomes the key; the value is the matched crystal1 label with
    its number shifted by (atoms of that element per molecule) * (index of the
    crystal2 component).

    Doesnt seem to consider 3d structure?? so may mix up enantiomers, or quasi-enantiomers
    ie those where the labelling of atoms makes them independent
    Presumably, as used as a dictionary, mol1 <-> mol2 makes little diff???

    Raises AssertionError when crystal1 has more than one component or when a
    component of crystal2 has a different atom count than crystal1. '''

    import re
    #only works with crystal1 being z'=1 and crystal2 being same molecule but many times
    assert (len(crystal1.molecule.components) == 1)
    assert (all([
        len(x.atoms) == len(crystal1.molecule.atoms)
        for x in crystal2.molecule.components
    ]))

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    # Compiled once because it is used for the formula and for every label.
    numberPattern = re.compile(r'(\d+)')

    def labelIncludingNumberMols(atom1Label, stoichiometry, molIndex):
        # Split e.g. 'C12' into ['C', '12', '']; a label without digits
        # yields a single-element list.
        parts = numberPattern.split(atom1Label)

        if len(parts) > 1:
            element1, number1 = parts[:2]

        # It may be possible that something doesn't have a number- give it '1' - watch here if other bugs arise
        else:
            element1, number1 = parts[0], u'1'

        return element1 + str(
            int(number1) + int(stoichiometry[element1]) * molIndex)

    # Element -> atom count (as strings) from crystal1's formula,
    # e.g. "C6 H12" -> {'C': '6', 'H': '12'}.
    atomsPerMol = dict(
        [numberPattern.split(x)[:2] for x in crystal1.molecule.formula.split()])

    return dict(
        [[
            pair[1].label,
            labelIncludingNumberMols(pair[0].label, atomsPerMol, i2)
        ] for mol1 in crystal1.molecule.components
         for i2, mol2 in enumerate(crystal2.molecule.components)
         for pair in MolecularDescriptors.MaximumCommonSubstructure().search(
             mol1, mol2)[0]])
0
import os
from ccdc import io
from ccdc.descriptors import MolecularDescriptors


if __name__ == "__main__":
    # Scratch check: load three molecules for one PDB entry, strip the
    # "****"-labelled atoms from the third and compare it with the first.

    pdb = "4G46"
    base = f"/local/pcurran/leads_frag/{pdb}"
    #  test
    mol1 = io.MoleculeReader(os.path.join(base, f"{pdb}_ligand.mol2"))[0]
    mol2 = io.MoleculeReader(os.path.join(base, f"{pdb}_ref.mol2"))[0]

    # The file name is derived from `pdb` (it used to hard-code "4G46") so
    # that changing the entry above keeps all three inputs in step.
    mol3 = io.MoleculeReader(
        os.path.join(base, f"gold/goldscore/data/ranked_{pdb}_ligand_m1_1.mol2"))[0]

    # Heavy atoms labelled "****" are removed before the comparison.
    rm = [atm for atm in mol3.heavy_atoms if atm.label == "****"]
    mol3.remove_atoms(rm)

    print([atm.label for atm in mol1.heavy_atoms])
    print([atm.label for atm in mol2.heavy_atoms])
    print([atm.label for atm in mol3.heavy_atoms])

    # NOTE: the RMSD is computed but neither printed nor used afterwards.
    a = MolecularDescriptors.rmsd(mol1, mol3)

Example #9
0
        conformers_mol = [mol]
else:
    # Run minimisation: every generated conformer is minimised, written to
    # its own file in `conformers_dir`, and its scores are collected for the
    # optional log file.
    print('Minimising molecular geometry using Tripos force field...')
    molecule_minimiser = conformer.MoleculeMinimiser(
    )  # Uses Tripos force field
    min_conformers = []  # minimised molecules, in generation order
    log_data = {}  # conformer name -> [score, rmsd, min_rmsd]
    for conf_idx, conf in enumerate(conformers):
        score, rmsd = conf.normalised_score, conf.rmsd(
        )  # Conformer score and RMSD as reported by the conformer itself
        min_conf = molecule_minimiser.minimise(
            conf.molecule)  # Minimise conformer
        min_conformers.append(min_conf)  # Add to list
        min_rmsd = round(
            MolecularDescriptors.rmsd(mol, min_conf), 3
        )  # Minimized RMSD (!!this method gives different results compared to conf.rmsd())
        # Conformer names are 1-based: "<mol_name>-1", "<mol_name>-2", ...
        conf_name = '%s-%i' % (mol_name, conf_idx + 1)
        log_data[conf_name] = [score, rmsd, min_rmsd]
        # One output file per conformer; the extension is taken from args.format.
        conf_path = os.path.join(conformers_dir,
                                 '%s.%s' % (conf_name, args.format))
        with io.MoleculeWriter(conf_path) as molecule_writer:
            molecule_writer.write(min_conf)
    # From here on the minimised molecules stand in for the conformers.
    conformers_mol = min_conformers
    if args.log:
        # log.yml is written relative to the current working directory.
        print('Saving log file (log.yml)...')
        with open('log.yml', 'w') as log:
            yaml.dump(log_data, log)
    print('Conformers saved in %s | format: %s\n' %
          (conformers_dir, args.format))
Example #10
0
def do_dock(args):
    """Dock one ligand with every scoring/rescoring pair and tabulate the RMSDs.

    For each ordered pair of scoring functions (no rescoring when both are the
    same) a ``gold.conf`` is generated with ``template``, the GOLD executable
    is run on it, and ranked poses 1-30 are compared (hydrogens excluded) with
    the reference ligand ``<pdb>_ref-ligand.pdb``. The collected values are
    written to ``<data_dir>/<pdb>/<run_id>/results.csv``.

    :param args: 5-tuple ``(pdb, data_dir, fit_pts, fit_pts_path, run_id)``.
        ``fit_pts`` is compared with the string ``"1"``: when equal,
        ``fit_pts_path`` (relative to ``<data_dir>/<pdb>``) must exist and is
        used; otherwise ``"fit_pts.mol2"`` is used.
    :return: None
    :raises AssertionError: when fitting points are requested but the file
        does not exist
    """
    pdb, data_dir, fit_pts, fit_pts_path, run_id = args

    protein_path = os.path.join(data_dir, pdb, f"{pdb}_receptor.mol2")
    ligand_path = os.path.join(data_dir, pdb, f"{pdb}_ligand.mol2")
    ref_path = os.path.join(data_dir, pdb, f"{pdb}_ref.mol2")
    # Reference used for the RMSD. It has its own name because reusing
    # `ref_path` inside the loop changed the reference handed to `template`
    # for every run after the first one.
    rmsd_ref_path = os.path.join(data_dir, pdb, f"{pdb}_ref-ligand.pdb")

    if fit_pts == "1":
        fp_path = os.path.join(data_dir, pdb, fit_pts_path)
        assert os.path.exists(fp_path)

    else:
        fp_path = "fit_pts.mol2"

    scoring_funcs = ["goldscore", "chemscore", "asp", "plp"]
    auto_scale = 1
    gold_exe = "/local/pcurran/CCDC/Custom_GOLD/discovery-build-developer/bin/gold_auto"

    # Parallel lists: one entry per (scoring, rescoring) run.
    dock_func = []
    rescor_func = []
    rmsds = []

    for scor in scoring_funcs:
        for rescor in scoring_funcs:

            outdir = check_dir(os.path.join(data_dir, pdb, run_id))

            if scor == rescor:
                # Same function twice means "dock only, no rescoring".
                rescor = None
                outdir = check_dir(os.path.join(outdir, f"{scor}"))

            else:
                outdir = check_dir(os.path.join(outdir, f"{scor}_{rescor}"))

            dump_dir = check_dir(os.path.join(outdir, "data"))

            conf_file = template(auto_scale,
                                 ref_path,
                                 ligand_path,
                                 protein_path,
                                 scor,
                                 rescor,
                                 dump_dir=dump_dir,
                                 fit_pts=fp_path,
                                 fit=fit_pts)

            with open(os.path.join(outdir, "gold.conf"), "w") as w:
                w.write(conf_file)

            cmd = f"{gold_exe} {outdir}/gold.conf"
            os.system(cmd)

            ref = MoleculeReader(rmsd_ref_path)[0]
            docks = [remove_dummy_atoms(
                MoleculeReader(os.path.join(dump_dir, f"ranked_{pdb}_ligand_m1_{i}.mol2"))[0]
            )
                for i in range(1, 31)]

            r = [MolecularDescriptors.rmsd(ref, dock, exclude_hydrogens=True) for dock in docks]

            dock_func.append(scor)
            rescor_func.append(rescor)
            rmsds.append(r)

    runs = len(scoring_funcs) ** 2
    # Transpose: one tuple per pose rank, holding that rank's RMSD in each run.
    ranked_rmsds = zip(*rmsds)

    data = {"pdbs": [pdb] * runs,
            "run": [run_id] * runs,
            "dock_func": dock_func,
            "rescor_func": rescor_func}

    # Columns r0, r1, ... hold the RMSD of ranked pose 1, 2, ... per run.
    data.update({f"r{i}": x for i, x in enumerate(ranked_rmsds)})

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(data_dir, pdb, run_id, "results.csv"))
Example #11
0
    def evaluate_molecule(self, molid, input_molecule, expected_molecule,
                          results_folder, cg_settings, save_best, save_all,
                          generator, rmsd_method):
        """
           Runs the conformer generator on the input molecule and then reports statistics
           for the resultant conformers as compared to the expected molecule.

           :param molid: identifier used in progress output and output file names
           :param input_molecule: molecule handed to the conformer generator
           :param expected_molecule: molecule every conformer is compared with
           :param results_folder: folder for ``<molid>_best.mol2`` /
               ``<molid>_all.mol2``; ``None`` disables both
           :param cg_settings: conformer settings to use, or ``None`` for a
               default ``ConformerSettings()``
           :param save_best: write the conformer with the lowest RMSD
           :param save_all: write every conformer into one file
           :param generator: ``'etkdg'`` selects ``EtkdgGenerator``; any other
               value selects ``ConformerGenerator``
           :param rmsd_method: ``"rdkit"`` selects ``self.rdkit_rmsd``; any
               other value selects the overlay without torsion rotation
           :raises RuntimeError: when ``'etkdg'`` is requested but unavailable

           returns a dict with the keys ``first_overlay_result``,
           ``best_overlay_result``, ``rank_of_first_with_rmsd_less_than``
           (threshold -> 1-based rank, -1 when never reached),
           ``ensemble_size``, ``gentime`` (seconds), ``probably_input``,
           ``without_inversion_input_rmsd``, ``best_input_rmsd``,
           ``rank_of_best`` (1-based) and ``degrees_of_freedom``.
        """

        #global_logger.set_ccdc_log_level(1)

        # Assess Input RMSD

        input_overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
            input_molecule, expected_molecule, rotate_torsions=True)
        input_overlay_with_inversion = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
            input_molecule,
            expected_molecule,
            invert=True,
            rotate_torsions=True)

        without_inversion_input_rmsd = input_overlay[1]
        best_input_rmsd = min(without_inversion_input_rmsd,
                              input_overlay_with_inversion[1])

        settings = conformer_module.ConformerSettings()
        if cg_settings is not None:
            settings = cg_settings
        print('Generating', molid)
        start = time.time()
        if generator == 'etkdg':
            if not etkdg_available:
                # `runtime_error` is not a Python name, so the old line raised
                # NameError instead of the intended error.
                raise RuntimeError('etkdg is not available')
            conformer_generator = etkdg_generator.EtkdgGenerator(settings)
        else:
            conformer_generator = conformer_module.ConformerGenerator(settings)
        conformers = conformer_generator.generate(input_molecule)
        gentime = time.time() - start
        print(molid, 'Took', gentime)

        #global_logger.set_ccdc_log_level(0)

        rank_of_best = 0  # 0-based index into `conformers`
        best_overlay_result = None
        best_overlay_molecule = None
        first_overlay_result = None
        # threshold -> 1-based rank of the first conformer below it (-1: none)
        rank_of_first_with_rmsd_less_than = {}
        for threshold in self.thresholds:
            rank_of_first_with_rmsd_less_than[threshold] = -1

        ensemble_size = len(conformers)

        for conformer_number, conformer in enumerate(conformers, 1):
            if rmsd_method == "rdkit":
                overlay = self.rdkit_rmsd(conformer.molecule,
                                          expected_molecule)
            else:
                overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
                    conformer.molecule,
                    expected_molecule,
                    rotate_torsions=False)

            if conformer_number == 1:
                first_overlay_result = overlay

            # Strict "<" keeps the earliest conformer when RMSDs tie.
            if (best_overlay_result is None
                    or overlay[1] < best_overlay_result[1]):
                best_overlay_result = overlay
                rank_of_best = conformer_number - 1
                best_overlay_molecule = conformer.molecule

            for threshold in rank_of_first_with_rmsd_less_than:
                if (rank_of_first_with_rmsd_less_than[threshold] == -1
                        and overlay[1] < threshold):
                    rank_of_first_with_rmsd_less_than[
                        threshold] = conformer_number

        if results_folder is not None:
            if save_best:
                filename = os.path.join(results_folder, "%s_best.mol2" % molid)
                with MoleculeWriter(filename) as mol_writer:
                    mol_writer.write(conformers[rank_of_best].molecule)

            if save_all:
                filename = os.path.join(results_folder, "%s_all.mol2" % molid)
                with MoleculeWriter(filename) as mol_writer:
                    for conf in conformers:
                        mol_writer.write(conf.molecule)

        # A best conformer that overlays on the input almost exactly is
        # flagged as "probably the input".
        input_with_best = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
            best_overlay_molecule, input_molecule, rotate_torsions=False)
        probably_input = input_with_best[1] < 0.001

        degrees_of_freedom = conformers.n_flexible_rings_in_molecule + conformers.n_rotamers_in_molecule

        return {
            'first_overlay_result': first_overlay_result,
            'best_overlay_result': best_overlay_result,
            'rank_of_first_with_rmsd_less_than':
            rank_of_first_with_rmsd_less_than,
            'ensemble_size': ensemble_size,
            'gentime': gentime,
            'probably_input': probably_input,
            'without_inversion_input_rmsd': without_inversion_input_rmsd,
            'best_input_rmsd': best_input_rmsd,
            'rank_of_best': rank_of_best + 1,
            'degrees_of_freedom': degrees_of_freedom
        }
    print("Processed file --> " + file_name)
    # Read the first crystal of the file and assign its bonds.
    crystal_reader = CrystalReader(file_path)
    crystal = crystal_reader[0]
    crystal.assign_bonds()
    # Pack the box (0, 0, 0)-(1, 1, 1), selecting molecules with the
    # 'CentroidIncluded' inclusion mode, then normalise the atom labels.
    packed_molecules = crystal.packing(box_dimensions=((0, 0, 0), (1, 1, 1)),
                                       inclusion='CentroidIncluded')
    packed_molecules.normalise_labels()

    adta_molecules = []  # components with more than one atom
    cent = []  # atom centroid of each kept component, same order
    cent_points = []  # the centroids as [x, y, z] rounded to 3 decimals
    for comp in packed_molecules.components:
        # Single-atom components are skipped.
        if (len(comp.atoms) > 1):
            adta_molecules.append(comp)
            # The centroid is computed over all atoms of the component.
            cent.append(MD.atom_centroid(*list(a for a in comp.atoms)))

    for c in cent:
        cent_points.append([round(c[0], 3), round(c[1], 3), round(c[2], 3)])

#---------------------------------------------------------------------
# For files `start` to `end` in the input directory, export pairwise
# comparison tables for RMSD, powder and LK (all three only if every
# flag is set to True).
#
# The default settings run the powder and RMSD comparisons for all
# files in the directory.
#---------------------------------------------------------------------


def linkcomp(start, end, pcomp=True, rcomp=True, lktest=False):