예제 #1
0
    def _get_volume_overlap(self, cav_id, other_id, lig_id):
        """
        Calculate percentage volume overlaps between a ligand, a best
        continuous volume (BCV) and a cavity hotspot result, and write them
        to the pre-configured output files.

        Inputs are read from:
            - ``self.extracted_ligands[other_id][lig_id]`` (first molecule)
            - ``<self.hotspot[cav_id]>/out.zip``
            - ``<self.bcv[cav_id][other_id][lig_id]>/out.zip``

        Outputs (one file each, indexed by ``[cav_id][other_id][lig_id]``):
            - ``self.bcv_lig_overlaps``: BCV/ligand overlap as % of ligand volume
            - ``self.bcv_hot_overlaps``: BCV/ligand overlap as % of BCV volume
            - ``self.hot_lig_overlaps``: hotspot/ligand overlap as % of ligand
              volume, comma separated, one value per score threshold (10, 14, 17)
            - ``self.hot_hot_overlaps``: hotspot/ligand overlap as % of the
              thresholded hotspot volume, comma separated, one value per threshold

        If either ``out.zip`` is missing a message is printed and nothing is
        written.

        :param cav_id: key of the cavity whose hotspot result is used
        :param other_id: key used together with `lig_id` to select ligand and BCV
        :param lig_id: key of the ligand
        :return: None
        """

        def nonzero(val):
            # An empty volume is replaced by 1 so it can safely be a divisor.
            return 1 if val == 0 else val

        def percentage(part, whole):
            return str((part / whole) * 100)

        # inputs
        mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
        path1 = os.path.join(self.hotspot[cav_id], "out.zip")
        path2 = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")
        thresholds = [10, 14, 17]

        if not (os.path.exists(path1) and os.path.exists(path2)):
            print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
            return

        bcv = HotspotReader(path2).read()
        hot = HotspotReader(path1).read()

        # tasks
        other = Grid.from_molecule(mol)

        bcv_sg = Grid.get_single_grid(bcv.super_grids, mask=False)
        bcv_overlap = bcv_sg._mutually_inclusive(other=other).count_grid()

        # Guard every divisor the same way: an empty ligand or BCV grid would
        # otherwise raise ZeroDivisionError when the percentages are computed.
        lig_vol = nonzero((other > 0).count_grid())
        bcv_vol = nonzero((bcv_sg > 0).count_grid())

        # The combined hotspot grid does not depend on the threshold, so it is
        # built once and only the thresholding is repeated.
        hot_sg = Grid.get_single_grid(hot.super_grids, mask=False)
        hot_sgs = [hot_sg > t for t in thresholds]
        hot_vols = [nonzero(sg.count_grid()) for sg in hot_sgs]
        hot_overlap = [sg._mutually_inclusive(other=other).count_grid() for sg in hot_sgs]

        # output
        with open(self.bcv_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(percentage(bcv_overlap, lig_vol))

        with open(self.bcv_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(percentage(bcv_overlap, bcv_vol))

        with open(self.hot_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            hot_lig = [percentage(overlap, lig_vol) for overlap in hot_overlap]
            print(hot_lig)
            writer.write(",".join(hot_lig))

        with open(self.hot_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            hot_hot = [percentage(overlap, vol) for overlap, vol in zip(hot_overlap, hot_vols)]
            writer.write(",".join(hot_hot))
예제 #2
0
    def __init__(self, hr, settings=None):
        """
        Set up the extractor from a hotspot result.

        The charged maps are deduplicated against their polar counterparts
        ("negative" against "acceptor", "positive" against "donor") and the
        masked grid dictionary and single grid are then derived from the
        result's super grids. `hr.super_grids` is modified in place.

        :param hr: hotspot result exposing a `super_grids` mapping
        :param settings: optional settings object; `self.Settings()` is used
            when omitted
        """
        self.settings = self.Settings() if settings is None else settings
        self._single_grid = None
        self._masked_dic = None
        self.out_dir = None
        self.extracted_hotspots = None
        self.threshold = None

        # A missing map (KeyError) ends the deduplication silently; pairs are
        # processed in order, so a failure on the first pair skips the second.
        charged_to_polar = (("negative", "acceptor"), ("positive", "donor"))
        try:
            for charged, polar in charged_to_polar:
                hr.super_grids[charged] = hr.super_grids[charged].deduplicate(
                    hr.super_grids[polar],
                    threshold=10,
                    tolerance=2)
        except KeyError:
            pass

        self.hotspot_result = hr
        masked, single = Grid.get_single_grid(self.hotspot_result.super_grids)
        self._masked_dic = masked
        self._single_grid = single
예제 #3
0
def tractability_workflow(protein, tag):
    """
    Minimal tractability workflow for a single protein.

    Runs a fragment hotspot calculation, extracts a best continuous volume of
    500, and tabulates the grid values (threshold 5) of the combined grid
    together with their median.

    :param str protein: PDB identification code
    :param str tag: Tractability tag: either 'druggable' or 'less-druggable'
    :return: `pandas.DataFrame` with columns 'scores', 'pdb', 'median' and
        'tractability', one row per grid value
    """
    # Step 1: fragment hotspot calculation
    hotspot_result = Runner().from_pdb(pdb_code=protein,
                                       nprocesses=1,
                                       buriedness_method='ghecom')

    # Step 2: best continuous volume
    best_volume = Extractor(hr=hotspot_result).extract_volume(volume=500)

    # Step 3: scores of the combined grid and their median
    combined = Grid.get_single_grid(best_volume.super_grids, mask=False)
    scores = combined.grid_values(threshold=5)
    median_score = np.median(scores)

    # Step 4: one row per score; the scalar columns are repeated to match
    n_rows = len(scores)
    columns = {
        'scores': scores,
        'pdb': [protein] * n_rows,
        'median': [median_score] * n_rows,
        'tractability': [tag] * n_rows,
    }
    return pd.DataFrame(columns)
예제 #4
0
def augmentation(hr, hits):
    """
    Prepare the maps of a hotspot result for scoring pharmacophore hits.

    The maps are dilated, brought onto common boundaries with a grid built
    around the protein heavy atoms, shrunk to a grid enclosing all hit poses
    and replaced by the masked grids returned by `Grid.get_single_grid`.
    Finally `set_background` and `normalize_to_max` are applied.

    :param hr: hotspot result (exposes `super_grids` and `protein`); modified
        in place
    :param hits: object whose `hits` attribute iterates over entries with a
        `molecule`
    :return: the same `hr` object
    """
    # blank grid large enough to hold every pharmacophore pose
    pose_coords = {
        atm.coordinates
        for mol in hits.hits for atm in mol.molecule.heavy_atoms
    }
    small_blank = Grid.initalise_grid(coords=pose_coords, padding=3)

    # dilate the grids
    dilated = {p: g.dilate_by_atom() for p, g in hr.super_grids.items()}
    hr.super_grids.update(dilated)

    # inflate onto a grid built around the protein
    protein_coords = [a.coordinates for a in hr.protein.heavy_atoms]
    prot_g = Grid.initalise_grid(protein_coords, padding=1)
    inflated = {p: prot_g.common_boundaries(g) for p, g in hr.super_grids.items()}
    hr.super_grids.update(inflated)

    # shrink hotspot maps to save time
    sub_grids = {}
    for p, g in hr.super_grids.items():
        sub_grids[p] = Grid.shrink(small=small_blank, big=g)

    # only the masked dictionary is needed; the single grid is discarded
    mask_dic, _ = Grid.get_single_grid(sub_grids)
    hr.super_grids = mask_dic

    # background, then normalisation
    hr.set_background()
    hr.normalize_to_max()
    return hr
예제 #5
0
 def score_hotspot(self, threshold=5, percentile=50):
     """
     Score the hotspot from the values of its combined (unmasked) single grid.

     The score is delegated to `grid_score`; with the default percentile of
     50 this is the median of the included points.
     NB: grid points with value < 5 are omitted from fragment hotspot maps
     (hence the default threshold).

     :param threshold: passed through to `grid_score`
     :param percentile: passed through to `grid_score`
     :return: the value returned by `grid_score`
     """
     combined = Grid.get_single_grid(self.hotspot_result.super_grids, mask=False)
     score = combined.grid_score(threshold=threshold, percentile=percentile)
     return score
예제 #6
0
    def _calc_hotspots(self, return_probes=False):
        """
        Handle the organisation of the hotspot calculation.

        Steps, in order:
            1. atomic hotspot detection (result stored in `self.superstar_grids`)
            2. buriedness calculation (stored in `self.buriedness`, only when
               it is not already set)
            3. weighting of the maps (stored in `self.weighted_grids`)
            4. sampling of every probe type via `self._get_out_maps`

        :param return_probes: optional, bool indicating if the output of the
            sampling should be stored per probe in `self.sampled_probes`
        :return: None
        """
        print("Start atomic hotspot detection\n        Processors: {}".format(self.nprocesses))
        a = AtomicHotspot()
        atomic_probes = {"apolar": "AROMATIC CH CARBON",
                         "donor": "UNCHARGED NH NITROGEN",
                         "acceptor": "CARBONYL OXYGEN"}
        if self.charged_probes:
            # Charged probes are run in addition to the standard probes, so
            # the dictionary is extended rather than replaced.
            atomic_probes.update({"negative": "CARBOXYLATE OXYGEN",
                                  "positive": "CHARGED NH NITROGEN"})
        a.settings.atomic_probes = atomic_probes

        probe_types = a.settings.atomic_probes.keys()
        self.superstar_grids = a.calculate(protein=self.protein,
                                           nthreads=self.nprocesses,
                                           cavity_origins=self.cavities)

        print("Atomic hotspot detection complete\n")

        print("Start buriedness calcualtion")
        # A buriedness grid that is already set is reused as-is.
        method = self.buriedness_method.lower()
        if method == 'ghecom' and self.buriedness is None:
            print("    method: Ghecom")
            out_grid = self.superstar_grids[0].buriedness.copy_and_clear()
            b = Buriedness(protein=self.protein,
                           out_grid=out_grid)
            self.buriedness = b.calculate().grid
        elif method == 'ligsite' and self.buriedness is None:
            print("    method: LIGSITE")
            self.buriedness = Grid.get_single_grid(grd_dict={s.identifier: s.buriedness for s in self.superstar_grids},
                                                   mask=False)

        self.weighted_grids = self._get_weighted_maps()

        print("Buriedness calcualtion complete\n")

        print("Start sampling")
        grid_dict = {w.identifier: w.grid for w in self.weighted_grids}

        for probe in probe_types:
            if return_probes:
                # dict.update takes a mapping, not a (key, value) pair of
                # positional arguments.
                self.sampled_probes.update({probe: self._get_out_maps(probe, grid_dict)})

            else:
                self._get_out_maps(probe, grid_dict)

        print("Sampling complete\n")
예제 #7
0
    def percentage_matched_atoms(self, mol, threshold, match_atom_types=True):
        """
        Percentage of a molecule's heavy atoms that overlap with the hotspot
        result (over a given overlap threshold).

        With `match_atom_types`, each map in `self.super_grids` is only tested
        against the heavy atoms of the same type. Atoms typed 'doneptor' are
        tested against both the 'donor' and the 'acceptor' map, so they can be
        counted twice. Otherwise all heavy atoms are tested against the
        combined single grid.

        :param mol: molecule exposing `heavy_atoms`
        :param threshold: passed through to `matched_atoms`
        :param match_atom_types: bool, match atoms against the map of their type
        :return: `(percentage, {map name: matched atoms})` when
            `match_atom_types` is True, otherwise just the percentage; the
            percentage is rounded to one decimal place
        """
        if not match_atom_types:
            combined = Grid.get_single_grid(self.super_grids, mask=False)
            hits = combined.matched_atoms(atoms=mol.heavy_atoms,
                                          threshold=threshold)
            n_matched = len(hits)

            print("heavy atoms matched: {}/{}".format(n_matched,
                                                      len(mol.heavy_atoms)))
            return round((n_matched / len(mol.heavy_atoms)) * 100, 1)

        both_polar_maps = ('donor', 'acceptor')
        n_matched = 0
        per_type = {}
        for probe, grid in self.super_grids.items():
            # if an atom is a donor and acceptor consider overlap twice
            candidates = []
            for atom in mol.heavy_atoms:
                if self.get_atom_type(atom) == probe:
                    candidates.append(atom)
                elif probe in both_polar_maps and self.get_atom_type(atom) == 'doneptor':
                    candidates.append(atom)

            if not candidates:
                continue

            hits = grid.matched_atoms(atoms=candidates, threshold=threshold)
            n_matched += len(hits)
            per_type[probe] = hits

        print("heavy atoms matched: {}/{}".format(n_matched,
                                                  len(mol.heavy_atoms)))
        print("breakdown by atom type", str(per_type))
        percentage = round((n_matched / len(mol.heavy_atoms)) * 100, 1)
        return percentage, per_type
예제 #8
0
    def __init__(self, hr, settings=None):
        """
        Set up the extractor from a hotspot result.

        Processing applied to `hr.super_grids` (in place), in order:
            1. if `settings.mvon` is True, every grid is replaced by its
               `max_value_of_neighbours()`
            2. "negative" is deduplicated against "acceptor" and "positive"
               against "donor"; a missing map ends this step silently
            3. every grid is replaced by its `minimal()` form; a RuntimeError
               leaves the grids as they are

        The masked grid dictionary and single grid are then derived from the
        processed super grids.

        :param hr: hotspot result exposing a `super_grids` mapping
        :param settings: optional settings object; `self.Settings()` is used
            when omitted
        """
        self.settings = self.Settings() if settings is None else settings
        self._single_grid = None
        self._masked_dic = None
        self.out_dir = None
        self.extracted_hotspots = None
        self.threshold = None

        if self.settings.mvon is True:
            # (a dilate_by_atom() variant of this step is left disabled)
            smoothed = {
                probe: g.max_value_of_neighbours()
                for probe, g in hr.super_grids.items()
            }
            hr.super_grids.update(smoothed)

        # Pairs are processed in order inside one try block, so a KeyError on
        # the first pair also skips the second.
        charged_to_polar = (("negative", "acceptor"), ("positive", "donor"))
        try:
            for charged, polar in charged_to_polar:
                hr.super_grids[charged] = hr.super_grids[charged].deduplicate(
                    hr.super_grids[polar],
                    threshold=10,
                    tolerance=2)
        except KeyError:
            pass

        try:
            minimal_grids = {
                probe: g.minimal()
                for probe, g in hr.super_grids.items()
            }
            hr.super_grids.update(minimal_grids)
        except RuntimeError:
            pass

        self.hotspot_result = hr
        masked, single = Grid.get_single_grid(self.hotspot_result.super_grids)
        self._masked_dic = masked
        self._single_grid = single
예제 #9
0
    def _docking_fitting_pts(self, _best_island=None, threshold=17):
        """
        Build a "constraints" molecule with one atom per scored grid point.

        The points come from `grid_value_by_coordinates(threshold=threshold)`
        on `_best_island` when it is supplied (and truthy), otherwise on the
        combined single grid of `self.super_grids`. Each atom carries the
        score as its partial charge and, formatted to two decimals, as its
        label.

        :param _best_island: optional grid to take the points from
        :param threshold: passed through to `grid_value_by_coordinates`
        :return: the constructed `Molecule`
        """
        single_grid = (_best_island if _best_island
                       else Grid.get_single_grid(self.super_grids, mask=False))
        points_by_score = single_grid.grid_value_by_coordinates(threshold=threshold)

        constraints = Molecule(identifier="constraints")
        for score, points in points_by_score.items():
            for xyz in points:
                # NOTE(review): atomic_symbol 'C' is paired with atomic_number
                # 14 (carbon is 6) - confirm which one Atom honours before
                # changing either value.
                atom = Atom(atomic_symbol='C',
                            atomic_number=14,
                            label='{:.2f}'.format(score),
                            coordinates=xyz)
                atom.partial_charge = score
                constraints.add_atom(atom=atom)
        return constraints
예제 #10
0
    def testscore_atoms_as_spheres(self):
        """
        Exercise `score_atoms_as_spheres` on the first docking pose.

        Runs inside "testdata/result/data": reads the poses from
        "gold_docking_poses.sdf" and the hotspot result from "out.zip",
        dilates and shrinks the maps onto a grid enclosing all poses, swaps in
        the masked grids, then prints the grid extrema and the score.
        """
        with PushDir("testdata/result/data"):
            mols = list(MoleculeReader("gold_docking_poses.sdf"))

            # blank grid large enough to hold every docking pose
            pose_coords = {
                atm.coordinates
                for mol in mols for atm in mol.heavy_atoms
            }
            small_blank = Grid.initalise_grid(coords=pose_coords, padding=2)

            # read hotspot maps
            with HotspotReader(path="out.zip") as r:
                self.result = r.read()

            # dilate the grids
            dilated = {
                p: g.dilate_by_atom()
                for p, g in self.result.super_grids.items()
            }
            self.result.super_grids.update(dilated)

            # shrink hotspot maps to save time
            sub_grids = {}
            for p, g in self.result.super_grids.items():
                sub_grids[p] = Grid.shrink(small=small_blank, big=g)

            # only the masked dictionary is used; the single grid is discarded
            mask_dic, _ = Grid.get_single_grid(sub_grids)
            self.result.super_grids = mask_dic

            # background, then normalisation
            self.result.set_background()
            self.result.normalize_to_max()

            print([g.extrema for p, g in self.result.super_grids.items()])

            for pose in mols[:1]:
                score = self.result.score_atoms_as_spheres(pose, small_blank)
                print(score)
예제 #11
0
def augmentation(hr, entries):
    """
    Prepare the maps of a hotspot result for scoring docking poses.

    The maps are brought onto common boundaries with a grid built around the
    protein heavy atoms, shrunk to a grid enclosing the sampled docking poses
    and replaced by the masked grids returned by `Grid.get_single_grid`.
    Finally `set_background` and `normalize_to_max` are applied.

    :param hr: hotspot result (exposes `super_grids` and `protein`); modified
        in place
    :param entries: iterable of entries with a `molecule`; iteration stops
        after the entry at index 101
    :return: the same `hr` object
    """
    # collect pose coordinates; the loop breaks once the index exceeds 100
    coords = set()
    for index, entry in enumerate(entries):
        coords.update(atm.coordinates for atm in entry.molecule.heavy_atoms)
        if index > 100:
            break

    small_blank = Grid.initalise_grid(coords=coords, padding=12)
    # (the dilate_by_atom() step on the super grids is disabled here)

    # inflate onto a grid built around the protein
    protein_coords = [a.coordinates for a in hr.protein.heavy_atoms]
    prot_g = Grid.initalise_grid(protein_coords, padding=1)
    inflated = {p: prot_g.common_boundaries(g) for p, g in hr.super_grids.items()}
    hr.super_grids.update(inflated)

    # shrink hotspot maps to save time
    sub_grids = {}
    for p, g in hr.super_grids.items():
        sub_grids[p] = Grid.shrink(small=small_blank, big=g)

    # only the masked dictionary is needed; the single grid is discarded
    mask_dic, _ = Grid.get_single_grid(sub_grids)
    hr.super_grids = mask_dic

    # background, then normalisation
    hr.set_background()
    hr.normalize_to_max()
    return hr
예제 #12
0
    def single_grid_result(self):
        """
        Replace `self.super_grids` with the grids obtained by building the
        single grid and inverting it again with its masked dictionary.

        :return: None; `self.super_grids` is reassigned
        """
        masked, single = Grid.get_single_grid(self.super_grids)
        self.super_grids = single.inverse_single_grid(masked)