Python Helper Beispiele, hotspots.hs_utilities.Helper Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: calculation.py Projekt: pslacerda/hotspots

    def cavities(self, obj):
        """
        Optional setting: store the cavity centres supplied to the calculation.

        The outcome is written to ``self._cavities`` as a list of centre points,
        or ``None`` when nothing usable was supplied (a warning is printed in
        that case and the calculation is expected to run on the whole protein).

        Accepted inputs:

        * ``None`` - no cavity information
        * a single ``Coordinates`` instance - stored as a one-item list
        * a single ``ccdc.molecule.Molecule`` - its centre of geometry is stored
        * a single ``ccdc.cavity.Cavity`` - its centroid is stored
        * a non-empty list/tuple of molecules or of cavities; the item type is
          decided by inspecting the first item only

        :param obj: cavity information provided
        :return: None
        """
        warning = "WARNING! Failed to detected cavity, Atomic Hotspot detection to run on whole protein"

        if obj is None:
            self._cavities = None

        elif isinstance(obj, (list, tuple)):
            if len(obj) == 0:
                # Nothing to inspect: previously obj[0] raised IndexError here.
                print(warning)
                self._cavities = None
            elif isinstance(obj, Coordinates):
                # A single coordinate is tested inside the list/tuple branch
                # (presumably Coordinates is a tuple subclass - TODO confirm).
                self._cavities = [obj]
            elif isinstance(obj[0], Molecule):
                self._cavities = [m.centre_of_geometry() for m in obj]
            elif isinstance(obj[0], Cavity):
                self._cavities = [Helper.cavity_centroid(c) for c in obj]
            else:
                print(warning)
                self._cavities = None

        elif isinstance(obj, Molecule):
            self._cavities = [obj.centre_of_geometry()]
        elif isinstance(obj, Cavity):
            self._cavities = [Helper.cavity_centroid(obj)]

        else:
            print(warning)
            self._cavities = None

Beispiel #2

0

Datei anzeigen

Datei: hs_pharmacophore.py Projekt: pslacerda/hotspots

    def get_projected_coordinates(feature_type, feature_coordinates, protein,
                                  settings):
        """
        For a given polar feature, locate the nearest h-bonding partner atom on the protein.

        A 'donor' feature is paired with protein acceptor atoms; any other
        feature type is paired with protein donor atoms. Only atoms closer than
        ``settings.max_hbond_dist`` are considered.

        :param feature_type: interaction type of the feature, e.g. 'donor'
        :param feature_coordinates: coordinates of the feature
        :param protein: a :class:`ccdc.protein.Protein` instance
        :param settings: object exposing ``max_hbond_dist``
        :return: coordinates of the closest partner atom, or None if no atom is in range
        """
        pair_with_acceptors = feature_type == 'donor'
        partners = [atom for atom in protein.atoms
                    if (atom.is_acceptor if pair_with_acceptors else atom.is_donor)]

        # (distance, atom) pairs for every partner inside the cut-off
        in_range = []
        for atom in partners:
            separation = Helper.get_distance(atom.coordinates, feature_coordinates)
            if separation < settings.max_hbond_dist:
                in_range.append((separation, atom))

        if not in_range:
            return None

        # min() keeps the first encountered pair when distances tie
        _, nearest = min(in_range, key=lambda pair: pair[0])
        return nearest.coordinates

Beispiel #3

0

Datei anzeigen

def _molecule_as_grid(mol, g=None):
    """
    Produces a grid representation of a molecule split by interaction type

    :param mol: takes any ccdc molecule
    :type mol: `ccdc.molecule.Molecule`
    :param g: a blank grid
    :type g: `hotspots.grid_extension.Grid`

    :return: a dictionary of grids by interaction type
    :rtype: dict
    """
    if not g:
        g = Grid.initalise_grid(coords=[a.coordinates for a in mol.atoms],
                                padding=3)

    grid_dict = {"donor": g.copy(), "acceptor": g.copy(), "apolar": g.copy()}

    for p, g in grid_dict.items():
        atms = [a for a in mol.atoms if Helper.get_atom_type(a) == p]
        for atm in atms:
            g.set_sphere(point=atm.coordinates,
                         radius=atm.vdw_radius,
                         value=1,
                         scaling='None')

    return grid_dict

Beispiel #4

0

Datei anzeigen

Datei: grid_extension.py Projekt: prcurran/hotspots

    def neighbourhood(i, j, k, high, catchment=1):
        """
        Find the neighbouring indices of a grid point.

        Candidates are all index triples within ``catchment`` steps of (i, j, k)
        along each axis, clipped to ``0 <= index < high[axis]``. Of those, only
        the candidates for which ``Helper.get_distance(candidate, [i, j, k])``
        equals 1 are returned.

        :param i: i indice
        :param j: j indice
        :param k: k indice
        :param high: exclusive upper index bound for each of the three axes
        :param catchment: number of steps from the centre used to build the candidates

        :type i: int
        :type j: int
        :type k: int
        :type catchment: int

        :return: indices of the neighbourhood
        :rtype: list
        """
        def _axis_values(centre, axis):
            # indices along one axis that stay inside the grid
            return [v for v in range(centre - catchment, centre + catchment + 1)
                    if 0 <= v < high[axis]]

        i_values = _axis_values(i, 0)
        j_values = _axis_values(j, 1)
        k_values = _axis_values(k, 2)

        neighbours = []
        for a in i_values:
            for b in j_values:
                for c in k_values:
                    if Helper.get_distance([a, b, c], [i, j, k]) == 1:
                        neighbours.append([a, b, c])
        return neighbours

Beispiel #5

0

Datei anzeigen

    def _get_atomic_overlap(self, cav_id, other_id, lig_id):
        """
        Compute the per-atom volume overlap between a ligand and a stored result, and write it to disk.

        The ligand is read from ``self.extracted_ligands[other_id][lig_id]``. If
        ``out.zip`` exists under ``self.bcv[cav_id][other_id][lig_id]`` it is
        loaded with ``HotspotReader`` and ``atomic_volume_overlap`` is evaluated
        for the ligand. Otherwise a message is printed and every heavy atom gets
        an overlap of 0.0 under its atom type ("doneptor" atoms are recorded as
        both donor and acceptor).

        The resulting dictionary is written, as ``str(dict)``, to
        ``self.atomic_overlaps[cav_id][other_id][lig_id]``.

        :return: None
        """
        # inputs
        ligand = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
        archive = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")

        if not os.path.exists(archive):
            print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
            out = {"donor": {}, "acceptor": {}, "apolar": {}}
            for atom in ligand.heavy_atoms:
                atom_type = Helper.get_atom_type(atom)
                if atom_type == "doneptor":
                    targets = ("donor", "acceptor")
                else:
                    targets = (atom_type,)
                for key in targets:
                    out[key][atom.label] = 0.0
        else:
            # tasks
            result = HotspotReader(archive).read()
            out = result.atomic_volume_overlap(ligand)

        # output
        destination = self.atomic_overlaps[cav_id][other_id][lig_id]
        with open(destination, 'w') as writer:
            writer.write(str(out))

Beispiel #6

0

Datei anzeigen

Datei: grid_extension.py Projekt: jurgjn/hotspots

    def deduplicate(self, major, threshold=12, tolerance=2):
        """
        Deduplicate this grid against an overriding grid; used for charged-polar deduplication.

        Islands of this grid (at ``threshold``) are discarded when, for any
        island of ``major``, one of the following holds: the island contains the
        major island's centroid (within ``tolerance``), the island's
        ``count_grid()`` is 8 or less, or the two centroids are less than 4
        apart according to ``Helper.get_distance``.

        :param `ccdc.utilities.Grid` major: overriding grid
        :param int threshold: value passed to ``islands`` for both grids
        :param int tolerance: search radius for determining feature overlap
        :return: a grid holding the retained islands, or a cleared copy of
            ``major`` when nothing is retained
        """
        minor = self
        own_box, major_box = self.bounding_box, major.bounding_box
        if own_box[0] != major_box[0] or own_box[1] != major_box[1]:
            # bring this grid onto the same boundaries as the overriding grid
            minor = major.common_boundaries(self)

        all_islands = set(minor.islands(threshold=threshold))

        bin_islands = set()
        for jsland in all_islands:
            for island in major.islands(threshold=threshold):
                if (jsland.contains_point(island.centroid(), tolerance=tolerance)
                        or jsland.count_grid() <= 8
                        or Helper.get_distance(jsland.centroid(), island.centroid()) < 4):
                    bin_islands.add(jsland)
                    break

        retained_jslands = list(all_islands - bin_islands)

        if not retained_jslands:
            return major.copy_and_clear()

        temp = Grid.super_grid(0, *retained_jslands)
        blank = minor.copy_and_clear()
        return blank.common_boundaries(temp)

Beispiel #7

0

Datei anzeigen

Datei: hs_io.py Projekt: yassine-laguel/hotspots

    def _single_write(self, path, hr):
        """
        Write a single result into its own sub-directory of ``path``.

        The output directory ``<path>/<hr.identifier>`` is obtained through
        ``Helper.get_out_dir`` and stored on ``hr.out_dir``; the grids, the
        protein and the PyMOL objects are then written for that result.

        :param path: parent directory for the result
        :param hr: the result to write; its ``out_dir`` attribute is set here
        """
        target_dir = join(path, hr.identifier)
        hr.out_dir = Helper.get_out_dir(target_dir)

        self._write_grids(hr)
        self._write_protein(hr.out_dir, hr.protein)

        # PyMOL objects are addressed relative to the result's identifier
        self._write_pymol_objects("{}".format(hr.identifier), hr)

Beispiel #8

0

Datei anzeigen

    def _single_write(self, path, hr):
        """
        Write a single result into its own sub-directory of ``path``.

        The output directory ``<path>/<hr.identifier>`` is obtained through
        ``Helper.get_out_dir`` and stored on ``hr.out_dir``; the grids and the
        protein are written there. The relative path handed to
        ``_write_pymol_objects`` is prefixed with ``out`` only when ``path``
        ends in a directory named 'out' and ``self.zip_results`` equals False.

        :param path: parent directory for the result
        :param hr: the result to write; its ``out_dir`` attribute is set here
        """
        target_dir = join(path, hr.identifier)
        hr.out_dir = Helper.get_out_dir(target_dir)

        self._write_grids(hr)
        self._write_protein(hr.out_dir, hr.protein)

        label = "{}".format(hr.identifier)
        # The equality test against False is kept on purpose: a value such as
        # None does not compare equal to False and therefore gets the bare label.
        if basename(path) != 'out' or not (self.zip_results == False):  # noqa: E712
            relpath = label
        else:
            relpath = join(basename(path), label)
        self._write_pymol_objects(relpath, hr)

Beispiel #9

0

Datei anzeigen

        def get_priority_atom(self, molecule):
            """
            Select the priority atom of a molecule and classify it.

            Polar atoms (donors or acceptors) are preferred. Among the candidate
            atoms - the polar atoms, or all atoms when there is no polar atom -
            the one furthest from the centre of geometry is selected. When two
            candidates are at exactly the same distance, the later one in
            ``molecule.atoms`` order is kept.

            The type is "negative" or "positive" for an atom with a non-zero
            formal charge, otherwise "acceptor", "donor" or "apolar" (acceptor
            is tested before donor).

            :param molecule: a :class: `ccdc.molecule.Molecule` instance
            :return: the selected atom and its type as a str
            :raises IndexError: if the molecule has no atoms
            """
            c = molecule.centre_of_geometry()
            polar_atoms = [
                a for a in molecule.atoms if a.is_donor or a.is_acceptor
            ]
            candidates = polar_atoms if len(polar_atoms) > 0 else molecule.atoms

            atom_by_distance = {
                Helper.get_distance(c, a.coordinates): a for a in candidates
            }

            # Ascending sort: the LAST key is the greatest distance. Taking the
            # first key, as before, returned the atom nearest to the centre,
            # contradicting the documented behaviour.
            greatest_distance = sorted(atom_by_distance)[-1]
            priority_atom = atom_by_distance[greatest_distance]

            charge = priority_atom.formal_charge
            if charge < 0:
                pa_type = "negative"
            elif charge > 0:
                pa_type = "positive"
            elif priority_atom.is_acceptor:
                pa_type = "acceptor"
            elif priority_atom.is_donor:
                pa_type = "donor"
            else:
                pa_type = "apolar"

            return priority_atom, pa_type

Beispiel #10

0

Datei anzeigen

Datei: calculation.py Projekt: pslacerda/hotspots

 def update_grid(self):
     """
     Read the output file from the pocket detection and assign values to ``self.grid``.

     Every line starting with "HETATM" is parsed as a fixed-width PDB record:
     x, y and z occupy columns 31-38, 39-46 and 47-54 and the value read as
     ``rinacc`` occupies columns 61-66 (1-indexed, inclusive). The grid point
     nearest to the coordinates is set to ``9.5 - rinacc``.

     :return: None
     """
     lines = Helper.get_lines_from_file(self.settings.out_name)
     x, y, z = self.grid.nsteps
     for line in lines:
         if not line.startswith("HETATM"):
             continue
         # 0-indexed slices covering the full field widths; the previous
         # slices began one character late and dropped the leading sign or
         # digit of values that fill their field (e.g. "-123.456").
         coordinates = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
         rinacc = float(line[60:66])
         i, j, k = self.grid.point_to_indices(coordinates)
         # NOTE(review): index 0 is excluded on every axis - confirm whether
         # that is intentional before relaxing it to 0 <= i.
         if 0 < i < x and 0 < j < y and 0 < k < z:
             self.grid.set_value(i, j, k, 9.5 - rinacc)

Beispiel #11

0

Datei anzeigen

Datei: get_reference_grids.py Projekt: zeromtmu/phd-scripts

def to_grid(target, pdb):
    """
    Build reference pharmacophore grids for one target/PDB pair and write them out.

    The aligned ligands are read from
    ``Z:/patel_set/<target>/<pdb>/reference_pharmacophore/aligned_mols.mol2``,
    turned into a ``PharmacophoreModel`` and wrapped, together with the protein
    from ``hs/<pdb>.pdb``, in a ``Results`` object. The result is written as a
    zipped archive to ``reference_pharmacophore/grids`` with isosurface
    thresholds 2, 5 and 10.

    :param target: target name (directory below the data root)
    :param pdb: PDB code (sub-directory and protein file stem)
    :return: None
    """
    out_dir = "Z:/patel_set/{}/{}".format(target, pdb)
    pharmacophore_dir = join(out_dir, "reference_pharmacophore")

    ligands = MoleculeReader(join(pharmacophore_dir, "aligned_mols.mol2"))
    model = PharmacophoreModel.from_ligands(ligands=ligands, identifier="test")

    grids = model.dic
    protein = Protein.from_file(join(out_dir, "hs", "{}.pdb".format(pdb)))
    result = Results(super_grids=grids, protein=protein)

    out = Helper.get_out_dir(join(pharmacophore_dir, "grids"))

    settings = HotspotWriter.Settings()
    settings.isosurface_threshold = [2, 5, 10]

    with HotspotWriter(path=out, zip_results=True, settings=settings) as writer:
        writer.write(result)

Beispiel #12

0

Datei anzeigen

Datei: hs_io.py Projekt: yassine-laguel/hotspots

    def write(self, hr):
        """
        Write the Fragment Hotspot Maps result(s) to the output directory and create the PyMOL visualisation file.

        A list of results is written one by one; if the identifiers in the list
        are not unique, every result is renamed to ``hotspot-<index>``. A single
        result without an identifier is named ``hotspot``. After the results,
        the PyMOL isoslider, background colour and working-directory commands
        are appended, the output is compressed when ``self.zip_results`` is set,
        and ``pymol_file.py`` is written to ``self.path``.

        :param hr: a Fragment Hotspot Maps result or list of results
        :type hr: `hotspots.result.Result`

        >>> from hotspots.calculation import Runner
        >>> from hotspots.hs_io import HotspotWriter

        >>> r = Runner()
        >>> result = r.from_pdb("1hcl")
        >>> out_dir = "<path_to_out>"
        >>> with HotspotWriter(out_dir) as w:
        ...     w.write(result)

        """
        container = Helper.get_out_dir(join(self.path,
                                            self.settings.container))

        if isinstance(hr, list):
            if len({h.identifier for h in hr}) != len(hr):
                # if there are not unique identifiers, create some.
                for i, h in enumerate(hr):
                    h.identifier = f"hotspot-{i}"
            for h in hr:
                self._single_write(container, h)

        else:
            if not hr.identifier:
                hr.identifier = "hotspot"
            self._single_write(container, hr)

        self._write_pymol_isoslider(hr)
        self.pymol_out.commands += PyMOLCommands.background_color(
            self.settings.bg_color)
        self.pymol_out.commands += PyMOLCommands.push_to_wd()

        if self.zip_results:
            self.compress()

        self.pymol_out.write(join(self.path, "pymol_file.py"))

Beispiel #13

0

Datei anzeigen

    def pdb_to_grid(path, template):
        """
        Convert a PDB file to a grid.

        Every line starting with "HETATM" is parsed as a fixed-width PDB record:
        x, y and z occupy columns 31-38, 39-46 and 47-54 and the value read as
        ``rinacc`` occupies columns 61-66 (1-indexed, inclusive). The grid point
        nearest to the coordinates is set to ``9.5 - rinacc``. The template is
        modified in place and also returned.

        :param path: path to the input PDB
        :param template: empty grid, NB: must have same grid spec as superstar grids
        :type path: str
        :type template: `hotspots.grid_extension.Grid`

        :return: populated grid
        :rtype: `hotspots.grid_extension.Grid`
        """
        lines = Helper.get_lines_from_file(path)
        nx, ny, nz = template.nsteps
        for line in lines:
            if not line.startswith("HETATM"):
                continue
            # 0-indexed slices covering the full field widths; the previous
            # slices began one character late and dropped the leading sign or
            # digit of values that fill their field (e.g. "-123.456").
            coordinates = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
            rinacc = float(line[60:66])
            i, j, k = template.point_to_indices(coordinates)
            # NOTE(review): index 0 is excluded on every axis - confirm whether
            # that is intentional before relaxing it to 0 <= i.
            if 0 < i < nx and 0 < j < ny and 0 < k < nz:
                template.set_value(i, j, k, 9.5 - rinacc)
        return template

Beispiel #14

0

Datei anzeigen

    def _get_cavities(self, min_vol):
        """
        Detect cavities using the Cavity API and generate a new directory for each cavity.

        Cavities read from ``self.apo_prep`` with a volume greater than
        ``min_vol`` are kept. For cavity number ``n`` the directory
        ``<working_dir>/cavity_<n>`` is created and three files are written into
        it: ``cavity_origin.pkl`` (pickled centroid), ``cavity.volume`` (volume
        as text) and ``bounding_box.pkl`` (pickled bounding box).

        Afterwards the cavity names are appended to ``self.runs`` and the path
        look-up dictionaries on ``self`` are rebuilt (per cavity, per run, and
        per run / protein id / ligand id).

        :param min_vol: cavities must have a volume greater than this value
        :return: None
        """
        # inputs
        cavs = [cav for cav in Cavity.from_pdb_file(self.apo_prep) if cav.volume > min_vol]
        run_names = ["cavity_{}".format(n) for n, _ in enumerate(cavs)]
        run_dirs = [os.path.join(self.working_dir, name) for name in run_names]

        # task
        for run_dir in run_dirs:
            create_directory(path=run_dir)

        origins = [Helper.cavity_centroid(cav) for cav in cavs]
        volumes = [cav.volume for cav in cavs]
        boxes = [cav.bounding_box for cav in cavs]

        # output
        for run_dir, origin in zip(run_dirs, origins):
            with open(os.path.join(run_dir, "cavity_origin.pkl"), 'wb') as handle:
                pickle.dump(origin, handle)

        for run_dir, vol in zip(run_dirs, volumes):
            with open(os.path.join(run_dir, "cavity.volume"), 'w') as f:
                f.write(str(vol))

        for run_dir, bb in zip(run_dirs, boxes):
            with open(os.path.join(run_dir, "bounding_box.pkl"), 'wb') as h:
                pickle.dump(bb, h)

        # update attr
        self.runs += run_names

        def _cavity_file(filename):
            # path of one file inside each cavity directory found by this call
            return {name: os.path.join(self.working_dir, name, filename)
                    for name in run_names}

        def _run_subdir(subdir):
            # path of one sub-directory for every run registered so far
            return {run: os.path.join(self.working_dir, run, subdir)
                    for run in self.runs}

        def _timer_files(directories):
            # "time.time" file inside each of the given directories
            return {key: os.path.join(directory, "time.time")
                    for key, directory in directories.items()}

        def _per_ligand(*parts):
            # run -> protein id -> ligand id -> path; every part is formatted
            # with the ligand id before joining
            return {run: {pid: {lig: os.path.join(self.working_dir, run,
                                                  *[part.format(lig) for part in parts])
                                for lig in self.ligand_id[j]}
                          for j, pid in enumerate(self.protein_id)}
                    for run in self.runs}

        self.cavities = _cavity_file("cavity_origin.pkl")
        self.cavities_volumes = _cavity_file("cavity.volume")
        self.cavity_score = _cavity_file("cavity.score")
        self.bounding_box = _cavity_file("bounding_box.pkl")

        self.superstar = _run_subdir("superstar")
        self.superstar_time = _timer_files(self.superstar)

        self.hotspot = _run_subdir("hotspot")
        self.hotspot_time = _timer_files(self.hotspot)

        self.bcv = _per_ligand("bcv", "volume_{}")
        self.bcv_time = _per_ligand("bcv", "volume_{}", "time.time")
        self.bcv_threshold = _per_ligand("bcv", "volume_{}", "threshold.dat")

        self.bcv_lig_overlaps = _per_ligand("bcv", "lig_overlap_{}.percentage")
        self.bcv_hot_overlaps = _per_ligand("bcv", "hot_overlap_{}.percentage")

        self.hot_lig_overlaps = _per_ligand("hotspot", "lig_overlap_{}.percentage")
        self.hot_hot_overlaps = _per_ligand("hotspot", "hot_overlap_{}.percentage")

        self.atomic_overlaps = _per_ligand("bcv", "atomic_overlap_{}.dat")
        self.matched = _per_ligand("bcv", "atom_match_{}.percentage")

Beispiel #15

0

Datei anzeigen

    def write(self, hr):
        """
        Write the Fragment Hotspot Maps result to the output directory and create the PyMOL visualisation file.

        Two input shapes are handled:

        * a list of results: the container is set to "hotspot_boundaries"; the
          protein of the first result and a PyMOL file for the whole list are
          written to the container, then each result is written to a numbered
          sub-directory with its own grids, protein, optional pharmacophore and
          PyMOL file.
        * a single result: grids (with buriedness), protein, optional
          pharmacophore and PyMOL file are written to the configured container.

        In both cases the container is compressed when ``self.zipped`` is set.
        The method mutates ``self.settings``, ``self.number_of_hotspots`` and
        ``self.out_dir`` while it runs.

        :param `hotspots.result.Result` hr: a Fragment Hotspot Maps result or list of results

        >>> from hotspots.calculation import Runner
        >>> from hotspots.hs_io import HotspotWriter

        >>> r = Runner()
        >>> result = r.from_pdb("1hcl")
        >>> out_dir = "<path_to_out>"
        >>> with HotspotWriter(out_dir) as w:
        ...     w.write(result)


        """
        if isinstance(hr, list):
            # grid names are taken from the first result only
            self.settings.grids = list(hr[0].super_grids.keys())
            self.settings.container = "hotspot_boundaries"
            self.number_of_hotspots = len(hr)

            self.out_dir = Helper.get_out_dir(
                join(self.path, self.settings.container))

            # container level: one protein and one PyMOL file for the whole list
            self._write_protein(hr[0].protein)
            if hr[0].pharmacophore:
                self.settings.pharmacophore = True
            # hts = [h.hotspot_result for h in hr]
            self._write_pymol(hr, self.zipped)

            for i, hotspot in enumerate(hr):
                # each result goes to <container>/<index>
                self.out_dir = Helper.get_out_dir(
                    join(self.path, self.settings.container, str(i)))
                self.settings.isosurface_threshold = [
                    round(hotspot.threshold, 1)
                ]

                # comparison of the best island's neighbour maxima with the
                # result threshold; passed on as the mesh argument
                bi = (Grid.super_grid(
                    2, hotspot.best_island).max_value_of_neighbours() >
                      hotspot.threshold)

                self._write_grids(hotspot.super_grids,
                                  buriedness=None,
                                  mesh=bi)
                self._write_protein(hotspot.protein)

                if hotspot.pharmacophore:
                    self._write_pharmacophore(hotspot.pharmacophore)

                self._write_pymol(hotspot, False)

            # step back up from the last numbered sub-directory
            self.out_dir = dirname(self.out_dir)
            if self.zipped:
                self.compress(
                    join(dirname(self.out_dir), self.settings.container))

        else:
            self.settings.grids = list(hr.super_grids.keys())
            # self.settings.container = "out"
            self.number_of_hotspots = 1

            self.out_dir = Helper.get_out_dir(
                join(self.path, self.settings.container))
            self._write_grids(hr.super_grids, buriedness=hr.buriedness)
            self._write_protein(hr.protein)

            if hr.pharmacophore:
                self.settings.pharmacophore = True
                self._write_pharmacophore(hr.pharmacophore)
            self._write_pymol(hr, self.zipped)

            if self.zipped:
                self.compress(
                    join(dirname(self.out_dir), self.settings.container))

Beispiel #16

0

Datei anzeigen

    def _score_protein_cavity(self, prot):
        """
        (preferred option)
        Score a protein's atoms; values are stored as partial charge.

        This method uses the cavity API to reduce the number of atoms to iterate
        over. For every cavity feature:

        * each heavy atom of the feature's residue that is neither donor nor
          acceptor is given the highest nearby score of the 'apolar' grid
          (0 when there is none);
        * for "acceptor", "donor" and "doneptor" features that carry an atom, a
          point is projected 2.0 A along the feature's protein vector
          (h_bond_distance is between 1.5 - 2.5 A; 2.0 A taken for simplicity).
          The atom receives the highest score among the hotspot features of the
          complementary type whose grid contains that point (0 when there is
          none). The same score is copied to the atom's bonded hydrogens
          (important for GOLD).

        :param prot: the protein whose atoms receive the scores
        :return: :class:`ccdc.protein.Protein`
        """
        feats = set(self.hotspot_result.features)
        h_bond_distance = 2.0
        interaction_pairs = {
            "acceptor": "donor",
            "donor": "acceptor",
            "pi": "apolar",
            "aliphatic": "apolar",
            "aromatic": "apolar",
            "apolar": "apolar",
            "donor_acceptor": "doneptor",
            "dummy": "dummy"
        }
        polar_types = ("acceptor", "donor", "doneptor")

        cavities = Helper.cavity_from_protein(self.object)

        for cavity in cavities:
            for feature in cavity.features:
                # all cavity residues
                for atm in feature.residue.atoms:
                    if atm.is_donor is False and atm.is_acceptor is False and atm.atomic_number != 1:
                        score = self.hotspot_result.super_grids[
                            'apolar'].get_near_scores(atm.coordinates)
                        score = max(score) if len(score) > 0 else 0
                        prot.atoms[atm.index].partial_charge = score

                # polar cavity residues
                if feature.type not in polar_types or not feature.atom:
                    continue

                # project the expected partner position along the protein vector
                v = feature.protein_vector
                c = feature.coordinates
                coordinates = (c.x + h_bond_distance * v.x,
                               c.y + h_bond_distance * v.y,
                               c.z + h_bond_distance * v.z)

                # NOTE(review): "doneptor" is accepted above but is not a key
                # of interaction_pairs, so such a feature raises KeyError as
                # soon as a grid contains the projected point - confirm which
                # spelling the cavity API actually reports.
                score = [
                    f.score_value for f in feats
                    if f.grid.contains_point(coordinates, tolerance=2)
                    and f.feature_type == interaction_pairs[feature.type]
                ]
                score = max(score) if len(score) > 0 else 0

                prot.atoms[feature.atom.index].partial_charge = score

                # score hydrogen atoms (important for GOLD)
                for neighbour in prot.atoms[feature.atom.index].neighbours:
                    if int(neighbour.atomic_number) == 1:
                        prot.atoms[neighbour.index].partial_charge = score

        return prot

Beispiel #17

0

Datei anzeigen

Datei: by_feature_scoring.py Projekt: mihaelasmilova/fragment_workflow

def get_polar_cluster_hits(hits_df, clusters_df, hits_dir):
    """
    Flag, for every docked follow-up pose, which polar clusters it hits.

    For each row of ``hits_df`` the scored pose is read from
    ``<hits_dir>/<followup_id>/concat_ranked_docked_ligands_hs-scored.mol2``
    (the pose index is the last ``_``-separated token of ``pose_id``). A cluster
    counts as hit (1) when a heavy atom of the matching kind - acceptor atoms
    for 'acceptor' probes, donor atoms for 'donor' probes - lies closer to the
    cluster centre than ``cluster_radius + 1``; otherwise it is 0. Clusters with
    any other probe type have no matching atoms and are always 0.

    :param hits_df: frame with 'followup_id', 'pose_id', 'followup_smiles'
        and 'mean_hs_score' columns
    :param clusters_df: frame with 'cluster_id', 'probe_type',
        'centre_of_mass' (a coordinate object or its "x=.., y=.., z=..)" string
        form) and 'cluster_radius' columns
    :param hits_dir: directory holding one sub-directory per follow-up
    :return: frame with 'followup_id', 'followup_smiles', 'mean_hs_score', one
        0/1 column per cluster id and a 'number_hits' total
    """
    cols = clusters_df['cluster_id'].values
    # Pre-seed every cluster so that an empty hits_df still yields the cluster
    # columns instead of a KeyError further down.
    clust_hitlist = {cluster_id: [] for cluster_id in cols}
    fu_id_list = []
    fu_smiles_list = []
    mean_hs_scores = []

    for _, row in hits_df.iterrows():
        scored_mols = Path(hits_dir, row['followup_id'],
                           'concat_ranked_docked_ligands_hs-scored.mol2')
        pose = int(row['pose_id'].split('_')[-1])
        ccdc_lig = MoleculeReader(str(scored_mols))[pose]
        fu_id_list.append(row['pose_id'])
        fu_smiles_list.append(row['followup_smiles'])
        mean_hs_scores.append(row['mean_hs_score'])

        for _, rowc in clusters_df.iterrows():
            probe_type = rowc['probe_type']
            if probe_type == 'acceptor':
                tar_atoms = [a for a in ccdc_lig.heavy_atoms if a.is_acceptor]
            elif probe_type == 'donor':
                tar_atoms = [a for a in ccdc_lig.heavy_atoms if a.is_donor]
            else:
                # Previously the atoms of the preceding cluster leaked through
                # (or the name was undefined on the first cluster).
                tar_atoms = []

            c_coords = rowc['centre_of_mass']
            if isinstance(c_coords, str):
                # string form, e.g. after a round trip through a text file
                x_coord = float(c_coords.split('x=')[1].split(',')[0])
                y_coord = float(c_coords.split('y=')[1].split(',')[0])
                z_coord = float(c_coords.split('z=')[1].split(')')[0])
                c_coords = Coordinates(x=x_coord, y=y_coord, z=z_coord)

            dists = [
                Helper.get_distance(at.coordinates, c_coords)
                for at in tar_atoms
            ]
            if dists and min(dists) < rowc['cluster_radius'] + 1:
                hit = 1
            else:
                hit = 0
            clust_hitlist.setdefault(rowc['cluster_id'], []).append(hit)

    scored_df = pd.DataFrame()
    scored_df['followup_id'] = fu_id_list
    scored_df['followup_smiles'] = fu_smiles_list
    scored_df['mean_hs_score'] = mean_hs_scores
    for cl in cols:
        scored_df[cl] = clust_hitlist[cl]

    hits_list = []
    for _, rowr in scored_df.iterrows():
        hits_list.append(sum(rowr[co] for co in cols))
    scored_df['number_hits'] = hits_list
    return scored_df