Ejemplo n.º 1
0
    def generate_layer(self, iterations=2, thickness=10):
        """Build one map per concentric layer grown outwards from the fragment.

        Layer 1 is the sphere grid produced by `_get_sphere_grid` for the first
        molecule in `self.fragment`. Every later layer is the shell between the
        previous surface (`self.inner`) and that surface after repeated
        dilation (`self.outer`).

        :param int iterations: number of layers to generate
        :param int thickness: dilation steps per shell; the surface is dilated
            once unconditionally and then `thickness - 1` further times
        :return: dict mapping the layer number as a string ("1", "2", ...) to
            the value returned by `_diff_to_map` for that layer
        """
        protein = Protein.from_file(self.protein)
        single_grid = self.bcv_result._single_grid()
        layers = {}

        for layer_no in range(1, iterations + 1):
            if layer_no == 1:
                # Initial layer: spheres placed on a copy of the apolar grid.
                shell = self._get_sphere_grid(
                    template=self.bcv_result.super_grids["apolar"].copy(),
                    molecule=io.MoleculeReader(self.fragment)[0])
                self.inner = shell
            else:
                # Grow the previous surface, then keep only the newly added shell.
                self.outer = self.inner.dilate()
                step = 1
                while step < thickness:
                    self.outer = self.outer.dilate()
                    step += 1

                shell = self.outer - self.inner
                self.inner = self.outer

            layers[str(layer_no)] = self._diff_to_map(diff=shell,
                                                      map=single_grid,
                                                      prot=protein)

        return layers
Ejemplo n.º 2
0
def run_conformers(inputs):
    """Generate conformers for every molecule in a file and write them to mol2.

    :param inputs: a ``(mol_file, outdir)`` pair; the conformers are written to
        ``<outdir>/<name>_conf.mol2`` where ``<name>`` is the input file name up
        to its first dot
    :return: None
    """
    mol_file, outdir = inputs

    stem = os.path.basename(mol_file).split(".")[0]
    conf_file = os.path.join(outdir, stem + "_conf.mol2")

    generator = conformer.ConformerGenerator()
    generator.settings.max_conformers = 25

    with io.MoleculeReader(mol_file) as reader, \
            io.MoleculeWriter(conf_file) as writer:
        for parent in reader:
            hits = generator.generate(parent)
            # A failed generation yields None instead of an empty list, which
            # cannot be iterated, so such structures are reported and skipped.
            if hits is None:
                print(
                    'WARNING: Conformer generation failed for structure %s in %s!'
                    % (parent.identifier, mol_file))
                continue

            for index, hit in enumerate(hits):
                conf_mol = hit.molecule
                # Append the conformer number so that every written molecule
                # has its own identifier.
                conf_mol.identifier = f"{conf_mol.identifier}_{index}"
                writer.write(conf_mol)
Ejemplo n.º 3
0
    def _get_atomic_overlap(self, cav_id, other_id, lig_id):
        """
        Write the per-atom volume overlap between a ligand and a BCV result.

        When the BCV archive ("out.zip") exists, the overlap comes from
        `atomic_volume_overlap`. Otherwise every heavy atom is recorded with
        an overlap of 0.0 under its atom type ("doneptor" atoms are recorded
        under both "donor" and "acceptor").

        :param cav_id: key of the cavity
        :param other_id: key of the 'other' protein the ligand belongs to
        :param lig_id: key of the ligand
        :return: None; the dictionary is written as text to
            `self.atomic_overlaps[cav_id][other_id][lig_id]`
        """
        # inputs
        ligand = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
        bcv_zip = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")

        if not os.path.exists(bcv_zip):
            print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
            overlaps = {"donor": {}, "acceptor": {}, "apolar": {}}
            for atom in ligand.heavy_atoms:
                atom_type = Helper.get_atom_type(atom)
                if atom_type == "doneptor":
                    keys = ("donor", "acceptor")
                else:
                    keys = (atom_type,)
                for key in keys:
                    overlaps[key][atom.label] = 0.0
        else:
            # tasks
            overlaps = HotspotReader(bcv_zip).read().atomic_volume_overlap(ligand)

        # output
        target = self.atomic_overlaps[cav_id][other_id][lig_id]
        with open(target, 'w') as writer:
            writer.write(str(overlaps))
    def run(self):
        """Run a hotspot calculation on the input protein and write the result.

        The protein is read from `self.input().path`, and the first molecule
        of ``ligands/<pdb>.sdf`` is passed as the cavity definition. The result
        is written, zipped, into the directory of `self.output().path`.
        """
        protein = Protein.from_file(self.input().path)
        ligand = io.MoleculeReader('ligands/{}.sdf'.format(self.pdb))[0]

        runner = Runner()
        run_settings = runner.Settings()
        for option, value in (("apolar_translation_threshold", 15),
                              ("polar_translation_threshold", 15),
                              ("polar_contributions", False),
                              ("sphere_maps", True),
                              ("nrotations", 3000)):
            setattr(run_settings, option, value)

        result = runner.from_protein(protein,
                                     buriedness_method='ghecom',
                                     nprocesses=1,
                                     settings=run_settings,
                                     cavities=ligand)

        writer_settings = HotspotWriter.Settings()
        writer_settings.charged = False
        out_dir = os.path.dirname(self.output().path)
        writer = HotspotWriter(out_dir,
                               grid_extension=".grd",
                               zip_results=True,
                               settings=writer_settings)

        writer.write(result)
Ejemplo n.º 5
0
    def _get_volume_overlap(self, cav_id, other_id, lig_id):
        """
        Write the volume overlaps between a ligand, its BCV and the hotspot map.

        Four percentages are written (one file each):

        * BCV/ligand overlap as a percentage of the ligand volume
        * BCV/ligand overlap as a percentage of the BCV volume
        * hotspot/ligand overlap as a percentage of the ligand volume, one
          value per score threshold (comma separated)
        * hotspot/ligand overlap as a percentage of the thresholded hotspot
          volume, one value per score threshold (comma separated)

        Nothing is written when either "out.zip" archive is missing.

        :param cav_id: key of the cavity
        :param other_id: key of the 'other' protein the ligand belongs to
        :param lig_id: key of the ligand
        :return: None
        """

        def nonzero(val):
            # An empty volume is replaced by 1 so that the percentage becomes
            # 0 instead of raising ZeroDivisionError.
            return 1 if val == 0 else val

        # inputs
        mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
        path1 = os.path.join(self.hotspot[cav_id], "out.zip")
        path2 = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")
        thresholds = [10, 14, 17]

        if not (os.path.exists(path1) and os.path.exists(path2)):
            print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
            return

        bcv = HotspotReader(path2).read()
        hot = HotspotReader(path1).read()

        # tasks
        other = Grid.from_molecule(mol)

        bcv_sg = Grid.get_single_grid(bcv.super_grids, mask=False)
        bcv_overlap = bcv_sg._mutually_inclusive(other=other).count_grid()

        # Guard every denominator, not only the hotspot volumes.
        lig_vol = nonzero((other > 0).count_grid())
        bcv_vol = nonzero((bcv_sg > 0).count_grid())

        # The combined hotspot grid does not depend on the threshold, so it is
        # built once and thresholded repeatedly.
        hot_single = Grid.get_single_grid(hot.super_grids, mask=False)
        hot_sgs = [hot_single > t for t in thresholds]
        hot_vols = [nonzero(hot_sg.count_grid()) for hot_sg in hot_sgs]
        hot_overlap = [hot_sg._mutually_inclusive(other=other).count_grid()
                       for hot_sg in hot_sgs]

        # output
        with open(self.bcv_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(str((bcv_overlap / lig_vol) * 100))

        with open(self.bcv_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(str((bcv_overlap / bcv_vol) * 100))

        with open(self.hot_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            hot_lig = [str((a / lig_vol) * 100) for a in hot_overlap]
            print(hot_lig)
            writer.write(",".join(hot_lig))

        with open(self.hot_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            hot_hot = [str((overlap / vol) * 100)
                       for overlap, vol in zip(hot_overlap, hot_vols)]
            writer.write(",".join(hot_hot))
Ejemplo n.º 6
0
    def run_docking(self):
        """Dock the ligands in `self.add_ligands` with GOLD and return the poses.

        The binding site is a 10.0 radius region around the centre of geometry
        of the starting fragment (``fragment.mol2`` in `self.in_dir`), and a
        template similarity constraint to that fragment is applied. Output is
        written to a fresh temporary directory.

        :return: list of the molecules read from the docked ligands file
        """
        # take virtual library and run docking
        print("Run GOLD docking ...")

        docker = Docker()
        settings = docker.settings

        fragment_path = os.path.join(self.in_dir, "fragment.mol2")
        self.start_ligand = io.MoleculeReader(fragment_path)[0]
        tempd = tempfile.mkdtemp()

        settings.add_protein_file(os.path.abspath(self.protein))
        site_centre = self.start_ligand.centre_of_geometry()
        settings.binding_site = settings.BindingSiteFromPoint(
            settings.proteins[0], site_centre, 10.0)
        settings.fitness_function = 'plp'
        settings.autoscale = 10.
        settings.output_directory = tempd
        settings.output_file = "docked_ligands.mol2"
        settings.add_ligand_file(self.add_ligands, ndocks=10)

        # setup constraints
        similarity = settings.TemplateSimilarityConstraint(
            type="all", template=self.start_ligand, weight=150)
        settings.add_constraint(similarity)

        settings.ProteinFileInfo().fitting_points_file("fname.mol2")

        results = docker.dock()

        # The ligands attribute is still accessed, as before; its value is unused.
        results.ligands
        output_file = os.path.join(settings.output_directory,
                                   settings.output_file)
        docked_molecules = list(
            io.MoleculeReader(os.path.join(tempd, output_file)))
        print(docked_molecules)

        return docked_molecules
Ejemplo n.º 7
0
def mogul_summary(filename, engine):
    """Print a one-line CSV summary of a geometry analysis of a molecule.

    The line holds the file name without extension, followed, for angles,
    bonds, rings and torsions in that order, by the number of analysed
    features and the number of those flagged as unusual.

    :param filename: path of the molecule file; only its first molecule is used
    :param engine: object providing `analyse_molecule`
    :return: None
    """
    molecule = io.MoleculeReader(filename)[0]
    analysed = engine.analyse_molecule(molecule)

    fields = [os.path.splitext(os.path.basename(filename))[0]]
    feature_sets = (analysed.analysed_angles, analysed.analysed_bonds,
                    analysed.analysed_rings, analysed.analysed_torsions)
    for features in feature_sets:
        total = len(features)
        unusual = sum(1 for feature in features if feature.unusual)
        fields.extend((total, unusual))

    print(",".join(map(str, fields)))
Ejemplo n.º 8
0
    def run(self):
        """Score every ligand against the hotspot result and write a CSV.

        One row is written per heavy atom of each scored ligand, with the
        columns ``mol_id,atom_id,score``; the score column is taken from the
        `partial_charge` attribute of the scored molecule's atoms.
        """
        ligands = io.MoleculeReader(self.input()['ligands'].path)
        hotspot = HotspotReader(self.input()['hs_result'].path).read()

        with open(self.output().path, 'w') as handle:
            handle.write("mol_id,atom_id,score\n")
            for ligand in ligands:
                scored = hotspot.score(ligand)
                handle.writelines(
                    "{},{},{}\n".format(ligand.identifier, atom.label,
                                        atom.partial_charge)
                    for atom in scored.heavy_atoms)
Ejemplo n.º 9
0
    def testdetect_from_ligand_ensemble(self):
        """Two ring features are expected from the test ligand overlay."""
        wrk_dir = "testdata/pharmacophore_extension/LigandPharmacophoreModel/from_ligand_ensemble"
        with PushDir(wrk_dir):
            overlay = io.MoleculeReader("test_overlay.mol2")

            model = LigandPharmacophoreModel()
            model.feature_definitions = ["ring_planar_projected"]
            model.detect_from_ligand_ensemble(ligands=overlay, cutoff=2)

            n_detected = len(model.detected_features)
            self.assertEqual(2, n_detected)
Ejemplo n.º 10
0
def main():
    """Write docking fitting points around the reference ligand of each target.

    Every sub-directory of the base directory is treated as one target named
    after the directory; its ``<name>_ref.mol2`` is turned into a grid and the
    resulting fitting points are written to ``control.mol2`` next to it.
    """
    base = "/local/pcurran/leads_frag"

    pdbs = []
    for entry in os.listdir(base):
        if os.path.isdir(os.path.join(base, entry)):
            pdbs.append(entry)

    for pdb in tqdm(pdbs):
        pdb_dir = os.path.join(base, pdb)
        reference = io.MoleculeReader(
            os.path.join(pdb_dir, f"{pdb}_ref.mol2"))[0]
        grid = Grid.from_molecule(reference,
                                  mode='replace',
                                  padding=10,
                                  scaling=0.5)

        docking_fitting_pts(grid,
                            fname=os.path.join(pdb_dir, "control.mol2"),
                            high=1)
Ejemplo n.º 11
0
    def score_hitlist(self):
        """Score the docked hits that still overlap the starting fragment.

        A hit is scored with `self.hotspot_result.score_ligand` only if the
        fraction of the fragment's sphere grid it covers exceeds 0.85 and the
        value returned by `self.fudge_shift` for the reference ring nitrogen
        donor of fragment and hit is below 2. Scores are accumulated in
        `self.score_dict`.

        :return: OrderedDict of `self.score_dict` items sorted by descending score
        """

        def is_ring_donor_nitrogen(atom):
            # placeholder atom selection; atom rmsd needed
            return (atom.is_cyclic and atom.is_donor
                    and atom.atomic_weight == 14.0067)

        # volume overlap between initial fragment and substitution
        overlap_cutoff = 0.85
        fragment = io.MoleculeReader(
            os.path.join(self.in_dir, "fragment.mol2"))[0]

        ref = self.bcv_result.super_grids["apolar"].copy()
        ref *= 0
        for atom in fragment.atoms:
            ref.set_sphere(atom.coordinates, atom.vdw_radius, 1)

        ref_a = [at for at in fragment.atoms if is_ring_donor_nitrogen(at)][0]

        for index, hit in enumerate(self.hit_list):
            print(index)
            clean = self.bcv_result.super_grids["apolar"].copy()
            clean *= 0
            for atom in hit.atoms:
                clean.set_sphere(atom.coordinates, atom.vdw_radius, 1)

            overlap = (ref > 0) & (clean > 0)

            n_overlap = float(len(self.get_scores(overlap)))
            n_ref = float(len(self.get_scores(ref)))
            percentage_overlap = n_overlap / n_ref

            hit_b = [at for at in hit.atoms if is_ring_donor_nitrogen(at)][0]
            fudge_shift = self.fudge_shift(ref_a, hit_b)

            if percentage_overlap > overlap_cutoff and fudge_shift < 2:
                self.score_dict[hit] = self.hotspot_result.score_ligand(hit)

        ranked = sorted(self.score_dict.items(),
                        key=itemgetter(1),
                        reverse=True)
        return OrderedDict(ranked)
Ejemplo n.º 12
0
    def _get_ligand_volume(self, other_id, lig_id):
        """
        Write the volume of a ligand, computed from its molecule grid.

        The volume is the number of grid points counted by `count_grid`
        multiplied by the cube of the grid spacing.

        :param other_id: key of the 'other' protein the ligand belongs to
        :param lig_id: key of the ligand
        :return: None; the volume is written as text to
            `self.ligand_volume[other_id][lig_id]`
        """
        # inputs
        ligand_path = self.extracted_ligands[other_id][lig_id]
        grid = Grid.from_molecule(io.MoleculeReader(ligand_path)[0])

        # tasks
        voxel_volume = grid.spacing ** 3
        volume = grid.count_grid() * voxel_volume

        # output
        with open(self.ligand_volume[other_id][lig_id], 'w') as handle:
            handle.write(str(volume))
Ejemplo n.º 13
0
    def testdetect_from_ligand_ensemble_cdk2(self):
        """Keeping the top four features gives a model of length four."""
        wrk_dir = "testdata/pharmacophore_extension/LigandPharmacophoreModel/from_ligand_ensemble_big_all"
        with PushDir(wrk_dir):
            overlay = io.MoleculeReader("cdk2_ligands.mol2")

            model = LigandPharmacophoreModel()
            model.feature_definitions = [
                "ring_planar_projected",
                "donor_projected",
                "acceptor_projected",
            ]
            model.detect_from_ligand_ensemble(ligands=overlay, cutoff=2)

            # Replace the detected features by the highest ranked ones only.
            feature_count = 4
            model.detected_features = model.top_features(num=feature_count)

            self.assertEqual(feature_count, len(model))
Ejemplo n.º 14
0
    def delete_anion(self, path_anion):
        '''
        Remove the anions defined by a directory of mol2 files from every entry.

        Each ``*.mol2`` file in `path_anion` defines one anion (the first
        component of its first molecule). A component of an entry is removed
        when a substructure match covers all of its atoms. Entries without a
        3D structure are not carried over into the resulting list.

        :param path_anion: directory holding the anion definition files
        :return: None; `self.entry_reader` is replaced by the list of
            processed entries
        :raises FileExistsError: if `path_anion` is not a directory
        '''
        if not os.path.isdir(path_anion):
            raise FileExistsError('do not find the path!')

        anion_queries = []
        for mol2_path in glob.glob(os.path.join(path_anion, '*.mol2')):
            first_component = io.MoleculeReader(mol2_path)[0].components[0]
            anion_queries.append(search.MoleculeSubstructure(first_component))

        stripped_entries = []
        progress = tqdm(self.entry_reader)

        for entry in progress:
            if entry.has_3d_structure:
                # Labels must be unique because atoms are looked up by label.
                molecule = entry.molecule
                molecule.normalise_labels()
                # Search on a copy without metal atoms and unbonded atoms.
                working_copy = molecule.copy()
                working_copy.remove_atoms(
                    atom for atom in working_copy.atoms
                    if atom.is_metal or not atom.bonds)
                for component in working_copy.components:
                    for query in anion_queries:
                        searcher = search.SubstructureSearch()
                        searcher.add_substructure(query)
                        for match in searcher.search(component):
                            matched = match.match_atoms()
                            # Only a match spanning the whole component counts.
                            if len(matched) != len(component.atoms):
                                continue
                            molecule.remove_atoms(
                                molecule.atom(atom.label) for atom in matched)
                entry.crystal.molecule = self.__delete_isolated_atoms(molecule)
                stripped_entries.append(entry)
            progress.set_description('Anions removing...')

        self.entry_reader = stripped_entries
Ejemplo n.º 15
0
def chunk_files(mol_file, outdir, chunk_size=100):
    """Split a molecule file into mol2 files of at most `chunk_size` molecules.

    Chunk ``i`` is written to ``<outdir>/<name>_chunk<i>.mol2`` where
    ``<name>`` is the input file name up to its first dot. Each molecule
    identifier gets the suffix ``_<chunk index>_<position in chunk>``.

    :param mol_file: path of the molecule file to split
    :param outdir: output directory; created (with parents) if missing
    :param int chunk_size: maximum number of molecules per output file
    :return: list of the paths of the files written, in chunk order
    :raises ValueError: if `chunk_size` is smaller than 1
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Creates missing parent directories too and does not fail if the
    # directory appears between a check and the creation.
    os.makedirs(outdir, exist_ok=True)

    # Close the reader as soon as all molecules have been loaded.
    with io.MoleculeReader(mol_file) as reader:
        mols = list(reader)

    stem = os.path.basename(mol_file).split('.')[0]
    outfiles = []

    for i, start in enumerate(range(0, len(mols), chunk_size)):
        outfile = os.path.join(outdir, f"{stem}_chunk{i}.mol2")
        outfiles.append(outfile)

        with io.MoleculeWriter(outfile) as w:
            for j, mol in enumerate(mols[start:start + chunk_size]):
                mol.identifier = f"{mol.identifier}_{i}_{j}"
                w.write(mol)

    return outfiles
Ejemplo n.º 16
0
    def get_all_function_groups(path_mols, path_con):
        """Count the occurrences of each defined functional group in *.mol2 files.

        :param path_mols: directory holding the (solvent-free) ``*.mol2`` files
        :param path_con: directory holding the ``*.con`` files that define the
            functional groups
        :return: tuple ``(dict_result, list_con_names)``; `dict_result` maps
            each mol2 file name to a list of hit counts, one per ``*.con``
            file, and `list_con_names` holds the ``*.con`` file names in the
            same order as those counts
        """
        list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))

        # Define the functional groups from the *.con files. The names are
        # taken from the same list as the substructures so that they stay
        # aligned with the count columns even if the directory holds other files.
        path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
        list_con_names = [os.path.basename(path) for path in path_conner_list]
        list_connser_substructure = [
            search.ConnserSubstructure(path) for path in path_conner_list
        ]

        dict_result = dict()
        pbar = tqdm(list_path_mols)
        # Set before iterating so the description shows while the bar runs.
        pbar.set_description('正在统计所有的指定基团:')
        for path_mol in pbar:
            # Only the first molecule of each file is analysed.
            mol_temp = io.MoleculeReader(path_mol)[0]
            # One entry per defined functional group: its number of hits.
            list_temp = []
            for func_group in list_connser_substructure:
                substructure_search = search.SubstructureSearch()
                substructure_search.add_substructure(func_group)
                hits = substructure_search.search(mol_temp)
                list_temp.append(len(hits))
            # Key by the name of the file that was actually read, not by a
            # separately obtained directory listing.
            dict_result[os.path.basename(path_mol)] = list_temp

        return dict_result, list_con_names
Ejemplo n.º 17
0
    def _get_matched_atoms(self, cav_id, other_id, lig_id):
        """
        Write the percentage of ligand atoms matched by a BCV result.

        This is the ligand overlap implementation from the DoGSiter paper.
        Nothing is written when the BCV archive ("out.zip") is missing.

        :param cav_id: key of the cavity
        :param other_id: key of the 'other' protein the ligand belongs to
        :param lig_id: key of the ligand
        :return: None; the percentage and the per-type dictionary are written
            on two lines to `self.matched[cav_id][other_id][lig_id]`
        """
        # inputs
        ligand = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
        bcv_zip = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")

        if not os.path.exists(bcv_zip):
            print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
            return

        # tasks
        result = HotspotReader(bcv_zip).read()
        percentage, by_type = result.percentage_matched_atoms(
            mol=ligand, threshold=0, match_atom_types=True)

        # output
        with open(self.matched[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(str(percentage) + "\n")
            writer.write(str(by_type))
Ejemplo n.º 18
0
    def __init__(self, in_dir, charged=False, library=True):
        """Set up input paths and run the hotspot, BCV and layer calculations.

        :param in_dir: directory expected to contain "fragment.mol2" and
            "protein.pdb"
        :param charged: stored as `self.charged`
        :param library: when equal to True the virtual library is read from
            "virtual_library.mol2"; otherwise `generate_library` is called
        """
        self.in_dir = in_dir

        self.fragment = os.path.join(self.in_dir, "fragment.mol2")
        if library == True:
            # NOTE(review): this path is relative to the current working
            # directory, not to in_dir - confirm that is intended.
            self.virtual_library = [
                m for m in io.MoleculeReader("virtual_library.mol2")
            ]
        else:
            self.virtual_library = self.generate_library()

        self.protein = os.path.join(self.in_dir, "protein.pdb")
        self.charged = charged
        # The three calculations below run during construction, in this order.
        self.hotspot_result = self.generate_hotspot()
        self.bcv_result = self.generate_BCV()
        self.layer_dict = self.generate_layer()

        # Constraints are derived from the layer stored under key "2".
        self.constraints = self.growing_constraints(self.layer_dict["2"])

        #self.add_ligands = os.path.join(self.in_dir, "decorated_fragments.mol2")
        #self.hit_list = self.run_docking()

        # {ccdc.Molecule: [percentage_overlap, hotspot_score]
        self.score_dict = {}
Ejemplo n.º 19
0
import os
from ccdc import io
from ccdc.descriptors import MolecularDescriptors


if __name__ == "__main__":
    # Compare the atom labels of the ligand, the reference and a ranked GOLD
    # pose for one target, then compute the RMSD between ligand and pose.

    pdb = "4G46"
    base = f"/local/pcurran/leads_frag/{pdb}"
    #  test
    mol1 = io.MoleculeReader(os.path.join(base, f"{pdb}_ligand.mol2"))[0]
    mol2 = io.MoleculeReader(os.path.join(base, f"{pdb}_ref.mol2"))[0]

    # The pose file name is built from `pdb` so that changing the target above
    # cannot silently leave this path pointing at another target's file.
    mol3 = io.MoleculeReader(
        os.path.join(base,
                     f"gold/goldscore/data/ranked_{pdb}_ligand_m1_1.mol2"))[0]

    # Atoms labelled "****" are removed from the pose before the comparison.
    rm = [atm for atm in mol3.heavy_atoms if atm.label == "****"]

    mol3.remove_atoms(rm)

    print([atm.label for atm in mol1.heavy_atoms])
    print([atm.label for atm in mol2.heavy_atoms])
    print([atm.label for atm in mol3.heavy_atoms])

    # NOTE(review): the RMSD is computed but never printed or stored elsewhere.
    a = MolecularDescriptors.rmsd(mol1, mol3)

Ejemplo n.º 20
0
from ccdc import io

from scipy.spatial import distance

# Print the distance from the first fitting point to its nearest other point.
mol = io.MoleculeReader(
    "/local/pcurran/leads_frag/3CHC/gold/goldscore/data/fit_pts.mol2")[0]

# Cartesian coordinates of every atom, in the order given by the molecule.
coords = []
for atom in mol.atoms:
    position = atom.coordinates
    coords.append([position.x, position.y, position.z])

# First atom on its own, all remaining atoms together.
pt_1 = coords[:1]
pt_all = coords[1:]

ds = distance.cdist(pt_1, pt_all)

print(min(ds[0]))
Ejemplo n.º 21
0
    action='store_true',
    default=False,
    help="Don't run minimisation on conformers (default: False)")
parser.add_argument('--nocleanup',
                    '-nc',
                    action='store_true',
                    default=False,
                    help="Don't remove conformer search logs (default: False)")

args = parser.parse_args()

# Read molecule ------------------------------------------------------------------------------------
print('%s\nReading molecule: %s\n' % ('-' * 40, args.molecule))
mol_path = args.molecule
# File name without directory and extension
mol_name = os.path.splitext(os.path.basename(mol_path))[0]
# Only the first molecule of the file is used; the reader is closed right away.
mol_reader = io.MoleculeReader(mol_path)
mol = mol_reader[0]
mol_reader.close()

# Generate conformers ------------------------------------------------------------------------------
if args.conformers > 0:
    print('Generating %i conformers...' % args.conformers)
    conformer_generator = conformer.ConformerGenerator(
    )  # Initialize conformer generator
    conformer_generator.settings.max_conformers = args.conformers  # Set max number of conformers
    conformers = conformer_generator.generate(mol)  # Run conformer generator
else:
    print('Skipping conformer generation...')
    # None marks "no conformers" for the code that follows.
    conformers = None

if conformers is not None:
Ejemplo n.º 22
0
from ccdc import io
import pandas as pd
import numpy as np
import time

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(901)
# Module-level reader opened on 'CSD'; Mol.get_mol looks molecules up through it.
csd_reader = io.MoleculeReader('CSD')


class Mol():
    """
    A wrapper class for csd molecule objects. 
    """
    def __init__(self, index):
        """Look up the molecule for `index` via `get_mol` and wrap it.

        :param index: value passed unchanged to `get_mol`
        """
        self._molecule = self.get_mol(index)

    def __getattr__(self, attr):
        """Wraps this class object around a CSD molecule object."""
        if attr in self.__dict__:
            return getattr(self, attr)
        return getattr(self._molecule, attr)

    def get_mol(self, index):
        """Acquires a molecule object from the CSD, using either the string 
        label for the structure, or its numerical index.

        :param index: value used to index the module-level `csd_reader`
        :return: the molecule returned by the reader
        """
        try:
            return csd_reader[index]
        except TypeError:
            # Indexing rejected this type of key; look it up with molecule().
            return csd_reader.molecule(index)

    def remove_unlocated(self):
Ejemplo n.º 23
0
    def delete_solvents(self, list_solvent_names=None):
        """Remove solvent components from the crystals in `self.entry_reader`.

        The solvents are defined by mol2 files in the ``ccdc_solvents``
        directory located relative to `io.csd_directory()`. If no list of
        solvent names is given, every ``*.mol2`` file in that directory is used.

        Processing is best effort: an entry that has no 3D structure, or whose
        processing raises an exception, is kept as it is.

        :param list_solvent_names: list or tuple of solvent names (mol2 file
            names without extension), or None to use all solvents
        :return: None; `self.entry_reader` is replaced by the list of entries
        :raises FileExistsError: if the solvent directory does not exist
        """

        # Directory of the solvent library
        solvent_file = os.path.join(os.path.dirname(io.csd_directory()),
                                    'Mercury', 'molecular_libraries',
                                    'ccdc_solvents')

        if not os.path.isdir(solvent_file):
            raise FileExistsError('路径不存在!')

        # Collect the mol2 files of the requested solvents, or of all of them.
        if not list_solvent_names:
            solvent_paths = glob.glob(os.path.join(solvent_file, '*.mol2'))
        else:
            solvent_paths = [
                os.path.join(solvent_file, solvent + '.mol2')
                for solvent in list_solvent_names
            ]

        # SMILES string of the first molecule in each solvent file
        solvent_smiles = [
            io.MoleculeReader(path)[0].smiles for path in solvent_paths
        ]

        # Remove the solvents
        list_crystals_remove_solvents = []
        p_bar = tqdm(self.entry_reader)
        for entry in p_bar:
            try:
                if entry.has_3d_structure:
                    # Ensure labels are unique
                    mol = entry.molecule
                    mol.normalise_labels()
                    # Use a copy
                    clone = mol.copy()
                    # Remove all bonds containing a metal atom
                    clone.remove_bonds(b for b in clone.bonds
                                       if any(a.is_metal for a in b.atoms))
                    # Work out which components to remove
                    to_remove = [
                        c for c in clone.components
                        if not self.has_metal(c) and (not self.is_multidentate(
                            c, mol) or self.is_solvent(c, solvent_smiles))
                    ]
                    # Remove the atoms of selected components
                    mol.remove_atoms(
                        mol.atom(a.label) for c in to_remove for a in c.atoms)
                    entry.crystal.molecule = mol
            except Exception:
                # Deliberate best effort: a failing entry is kept rather than
                # aborting the run. Exception (not BaseException) is caught so
                # that KeyboardInterrupt and SystemExit still propagate.
                pass
            # Every entry is kept, whether or not it could be processed.
            list_crystals_remove_solvents.append(entry)
            p_bar.set_description('正在去除溶剂:')
        self.entry_reader = list_crystals_remove_solvents
        return None
Ejemplo n.º 24
0
    def get_neighbor_function_groups(path_mols, path_con, query_atom):
        """Count functional groups bonded to a query atom in each *.mol2 file.

        For every component that contains `query_atom`, a functional group
        hit is counted when at least one of its matched atoms is a neighbour
        of a matched query atom. Identical hits (same matched atoms) are
        counted once per component.

        :param path_mols: directory holding the (solvent-free) ``*.mol2`` files
        :param path_con: directory holding the ``*.con`` files that define the
            functional groups
        :param query_atom: atom specification passed to `QueryAtom`
        :return: tuple ``(dict_result, list_con_names)``; `dict_result` maps
            each mol2 file name to a list of counts, one per ``*.con`` file,
            and `list_con_names` holds the ``*.con`` file names in the same
            order as those counts
        """
        list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))

        # Define the functional groups from the *.con files. The names are
        # taken from the same list as the substructures so that they stay
        # aligned with the count columns even if the directory holds other files.
        path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
        list_con_names = [os.path.basename(path) for path in path_conner_list]
        list_connser_substructure = [
            search.ConnserSubstructure(path) for path in path_conner_list
        ]

        dict_result = dict()

        for path_mol in tqdm(list_path_mols):
            # Read the molecule and take its components
            mol = io.MoleculeReader(path_mol)[0]
            list_components = mol.components
            mol.normalise_labels()

            # The query-atom search does not depend on the functional group,
            # so it is run once per component instead of once per group.
            metal_components = []
            for component in list_components:
                m = QueryAtom(query_atom)
                s = search.QuerySubstructure()
                s.add_atom(m)
                sub_search = search.SubstructureSearch()
                sub_search.add_substructure(s)
                mol_metals = sub_search.search(component)
                if len(mol_metals) > 0:
                    neighbour_sets = [
                        set(metal_hit.match_atoms()[0].neighbours)
                        for metal_hit in mol_metals
                    ]
                    metal_components.append((component, neighbour_sets))

            # Number of coordinating groups found per defined functional group
            list_temp = []
            for con in list_connser_substructure:
                count_temp = 0
                for component, neighbour_sets in metal_components:
                    # String forms of the hits already counted in this component
                    set_temp = set()
                    substructure_search = search.SubstructureSearch()
                    substructure_search.add_substructure(con)
                    for hit in substructure_search.search(component):
                        temp_hit_atoms = hit.match_atoms()
                        hit_atom_set = set(temp_hit_atoms)
                        if any(neighbours & hit_atom_set
                               for neighbours in neighbour_sets):
                            set_temp.add(str(temp_hit_atoms))
                    count_temp += len(set_temp)
                list_temp.append(count_temp)

            # Key by the name of the file that was actually read, not by a
            # separately obtained directory listing.
            dict_result[os.path.basename(path_mol)] = list_temp

        return dict_result, list_con_names