def __coo_atom(self, comp, m_ele):
    '''
    Map every atom of element ``m_ele`` found in ``comp`` to the labels of
    the atoms listed as its neighbours.

    :param comp: <ccdc.molecule.Molecule> molecule (or component) to search
    :param m_ele: <str> metal element symbol used to build the atom query
    :return: <dict>{<str> metal label: <set>{<str> coordinated atom labels}}
    '''
    # A one-atom query substructure that matches the requested element.
    metal_query = search.QuerySubstructure()
    metal_query.add_atom(QueryAtom(m_ele))

    searcher = search.SubstructureSearch()
    searcher.add_substructure(metal_query)

    neighbours_by_metal = {}
    for hit in searcher.search(comp):
        # The query holds a single atom, so the first matched atom is the metal.
        metal_atom = hit.match_atoms()[0]
        neighbours_by_metal[metal_atom.label] = {
            neighbour.label for neighbour in metal_atom.neighbours
        }
    return neighbours_by_metal
def delete_anion(self, path_anion):
    '''
    Strip anions, given as ``*.mol2`` templates, from every entry.

    Only entries that have a 3D structure are processed and kept; the
    filtered list replaces ``self.entry_reader``.

    :param path_anion: directory holding the ``*.mol2`` anion definitions
    :return: None
    :raises FileExistsError: if ``path_anion`` is not an existing directory
    '''
    if not os.path.isdir(path_anion):
        raise FileExistsError('do not find the path!')

    # One substructure per template, built from the first component of the
    # first molecule in each file.
    anion_patterns = []
    for mol2_file in glob.glob(os.path.join(path_anion, '*.mol2')):
        template = io.MoleculeReader(mol2_file)[0]
        anion_patterns.append(
            search.MoleculeSubstructure(template.components[0]))

    kept_entries = []
    progress = tqdm(self.entry_reader)
    for entry in progress:
        if entry.has_3d_structure:
            molecule = entry.molecule
            # Unique labels let atoms matched in the working copy be found
            # again in the real molecule.
            molecule.normalise_labels()

            # Work on a copy from which metals and bond-less atoms are removed.
            working_copy = molecule.copy()
            working_copy.remove_atoms(
                atom for atom in working_copy.atoms
                if atom.is_metal or not atom.bonds)

            for fragment in working_copy.components:
                for pattern in anion_patterns:
                    matcher = search.SubstructureSearch()
                    matcher.add_substructure(pattern)
                    for hit in matcher.search(fragment):
                        matched = hit.match_atoms()
                        # Only a hit that covers the whole fragment counts
                        # as a free anion.
                        if len(matched) != len(fragment.atoms):
                            continue
                        molecule.remove_atoms(
                            molecule.atom(atom.label) for atom in matched)

            # Helper is defined elsewhere; presumably drops atoms left
            # without bonds -- verify against its definition.
            entry.crystal.molecule = self.__delete_isolated_atoms(molecule)
            kept_entries.append(entry)
        progress.set_description('Anions removing...')

    self.entry_reader = kept_entries
def get_all_function_groups(path_mols, path_con):
    """Count how often each defined functional group occurs in every ``*.mol2`` file.

    File and group names are derived from the globbed paths themselves, so
    each count row/column is always labelled with the file it was computed
    from, even when the directories also contain other (or hidden) files.

    :param path_mols: directory containing the ``*.mol2`` molecule files
    :param path_con: directory containing the ``*.con`` functional-group
        definitions
    :return: ``(dict_result, list_con_names)`` where ``dict_result`` maps each
        mol2 file name to a list of hit counts (one per ``*.con`` file) and
        ``list_con_names`` holds the ``*.con`` file names in the same order
    :raises FileNotFoundError: if either directory does not exist
    """
    # A missing directory must stay an error instead of silently producing
    # an empty result.
    for directory in (path_mols, path_con):
        if not os.path.isdir(directory):
            raise FileNotFoundError(
                'directory not found: {}'.format(directory))

    list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))

    # Functional groups are defined by the *.con files.
    path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
    list_con_names = [os.path.basename(path) for path in path_conner_list]
    list_connser_substructure = [
        search.ConnserSubstructure(path) for path in path_conner_list
    ]

    dict_result = {}
    pbar = tqdm(list_path_mols)
    for path_mol in pbar:
        # First molecule stored in the mol2 file.
        mol = io.MoleculeReader(path_mol)[0]

        # One entry per functional group: number of hits in this molecule.
        group_counts = []
        for func_group in list_connser_substructure:
            substructure_search = search.SubstructureSearch()
            substructure_search.add_substructure(func_group)
            group_counts.append(len(substructure_search.search(mol)))

        dict_result[os.path.basename(path_mol)] = group_counts
        pbar.set_description('正在统计所有的指定基团:')
    return dict_result, list_con_names
def get_neighbor_function_groups(path_mols, path_con, query_atom):
    """Count, per ``*.mol2`` file, the functional groups attached to ``query_atom``.

    A functional-group hit is counted when at least one of its matched atoms
    is a neighbour of an atom matching ``query_atom`` in the same component.
    Hits are de-duplicated per component through the string form of their
    matched atoms. Components without a matching atom contribute nothing.

    :param path_mols: directory containing the ``*.mol2`` molecule files
    :param path_con: directory containing the ``*.con`` functional-group
        definitions
    :param query_atom: element symbol of the (metal) atom whose neighbours
        are inspected
    :return: ``(dict_result, list_con_names)`` where ``dict_result`` maps each
        mol2 file name to a list of counts (one per ``*.con`` file) and
        ``list_con_names`` holds the ``*.con`` file names in the same order
    :raises FileNotFoundError: if either directory does not exist
    """
    # A missing directory must stay an error instead of silently producing
    # an empty result.
    for directory in (path_mols, path_con):
        if not os.path.isdir(directory):
            raise FileNotFoundError(
                'directory not found: {}'.format(directory))

    # Names are taken from the globbed paths so they always line up with the
    # files actually processed (os.listdir may list unrelated files).
    list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))
    path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
    list_con_names = [os.path.basename(path) for path in path_conner_list]
    list_connser_substructure = [
        search.ConnserSubstructure(path) for path in path_conner_list
    ]

    dict_result = {}
    for path_mol in tqdm(list_path_mols):
        mol = io.MoleculeReader(path_mol)[0]
        # Labels must be unique BEFORE the components are taken: hits are
        # de-duplicated via str(atoms), which relies on distinct labels.
        mol.normalise_labels()

        # The metal search does not depend on the functional group, so it is
        # done once per component; keep only components containing the metal.
        metal_components = []
        for component in mol.components:
            metal_query = search.QuerySubstructure()
            metal_query.add_atom(QueryAtom(query_atom))
            metal_search = search.SubstructureSearch()
            metal_search.add_substructure(metal_query)
            metal_hits = metal_search.search(component)
            if len(metal_hits) > 0:
                neighbour_sets = [
                    set(metal_hit.match_atoms()[0].neighbours)
                    for metal_hit in metal_hits
                ]
                metal_components.append((component, neighbour_sets))

        # One count per functional group for this file.
        group_counts = []
        for con in list_connser_substructure:
            count = 0
            for component, neighbour_sets in metal_components:
                substructure_search = search.SubstructureSearch()
                substructure_search.add_substructure(con)

                coordinated_groups = set()
                for hit in substructure_search.search(component):
                    hit_atoms = hit.match_atoms()
                    hit_atom_set = set(hit_atoms)
                    # The group coordinates if it shares an atom with the
                    # neighbourhood of any metal atom.
                    if any(neighbours & hit_atom_set
                           for neighbours in neighbour_sets):
                        coordinated_groups.add(str(hit_atoms))
                count += len(coordinated_groups)
            group_counts.append(count)

        dict_result[os.path.basename(path_mol)] = group_counts
    return dict_result, list_con_names
def coordination_bond_length(self, element=None):
    '''
    Collect one record per metal-neighbour bond over all entries in
    ``self.entry_reader`` and group the records by bond type.

    Each record is a list built up in place. Reading the code below, its
    final layout is:
    [bond type ("<element>-<neighbour symbol>"), bond label
    ("<metal label>-<neighbour label>"), <values supplied by
    __measurement_bond_length>, substructure name or 'NaN', result of
    __main_type_sub_connect or 'NaN', ligand connection count, metal label,
    neighbour label, entry].

    :param element: <str> metal element symbol.
        NOTE(review): with the default ``None`` the string concatenation
        ``element + '-' + ...`` below raises TypeError as soon as a bond is
        processed; callers presumably always pass a symbol.
    :return: <dict>{<str> bond type: <list> of bond records}
    '''
    p_bar = tqdm(self.entry_reader)
    # {bond type: [bond type, ideal bond length, bond length, substructure,
    # The main of substructure connection, Connection with ligands
    # metal, coordinated atom, identifier]
    # NOTE(review): the layout above omits the bond label that is inserted
    # at index 1 further down, and "identifier" is the entry object itself.
    dict_bond = dict()
    for entry in p_bar:
        # Getting molecule
        mol = entry.molecule
        # Remove all of hydrogen
        mol.remove_hydrogens()
        # Ensure labels are unique
        mol.normalise_labels()
        # Remove single atoms (atoms without any bond)
        mol.remove_atoms([
            single_atom for single_atom in mol.atoms
            if not single_atom.bonds
        ])
        # dict to save coordinated bonds information of this entry
        dict_mol_bonds = dict()
        for comp in mol.components:
            # {metal label: set of neighbour labels} for this component
            dict_coo_atoms = self.__coo_atom(comp, element)
            # Getting the bond length; presumably keyed by
            # "<metal label>-<neighbour label>" with two values per bond
            # (see the "< 4" guard below) -- TODO confirm against the helper
            dict_bond_length = self.__measurement_bond_length(
                comp, element)
            # Get coordinate substructures which the coordinated atoms belong to
            for metal in dict_coo_atoms:
                # Labels of non-carbon atoms already claimed by an accepted
                # substructure hit; later hits touching them are skipped
                set_sub_atoms = set()
                set_coo_atoms_label = dict_coo_atoms[metal]
                # Set of coordinated atoms in defined substructures
                set_coo_atoms_label_in_sub = set()
                ''' Get information of coordinated atoms about which substructure belongs to '''
                # Substructure names are visited in reverse-sorted order, so
                # names sorting later get the first chance to claim atoms
                list_sub_name = sorted(self.dict_substructure, reverse=True)
                for sub_name in list_sub_name:
                    # Defining method of substructure searching
                    substructure_search = search.SubstructureSearch()
                    substructure_search.add_substructure(
                        self.dict_substructure[sub_name])
                    # Searching
                    hits = substructure_search.search(comp)
                    if not not hits:
                        for hit in hits:
                            hit_atoms_label = set(
                                atom.label for atom in hit.match_atoms())
                            # Accept the hit only if none of its atoms was
                            # claimed by a previously accepted hit
                            if not any(atom_label in set_sub_atoms
                                       for atom_label in hit_atoms_label):
                                # Carbon atoms are not claimed, so hits may
                                # still overlap on carbon
                                set_sub_atoms.update([
                                    atom.label for atom in hit.match_atoms()
                                    if atom.atomic_symbol != 'C'
                                ])
                                coo_atoms_in_sub_label = set_coo_atoms_label & hit_atoms_label
                                if not not coo_atoms_in_sub_label:
                                    set_coo_atoms_label_in_sub.update(
                                        coo_atoms_in_sub_label)
                                    # Add the substructures type to bond information
                                    # Add the type of the main that the substructure connects with
                                    for coo_atom_in_sub_label in coo_atoms_in_sub_label:
                                        # Annotate a bond only once: skip it
                                        # when the record already holds 4 or
                                        # more values
                                        if len(dict_bond_length[
                                                metal + '-' + coo_atom_in_sub_label]
                                               ) < 4:
                                            dict_bond_length[metal + '-' + coo_atom_in_sub_label]. \
                                                extend([sub_name, self.__main_type_sub_connect(hit)])
                # Set of coordinated atoms outside all defined substructures
                set_coo_atoms_label_out_sub = set_coo_atoms_label - set_coo_atoms_label_in_sub
                # Fill coordinated atoms that do not belong to any defined
                # substructure with the string 'NaN' (twice, to keep the
                # record length aligned)
                for coo_atom_out_sub_label in set_coo_atoms_label_out_sub:
                    dict_bond_length[metal + '-' + coo_atom_out_sub_label]. \
                        extend(['NaN', 'NaN'])
                ''' Get information of Connection of metal with ligands '''
                # Remove every atom of the metal element from a copy, then
                # look at the fragments that remain
                c_comp = comp.copy()
                c_comp.remove_atoms(atom for atom in c_comp.atoms
                                    if atom.atomic_symbol == element)
                set_coo_atoms_label_in_comp = set()
                for sub_c_comp in c_comp.components:
                    set_sub_c_comp_atoms = set(
                        atom.label for atom in sub_c_comp.atoms)
                    # Neighbours of this metal that sit in the same fragment
                    common_coo_atoms = set_sub_c_comp_atoms & set_coo_atoms_label
                    set_coo_atoms_label_in_comp.update(common_coo_atoms)
                    len_common_coo_atoms = len(common_coo_atoms)
                    if len_common_coo_atoms != 0:
                        # Each of them records how many neighbours of this
                        # metal share its fragment
                        for common_coo_atom in common_coo_atoms:
                            dict_bond_length[metal + '-' + common_coo_atom].append(
                                len_common_coo_atoms)
                # Neighbours found in no fragment (e.g. atoms of the metal
                # element itself, which were removed above) get 0
                set_coo_atoms_label_out_comp = set_coo_atoms_label - set_coo_atoms_label_in_comp
                for common_coo_atom in set_coo_atoms_label_out_comp:
                    dict_bond_length[metal + '-' + common_coo_atom].append(0)
            # Setting basic information for bonds
            for metal in dict_coo_atoms:
                for coo_atom in dict_coo_atoms[metal]:
                    dict_bond_length[metal + '-' + coo_atom].extend(
                        [metal, coo_atom, entry])
                    # Two insert(0) calls: the bond label ends up at index 1
                    # and the bond type at index 0
                    dict_bond_length[metal + '-' + coo_atom].insert(
                        0, metal + '-' + coo_atom)
                    dict_bond_length[metal + '-' + coo_atom].insert(
                        0, element + '-' + mol.atom(coo_atom).atomic_symbol)
            # Save information of bond length
            dict_mol_bonds.update(dict_bond_length)
        # Save results, grouped by bond type (index 0 of each record)
        for bond in dict_mol_bonds:
            if dict_mol_bonds[bond][0] in dict_bond:
                dict_bond[dict_mol_bonds[bond][0]].append(
                    dict_mol_bonds[bond])
            else:
                dict_bond[dict_mol_bonds[bond][0]] = []
                dict_bond[dict_mol_bonds[bond][0]].append(
                    dict_mol_bonds[bond])
    return dict_bond