def get_multiring_atoms_bonds(self, rdk_mol: Mol, smiles):
    """Count atoms and bonds that are members of more than one ring.

    Not used.

    Ring membership is tallied from ``rdk_mol.GetRingInfo()``. The SMILES is
    also parsed with pybel; when the two toolkits disagree on the number of
    rings a warning is printed and the counts are replaced by the pybel
    ``[R2]`` match count and that count minus one.

    Returns a ``(n_atoms_multiring, n_bonds_multiring)`` tuple.
    """
    atom_hits = [0] * rdk_mol.GetNumAtoms()
    bond_hits = [0] * rdk_mol.GetNumBonds()

    # TODO GetRingInfo gives SymmetricSSSR, not TRUE SSSR
    ring_info = rdk_mol.GetRingInfo()
    for atom_ring in ring_info.AtomRings():
        for atom_idx in atom_ring:
            atom_hits[atom_idx] += 1
    for bond_ring in ring_info.BondRings():
        for bond_idx in bond_ring:
            bond_hits[bond_idx] += 1

    n_atoms_multiring = sum(1 for hits in atom_hits if hits > 1)
    n_bonds_multiring = sum(1 for hits in bond_hits if hits > 1)

    py_mol = pybel.readstring('smi', smiles)
    if ring_info.NumRings() != len(py_mol.sssr):
        print(
            'WARNING: SymmetricSSSR not equal to TRUE SSSR in rdkit. Use Openbabel instead:',
            smiles)
        n_atoms_multiring = len(pybel.Smarts('[R2]').findall(py_mol))
        n_bonds_multiring = n_atoms_multiring - 1

    return n_atoms_multiring, n_bonds_multiring
def mark_reactants(source_mol: Mol, target_mol: Mol):
    """Flag source atoms whose atom-map number also appears in the target.

    Every atom of ``source_mol`` whose ``GetAtomMapNum()`` value is present
    among the map numbers of ``target_mol`` gets the boolean property
    ``'in_target'`` set to ``True``. Other atoms are left untouched and
    ``target_mol`` is only read. Returns ``None``.
    """
    # A set has no order, so the atoms are consumed directly; this also
    # accepts any iterable from GetAtoms(), not only reversible sequences.
    target_maps = {atom.GetAtomMapNum() for atom in target_mol.GetAtoms()}
    for atom in source_mol.GetAtoms():
        if atom.GetAtomMapNum() in target_maps:
            atom.SetBoolProp('in_target', True)
def build_position_matrix(molecule: Mol) -> np.ndarray:
    """Return the atom coordinates of the molecule's conformer as an array.

    Row ``k`` holds the ``x``, ``y`` and ``z`` attributes of
    ``conformer.GetAtomPosition(k)``. The result always has shape
    ``(num_atoms, 3)``, including ``(0, 3)`` for a molecule without atoms.
    """
    conformer = molecule.GetConformer()
    rows = []
    for atom_idx in range(molecule.GetNumAtoms()):
        # Look the position up once per atom instead of once per coordinate.
        point = conformer.GetAtomPosition(atom_idx)
        rows.append((point.x, point.y, point.z))
    # reshape keeps the (N, 3) contract even when there are no rows.
    return np.array(rows, dtype=float).reshape(-1, 3)
def update_feat_values(mol: Mol, atom_props: dict, bond_props: dict):
    """Record the feature values observed in ``mol``.

    For each key of ``atom_props`` the value returned by
    ``try_get_atom_feature`` is added to the collection stored under that key,
    once per atom; ``bond_props`` is filled the same way from the bonds via
    ``try_get_bond_feature``. Both dicts are updated in place.
    """
    for atom in mol.GetAtoms():
        for key, seen_values in atom_props.items():
            seen_values.add(try_get_atom_feature(atom, key))

    for bond in mol.GetBonds():
        for key, seen_values in bond_props.items():
            seen_values.add(try_get_bond_feature(bond, key))
def build_adjacency_matrix(molecule: Mol) -> np.ndarray:
    """Build a symmetric adjacency matrix with ones on the diagonal.

    The matrix starts as an identity of size ``GetNumAtoms()``; for every
    bond both ``[begin, end]`` and ``[end, begin]`` are set to 1.
    """
    adjacency = np.eye(molecule.GetNumAtoms())
    for bond in molecule.GetBonds():
        first = bond.GetBeginAtom().GetIdx()
        second = bond.GetEndAtom().GetIdx()
        adjacency[first, second] = 1
        adjacency[second, first] = 1
    return adjacency
def add_map_numbers(mol: Mol) -> Mol:
    """Return a re-parsed copy of ``mol`` with randomly assigned map numbers.

    The numbers ``1..num_atoms`` are shuffled with ``np.random.shuffle`` and
    assigned to the atoms in iteration order.
    """
    # converting to smiles to mol and again to smiles makes atom order canonical
    canonical = Chem.MolFromSmiles(Chem.MolToSmiles(mol))

    shuffled_maps = np.arange(1, canonical.GetNumAtoms() + 1)
    np.random.shuffle(shuffled_maps)

    for atom, map_num in zip(canonical.GetAtoms(), shuffled_maps):
        atom.SetAtomMapNum(int(map_num))
    return canonical
def add_benzene_ring(mol: Mol, start_atom_ind: int, ring_atom_maps: List[int]):
    """Close an aromatic carbon ring on ``mol`` through the atom at ``start_atom_ind``.

    ``ring_atom_maps`` lists the atom-map numbers of the ring members in ring
    order. Map numbers already present in ``mol`` reuse the existing atom; the
    others are added as aromatic carbons that inherit ``in_reactant`` and
    ``mol_id`` from the start atom (defaults ``False`` and ``1``). Consecutive
    ring members, plus the first and last, are joined by aromatic bonds where
    no bond exists yet. The start atom, the added atoms and all ring bonds are
    tagged with ``is_edited``. The molecule is modified in place and returned.
    """
    index_of_map = {
        atom.GetAtomMapNum(): idx for idx, atom in enumerate(mol.GetAtoms())
    }

    anchor = mol.GetAtomWithIdx(start_atom_ind)
    anchor.SetBoolProp('is_edited', True)
    anchor.SetIsAromatic(True)
    anchor_map = anchor.GetAtomMapNum()

    in_reactant = (anchor.GetBoolProp('in_reactant')
                   if anchor.HasProp('in_reactant') else False)
    mol_id = anchor.GetIntProp('mol_id') if anchor.HasProp('mol_id') else 1

    def mark_ring_bond(first_idx, second_idx):
        # Reuse an existing bond when there is one, otherwise add an aromatic bond.
        bond = mol.GetBondBetweenAtoms(first_idx, second_idx)
        if bond is None:
            # AddBond's return value minus one is used as the new bond's index.
            bond_count = mol.AddBond(first_idx, second_idx,
                                     order=Chem.rdchem.BondType.AROMATIC)
            bond = mol.GetBondWithIdx(bond_count - 1)
        bond.SetBoolProp('is_edited', True)

    ring_indices = []
    for atom_map in ring_atom_maps:
        if atom_map == anchor_map:
            ring_indices.append(start_atom_ind)
        elif atom_map in index_of_map:
            ring_indices.append(index_of_map[atom_map])
        else:
            next_index = mol.GetNumAtoms()
            carbon = Chem.Atom(6)  # benzene has only carbon atoms
            carbon.SetAtomMapNum(atom_map)
            carbon.SetIsAromatic(True)
            carbon.SetBoolProp('is_edited', True)
            carbon.SetBoolProp('in_reactant', in_reactant)
            carbon.SetIntProp('mol_id', mol_id)
            mol.AddAtom(carbon)
            ring_indices.append(next_index)

    for first_idx, second_idx in zip(ring_indices, ring_indices[1:]):
        mark_ring_bond(first_idx, second_idx)
    # Close the ring between its first and last member.
    mark_ring_bond(ring_indices[0], ring_indices[-1])

    return mol
def find_rings(mol: Mol) -> List[List[int]]:
    """Return the rings of ``mol`` expressed as atom-map numbers.

    Each ring reported by ``mol.GetRingInfo().AtomRings()`` is translated from
    atom indices to the corresponding ``GetAtomMapNum()`` values, keeping the
    ring order and the order of atoms within each ring.
    """
    atom_rings = mol.GetRingInfo().AtomRings()
    map_of_index = {
        idx: atom.GetAtomMapNum() for idx, atom in enumerate(mol.GetAtoms())
    }
    return [[map_of_index[idx] for idx in ring] for ring in atom_rings]
def fix_incomplete_mappings(sub_mol: Mol, prod_mol: Mol) -> Tuple[Mol, Mol]:
    """Give every unmapped atom a fresh, unused atom-map number.

    Atoms whose map number is below 1 are numbered consecutively, starting
    one above the largest map number found in either molecule. Atoms of
    ``sub_mol`` are processed before atoms of ``prod_mol``. Both molecules
    are modified in place and returned as ``(sub_mol, prod_mol)``.
    """
    mols = (sub_mol, prod_mol)
    # default=0 keeps this working when a molecule (or both) has no atoms.
    max_map = max(
        (atom.GetAtomMapNum() for mol in mols for atom in mol.GetAtoms()),
        default=0,
    )

    for mol in mols:
        for atom in mol.GetAtoms():
            if atom.GetAtomMapNum() < 1:
                max_map += 1
                atom.SetAtomMapNum(max_map)

    return sub_mol, prod_mol
def rdmol_to_data(mol: Mol):
    """Convert a single-conformer molecule into a ``Data`` graph object.

    The returned object carries:
      * ``node_type``  - atomic numbers as a long tensor,
      * ``pos``        - conformer 0 positions as a float tensor,
      * ``edge_index`` - both directions of every bond, sorted by
                         ``source * num_atoms + target``,
      * ``edge_type``  - ``BOND_TYPES`` code per directed edge, same order,
      * ``rdmol``      - a deep copy of ``mol``,
      * ``smiles``     - ``Chem.MolToSmiles(mol)``,
      * ``nx``         - undirected networkx view built by ``to_networkx``.

    The molecule must have exactly one conformer (checked with ``assert``).
    """
    assert mol.GetNumConformers() == 1
    num_atoms = mol.GetNumAtoms()

    pos = torch.tensor(mol.GetConformer(0).GetPositions(), dtype=torch.float)
    z = torch.tensor([atom.GetAtomicNum() for atom in mol.GetAtoms()],
                     dtype=torch.long)

    # Each bond contributes two directed edges that share one bond type.
    row, col, edge_type = [], [], []
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row += [start, end]
        col += [end, start]
        edge_type += 2 * [BOND_TYPES[bond.GetBondType()]]

    edge_index = torch.tensor([row, col], dtype=torch.long)
    edge_type = torch.tensor(edge_type)

    # Order edges by (source, target) so the layout is deterministic.
    perm = (edge_index[0] * num_atoms + edge_index[1]).argsort()
    edge_index = edge_index[:, perm]
    edge_type = edge_type[perm]

    smiles = Chem.MolToSmiles(mol)

    data = Data(node_type=z, pos=pos, edge_index=edge_index,
                edge_type=edge_type, rdmol=copy.deepcopy(mol), smiles=smiles)
    data.nx = to_networkx(data, to_undirected=True)

    return data
def generate_conformers(lig_file, init='generate_conformers_init'):  # option pdb_mol
    """Write an SDF copy of a PDB ligand and a PDB file of its conformers.

    Steps:
      * read the ligand from ``lig_file`` and write it to an ``.sdf`` file
        under ``init.mol_path``;
      * add hydrogens, embed ``init.num_conformers`` conformers and run an
        MMFF optimisation on each;
      * write the hydrogen-stripped molecule to a PDB file under
        ``init.out_lig_path``.

    ``init`` is the *name* of a settings object, resolved with ``eval``.

    Returns ``[[pdb_file, mol_file, num_atoms]]`` where ``num_atoms`` is the
    atom count of the last molecule written.
    """
    # NOTE(review): eval() executes whatever expression `init` contains; only
    # pass trusted names here. Kept because callers supply the name as a string.
    init = eval(init)
    lig_name = lig_file[len(init.lig_path) + 1:]

    # use rdkit to get a mol object from the PDB
    pdb_file = os.path.join(init.out_lig_path, lig_name)
    mol_file = os.path.join(init.mol_path, lig_name).replace('.pdb', '.sdf')

    # write the mol to a mol file for future use
    mol = Chem.MolFromPDBFile(lig_file)
    writer = SDWriter(mol_file)
    try:
        writer.write(mol)
    finally:
        # The SD writer was previously never closed, leaking the handle.
        writer.close()

    # generate conformers and get the number of atoms of the molecule
    mol2 = Chem.AddHs(mol)
    pdb_writer = PDBWriter(pdb_file)
    try:
        conf_ids = AllChem.EmbedMultipleConfs(mol2, init.num_conformers)
        # NOTE(review): the hydrogen-stripped molecule is written once per
        # optimised conformer - confirm this matches the intended output.
        for cid in conf_ids:
            AllChem.MMFFOptimizeMolecule(mol2, confId=cid)
            mol = Chem.RemoveHs(mol2)
            pdb_writer.write(mol)
        num_atoms = mol.GetNumAtoms()
    finally:
        pdb_writer.close()

    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print('Generated conformers for one ligand')
    return [[pdb_file, mol_file, num_atoms]]
def find_added_benzene_rings(source_mol: Mol, target_mol: Mol) -> List[List[int]]:
    """
    Find benzene rings that were added in the process of reaction generation.

    A ring of ``target_mol`` (given as atom-map numbers by ``find_rings``) is
    reported when none of its map numbers occurs in ``source_mol`` and
    ``is_benzene_ring`` accepts its atoms.
    """
    target_rings = find_rings(target_mol)
    atom_of_map = {atom.GetAtomMapNum(): atom for atom in target_mol.GetAtoms()}
    source_maps = {atom.GetAtomMapNum() for atom in source_mol.GetAtoms()}

    added_rings = []
    for ring in target_rings:
        if any(map_num in source_maps for map_num in ring):
            continue
        if is_benzene_ring([atom_of_map[map_num] for map_num in ring]):
            added_rings.append(ring)
    return added_rings
def fix_explicit_hs(mol: Mol) -> Mol:
    """Return a sanitized copy of ``mol`` after an add/remove hydrogen round-trip.

    Every atom first has its no-implicit flag cleared on the input molecule;
    then explicit-only hydrogens are added, hydrogens are removed again and
    the result is sanitized.
    """
    for atom in mol.GetAtoms():
        atom.SetNoImplicit(False)

    with_hs = Chem.AddHs(mol, explicitOnly=True)
    cleaned = Chem.RemoveHs(with_hs)
    Chem.SanitizeMol(cleaned)
    return cleaned
def filter_reactants(sub_mols: List[Mol], prod_mol: Mol) -> Mol:
    """Combine the substrates that share a map number with the product.

    A substrate is kept when at least one of its atoms has a map number that
    also occurs in ``prod_mol``. The kept substrates are joined as a
    dot-separated SMILES string and parsed back into a single molecule.
    """
    product_maps = {atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    kept_smiles = [
        Chem.MolToSmiles(candidate)
        for candidate in sub_mols
        if any(atom.GetAtomMapNum() in product_maps
               for atom in candidate.GetAtoms())
    ]
    return Chem.MolFromSmiles('.'.join(kept_smiles))
def get_labels(
        self,
        mol: rdchem.Mol,
        label_names: Optional[Union[str, List[str]]] = None
) -> List[Optional[float]]:
    """Extract numeric label values from the molecule's properties.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    label_names: str or list of str or None, optional, default=None
        Name of label(s). A single string is treated as a one-element list.

    Returns:
    --------
    labels: list of float or None
        One entry per requested name, in the same order: the property value
        converted with ``float`` when the molecule has the property,
        otherwise ``None``. Empty when `label_names` is None.

    Raises:
    -------
    ValueError
        If a present property cannot be converted to ``float``.
    """
    if label_names is None:
        return []

    # Convert str to list for proper parsing
    if isinstance(label_names, str):
        label_names = [label_names]

    return [
        float(mol.GetProp(name)) if mol.HasProp(name) else None
        for name in label_names
    ]
def mol_to_extended_graph(molecule: Mol, seed: int = 0) -> Graph:
    """Build an extended graph by embedding atoms in a random traversal order.

    A generator seeded with ``seed`` picks the start atom and is passed on to
    ``get_random_bf_sequence`` and ``embed_node_in_graph``; each node of the
    resulting sequence is embedded into a fresh ``Graph`` in turn.
    """
    rng = np.random.default_rng(seed=seed)
    first_atom = rng.integers(low=0, high=molecule.GetNumAtoms(), size=1).item()

    bonds = build_bond_graph(molecule)
    visit_order = get_random_bf_sequence(graph=bonds, start=first_atom, rng=rng)

    extended = Graph()
    for node in visit_order:
        embed_node_in_graph(extended, new_node=node, bond_graph=bonds, rng=rng)
    return extended
def remove_bridge(molecule: Mol, root_pattern_smiles: str,
                  removal_indices: List[int]) -> Optional[Mol]:
    """Delete selected atoms of the first pattern match and keep the largest fragment.

    ``removal_indices`` index into the first substructure match of the pattern
    parsed from ``root_pattern_smiles``. Returns ``None`` when the pattern
    does not match, otherwise ``get_largest_fragment`` of the edited molecule.
    """
    pattern = Chem.MolFromSmiles(root_pattern_smiles)
    hits = molecule.GetSubstructMatches(pattern)
    if not hits:
        return None

    first_hit = hits[0]
    doomed = sorted((first_hit[i] for i in removal_indices), reverse=True)

    editable = Chem.EditableMol(molecule)
    # Atoms are removed in descending index order.
    for atom_idx in doomed:
        editable.RemoveAtom(atom_idx)

    return get_largest_fragment(editable.GetMol())
def get_bridge_idty(ligand: Mol, class_pattern: str) -> Optional[List[str]]:
    """Return the bridge fragments left after removing the class core.

    Nitro groups are deleted from ``ligand``, the core parsed from
    ``class_pattern`` is replaced via ``Chem.ReplaceCore`` and the remaining
    side chains are split into fragments. A fragment counts as a bridge when
    it has fewer than 20 atoms and more than one ``*`` in its SMILES. Numbered
    attachment labels such as ``[1*]`` or ``[12*]`` are normalised to ``*``.

    Returns ``None`` when ``ReplaceCore`` yields nothing, otherwise the list
    of bridge SMILES ordered by increasing string length.
    """
    nitro_group = Chem.MolFromSmiles("[N+](=O)[O-]", sanitize=False)
    stripped = Chem.DeleteSubstructs(ligand, nitro_group)

    root_pattern = Chem.MolFromSmiles(class_pattern, sanitize=False)
    chains = Chem.ReplaceCore(stripped, root_pattern)
    if chains is None:
        return None

    pieces = Chem.GetMolFrags(chains, asMols=True)
    fragment_smiles = sorted((Chem.MolToSmiles(piece, True) for piece in pieces),
                             key=len)

    bridge = []
    for smiles in fragment_smiles:
        if Chem.MolFromSmiles(smiles).GetNumAtoms() < 20 and smiles.count("*") > 1:
            # \d+ (not \d) so labels with two or more digits are normalised too.
            bridge.append(re.sub(r"\[\d+\*\]", "*", smiles))
    return bridge
def check_num_atoms(mol: rdchem.Mol, max_num_atoms: Optional[int] = -1) -> None:
    """Check number of atoms in `mol` does not exceed `max_num_atoms`.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        The molecule to check.

    max_num_atoms: int or None, optional, default=-1
        Maximum allowed number of atoms in a molecule. If negative or None,
        check passes unconditionally.

    Raises:
    -------
    MolFeatureExtractionError
        If the number of atoms in `mol` exceeds `max_num_atoms`.
    """
    num_atoms = mol.GetNumAtoms()
    # None is accepted by the signature; treat it like a negative limit
    # instead of failing on the comparison below.
    if max_num_atoms is None or max_num_atoms < 0:
        return
    if num_atoms > max_num_atoms:
        raise MolFeatureExtractionError("Atoms in mol (N={}) exceeds " \
            "num_max_atoms (N={}).".format(num_atoms, max_num_atoms))
def __init__(self, source_mol: RWMol, target_mol: Mol, action_vocab: dict,
             forward: bool = False, action_order: str = 'dfs'):
    """Set up the state needed to derive actions from source to target.

    ``action_order`` drives the randomisation flags: ``'random'`` anywhere in
    the string randomises action types, exactly ``'random'`` also randomises
    map-atom order and next-atom choice, and ``'randat'`` randomises map-atom
    order only. When it contains ``'bfs'`` the atom stack is pre-filled with
    the sorted map numbers of ``target_mol``. Source atoms are marked via
    ``mark_reactants`` before the initial graph is built with ``get_graph``.
    """
    self.source_mol = source_mol
    self.target_mol = target_mol

    fully_random = action_order == 'random'
    self.randomize_action_types = 'random' in action_order
    self.randomize_map_atom_order = fully_random or 'randat' in action_order
    self.randomize_next_atom = fully_random
    self.action_order = action_order

    if 'bfs' in action_order:
        self.atoms_stack = sorted(
            atom.GetAtomMapNum() for atom in target_mol.GetAtoms())
    else:
        self.atoms_stack = []

    mark_reactants(source_mol, target_mol)

    self.edited_atoms = set()
    self.forward = forward
    self.action_vocab = action_vocab
    self.prop_dict = action_vocab['prop2oh']

    benzene_rings = find_added_benzene_rings(source_mol=source_mol,
                                             target_mol=target_mol)
    self.added_rings = {'benzene': benzene_rings}

    self.current_step = 0
    self.current_mol_graph = get_graph(
        self.source_mol,
        ravel=False,
        to_array=True,
        atom_prop2oh=self.prop_dict['atom'],
        bond_prop2oh=self.prop_dict['bond'],
    )
def build_bond_graph(molecule: Mol) -> Graph:
    """Return a ``Graph`` with one edge per bond, keyed by atom indices."""
    bond_graph = Graph()
    for bond in molecule.GetBonds():
        bond_graph.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
    return bond_graph
def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int or None, optional, default=-1
        The size of the returned array. If this option is negative (or None),
        it does not take any effect. Otherwise, it must be larger than or
        equal to the number of atoms in the input molecule. If so, the end of
        the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or
        `out_size` when padding is requested), and `m` is the number of feats.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    """
    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(mol)  # stored under _CIPCode, see doc for more info

    # Substructure patterns for the per-atom flags below
    hydrogen_donor = rdmolfiles.MolFromSmarts(
        "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    hydrogen_acceptor = rdmolfiles.MolFromSmarts(
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    acidic = rdmolfiles.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
    basic = rdmolfiles.MolFromSmarts(
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")

    def matched_atoms(pattern):
        # Flatten all matches into a set of atom indices for O(1) membership.
        return {idx for match in mol.GetSubstructMatches(pattern) for idx in match}

    hydrogen_donor_match = matched_atoms(hydrogen_donor)
    hydrogen_acceptor_match = matched_atoms(hydrogen_acceptor)
    acidic_match = matched_atoms(acidic)
    basic_match = matched_atoms(basic)

    # Get ring info
    ring = mol.GetRingInfo()

    # Loop-invariant one-hot vocabularies
    symbols = [
        'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
        'Sn', 'Se', 'other'
    ]
    hybridizations = list(rdchem.HybridizationType.names.values())

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)

        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), symbols)
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(), hybridizations)
        atom_feats += one_hot(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(), [-3, -2, -1, 0, 1, 2, 3])

        g_charge = float(atom.GetProp("_GasteigerCharge"))
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]

        atom_feats += [atom.GetIsAromatic()]
        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality: fall back to "neither R nor S" when no CIP code is
        # available. `Exception` (not a bare except) keeps the original
        # fallback without swallowing KeyboardInterrupt/SystemExit.
        try:
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        except Exception:
            cip_feats = [False, False]
        atom_feats += cip_feats + [atom.HasProp("_ChiralityPossible")]

        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    # `np.float` was removed from NumPy; the builtin `float` is the same dtype.
    if out_size is None or out_size < 0:
        return np.array(mol_feats, dtype=float)
    elif out_size >= n_atoms:
        # 'empty' padding for `mol_feats`. Generate(s) feature matrix of same size for all mols
        # NOTE: len(mol_feats[0]) is the number of feats
        padded_mol_feats = np.zeros((out_size, len(mol_feats[0])), dtype=float)
        padded_mol_feats[:n_atoms] = np.array(mol_feats, dtype=float)
        return padded_mol_feats
    else:
        raise ValueError(
            '`out_size` (N={}) must be negative or larger than or '
            'equal to the number of atoms in the input molecules (N={}).'.
            format(out_size, n_atoms))
def construct_pos_matrix(mol: rdchem.Mol,
                         out_size: Optional[int] = -1) -> np.ndarray:
    """Construct relative positions from each atom within the molecule.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest. If it has no conformer, one is embedded with
        ETKDG on a hydrogen-added copy (hydrogens are removed again).

    out_size: int or None, optional, default=-1
        The size of the returned array. If this option is negative (or None),
        it does not take any effect. Otherwise, it must be larger than or
        equal to the number of atoms in the input molecule. If so, the end of
        the array is padded with zeros.

    Returns:
    --------
    pos_matrix: np.ndarray, shape=(n,n,3)
        ``pos_matrix[i, j]`` is the position of atom ``i`` minus the position
        of atom ``j`` (XYZ).

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms,
        or if no conformer could be embedded.

    Examples:
    ---------
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> smiles = ('N[C@@]([H])([C@]([H])(O2)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])([C@]([H])'
    ...           '(O)C)C(=O)N[C@@]([H])(Cc1ccc(O)cc1)C(=O)2')
    >>> mol = Chem.MolFromSmiles(smiles)
    >>> mol = Chem.AddHs(mol, addCoords=True)
    >>> AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    >>> mol = Chem.RemoveHs(mol)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=-1)
    >>> pos_matrix.shape
    (34,34,3)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=49)
    >>> pos_matrix.shape
    (49,49,3)
    """
    # Obtain initial distance geometry between atoms, if unavailable
    if mol.GetNumConformers() == 0:
        mol = rdmolops.AddHs(mol, addCoords=True)
        # A negative return value signals that embedding failed.
        if rdDistGeom.EmbedMolecule(mol, rdDistGeom.ETKDG()) < 0:
            raise ValueError('Could not embed a conformer for the molecule.')
        mol = rdmolops.RemoveHs(mol)

    coords = np.asarray(mol.GetConformer().GetPositions(), dtype=float)  # shape=(N,3)
    n_atoms = mol.GetNumAtoms()

    # Determine appropriate output size to generate feature matrix of same size for all mols.
    if out_size is None or out_size < 0:
        size = n_atoms
    elif out_size >= n_atoms:
        size = out_size
    else:
        raise ValueError(
            '`out_size` (N={}) is smaller than number of atoms in mol (N={})'.
            format(out_size, n_atoms))

    # `np.float` was removed from NumPy; the builtin `float` is the same dtype.
    pos_matrix = np.zeros(shape=(size, size, 3), dtype=float)
    # Broadcasting computes every (center - neighbor) difference in one step.
    pos_matrix[:n_atoms, :n_atoms] = (coords[:, np.newaxis, :]
                                      - coords[np.newaxis, :, :])
    return pos_matrix
# Round-trip every unique COCONUT entry through RDKit's InChI parser and
# record which entries reproduce their stored InChIKey.
repo = COCONUT()
# print(repo.count())
i = 0
converted, not_converted = 0, 0
converted_list_file = open("out/inchi_valid_check/converted.txt", mode="w", encoding="utf-8")
converted_not_match_file = open(
    "out/inchi_valid_check/converted_not_match.txt", mode="w", encoding="utf-8")
w = SDWriter("out/inchi_valid_check/converted.sdf")
try:
    # Named `natural_product` rather than `np` so the conventional numpy
    # alias is not shadowed by the loop variable.
    natural_product: Unique_NP
    for natural_product in repo.get_unique_stream():
        try:
            mol = Chem.MolFromInchi(inchi=natural_product.inchi,
                                    treatWarningAsError=True)
            mol.SetProp("coconut_id", natural_product.coconut_id)
        except Exception:
            # Parsing failed (or returned None, so SetProp raised).
            mol = None

        if mol is None:
            # A failed conversion is only counted; it must not fall through
            # to the InChIKey comparison with a placeholder molecule.
            not_converted += 1
            continue

        mol_inchikey = inchi.MolToInchiKey(mol)
        if natural_product.inchikey == mol_inchikey:
            converted_list_file.write(natural_product.inchi + "\n")
            w.write(mol)
            converted += 1
        else:
            converted_not_match_file.write(natural_product.inchi + "\n")
finally:
    # Close the outputs so buffered records reach disk even on error.
    w.close()
    converted_list_file.close()
    converted_not_match_file.close()
def display_numbered(mol: Mol):
    """Display a copy of ``mol`` whose atoms are labelled with their indices.

    The input molecule is left untouched; the copy gets each atom's map
    number set to its atom index before being passed to ``display``.
    """
    numbered = deepcopy(mol)
    for atom in numbered.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx())
    display(numbered)
def renumber_atoms_for_mapping(mol: Mol) -> Mol:
    """Return ``mol`` with its atoms reordered by ascending atom-map number.

    The new order is the ``np.argsort`` of the atoms' map numbers, handed to
    ``RenumberAtoms`` as a list of plain ints.
    """
    map_numbers = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
    new_order = np.argsort(map_numbers).tolist()
    return RenumberAtoms(mol, new_order)
def get_atom_ind(mol: Mol, atom_map: int) -> int:
    """Return the position of the first atom whose map number is ``atom_map``.

    Raises ``ValueError`` when no atom carries that map number.
    """
    matching_positions = (
        position
        for position, atom in enumerate(mol.GetAtoms())
        if atom.GetAtomMapNum() == atom_map
    )
    found = next(matching_positions, None)
    if found is None:
        raise ValueError(f'No atom with map number: {atom_map}')
    return found
merge_cols=['cryst_lig_file'], order=pair_idx) # Extract filepaths to PDB and Mol files conn = sqlite3.connect(db_path) cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") _, _, _, _, out_table_generate = cursor.fetchall() out_table_generate = out_table_generate[0] conn.close() table_data = db_master.retrieve(out_table_generate, ['bind_lig_files', 'mol_files'], {'run_idx': '{}<100000'}) all_pdb_files = [table_data[0][i].encode('ascii','ignore') for i in range(len(table_data[0]))] all_mol_files = [table_data[1][i].encode('ascii','ignore') for i in range(len(table_data[0]))] all_mols = [Chem.MolFromMolFile(all_mol_files[i]) for i in range(len(all_mol_files))] all_num_atoms = [Mol.GetNumAtoms(all_mols[i]) for i in range(len(all_mols))] GetDecoysInit(all_pdb_files, all_mol_files, all_mols, all_num_atoms, max_atom_dif=2, max_substruct=4, max_num_decoys=10) afdb.run_multithread(func='get_decoys', arg_types=[str, str, int], arg_lists=[all_pdb_files, all_mol_files, all_num_atoms], out_types=[str, str], out_names=['bind_lig_files', 'decoy_ligs'], num_threads=100, commit_sec=1) print '\nGot decoys for each ligand in:', str(time.time()-start), 'seconds\n' """Generate conformers for all the decoy ligands---------------------------------"""
def build_atom_features_matrix(mol: Mol) -> np.ndarray:
    """Stack ``get_atom_features`` of every atom into one array, in atom order."""
    feature_rows = []
    for atom in mol.GetAtoms():
        feature_rows.append(get_atom_features(atom))
    return np.array(feature_rows)
def get_smallest_root_match(self, mol: Mol) -> Mol: search_space: Set[Mol] = set(self.molecules.values()) all_idxs = set(range(0, mol.GetNumAtoms())) included_idxs = {0} bonds = {} def register_bond(from_idx: int, to_idx: int): entry = bonds.get(from_idx) if entry is None: entry = [] bonds[from_idx] = entry entry.append(to_idx) for bond in mol.GetBonds(): begin = bond.GetBeginAtomIdx() end = bond.GetEndAtomIdx() register_bond(begin, end) register_bond(end, begin) while len(included_idxs) < mol.GetNumAtoms(): frontier_permutations = reduce( lambda perms, from_idx: perms | set( map( lambda to_idx: frozenset([*included_idxs, to_idx]), filter(lambda idx: idx not in included_idxs, bonds[from_idx]) ) ), included_idxs, set() ) new_search_space = set() for perm in frontier_permutations: e_mol = Chem.EditableMol(mol) perm_idxs = list(all_idxs - perm) perm_idxs.sort(reverse=True) for idx in perm_idxs: e_mol.RemoveAtom(idx) display('mul') display_numbered(e_mol.GetMol()) new_search_space |= set(self.find_superstructures(e_mol.GetMol(), search_space)) included_idxs |= perm if len(new_search_space) == 0: return self.get_smallest_mol(list(search_space)) search_space = new_search_space if len(search_space) < 100: display("from mul") for s in search_space: display_numbered(s) if len(frontier_permutations) > 1: e_mol = Chem.EditableMol(mol) perm_idxs = list(all_idxs - included_idxs) perm_idxs.sort(reverse=True) for idx in perm_idxs: e_mol.RemoveAtom(idx) display('single') display_numbered(e_mol.GetMol()) new_search_space = set(self.find_superstructures(e_mol.GetMol(), search_space)) if len(new_search_space) == 0: return self.get_smallest_mol(list(search_space)) search_space = new_search_space if len(search_space) < 100: display("from mul") for s in search_space: display_numbered(s)