def update_feat_values(mol: Mol, atom_props: dict, bond_props: dict):
    """Record the feature values that occur in *mol*, in place.

    Both ``atom_props`` and ``bond_props`` map a property key to a
    collection exposing ``.add`` (presumably a ``set`` -- confirm against
    callers).  For every atom of *mol* and every key of ``atom_props``, the
    value returned by ``try_get_atom_feature(atom, key)`` is added to the
    collection stored under that key; bonds are handled the same way with
    ``bond_props`` and ``try_get_bond_feature``.

    Nothing is returned; the two dictionaries' values are mutated.
    """
    for atom in mol.GetAtoms():
        for feature_name, observed in atom_props.items():
            observed.add(try_get_atom_feature(atom, feature_name))

    for bond in mol.GetBonds():
        for feature_name, observed in bond_props.items():
            observed.add(try_get_bond_feature(bond, feature_name))
def build_adjacency_matrix(molecule: Mol) -> np.ndarray:
    """Return the symmetric atom-by-atom adjacency matrix of *molecule*.

    The result is a square float array with one row/column per atom.  The
    diagonal is 1 (the matrix starts as an identity), and entries ``[i, j]``
    and ``[j, i]`` are 1 for every bond joining atoms ``i`` and ``j``; all
    other entries are 0.
    """
    adjacency = np.eye(molecule.GetNumAtoms())

    # Gather (begin, end) atom indices for all bonds up front so that the
    # matrix can be filled with a single vectorised assignment per direction.
    endpoints = np.array(
        [
            (bond.GetBeginAtom().GetIdx(), bond.GetEndAtom().GetIdx())
            for bond in molecule.GetBonds()
        ],
        dtype=np.intp,
    ).reshape(-1, 2)  # keeps a well-formed (0, 2) shape when there are no bonds

    sources = endpoints[:, 0]
    targets = endpoints[:, 1]
    adjacency[sources, targets] = 1
    adjacency[targets, sources] = 1
    return adjacency
def rdmol_to_data(mol: Mol):
    """Convert a single-conformer molecule into a graph ``Data`` object.

    The returned object is built with these fields:

    * ``node_type`` -- long tensor of atomic numbers, one per atom.
    * ``pos`` -- float tensor of the atom positions of conformer 0.
    * ``edge_index`` -- long tensor of shape ``(2, 2 * num_bonds)``; every
      bond contributes both directions (begin->end and end->begin).
    * ``edge_type`` -- per-edge value looked up in ``BOND_TYPES`` by the
      bond's type, aligned with ``edge_index``.
    * ``rdmol`` -- a deep copy of *mol*.
    * ``smiles`` -- ``Chem.MolToSmiles(mol)``.
    * ``nx`` -- ``to_networkx(data, to_undirected=True)`` of the above.

    Edges are sorted by ``source * num_atoms + target`` (row-major order).

    Raises:
        AssertionError: if *mol* does not carry exactly one conformer.

    NOTE(review): for a molecule without bonds ``torch.tensor([])`` yields an
    empty tensor of torch's default dtype for ``edge_type`` -- confirm that
    downstream code tolerates this.
    """
    assert mol.GetNumConformers() == 1, "rdmol_to_data expects exactly one conformer"

    num_atoms = mol.GetNumAtoms()
    pos = torch.tensor(mol.GetConformer(0).GetPositions(), dtype=torch.float)
    z = torch.tensor(
        [atom.GetAtomicNum() for atom in mol.GetAtoms()], dtype=torch.long
    )

    # Each bond is stored twice so the edge list describes both directions.
    row, col, edge_type = [], [], []
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row += [start, end]
        col += [end, start]
        edge_type += 2 * [BOND_TYPES[bond.GetBondType()]]

    edge_index = torch.tensor([row, col], dtype=torch.long)
    edge_type = torch.tensor(edge_type)

    # Canonical edge order: sort by (source, target) via a flattened index.
    perm = (edge_index[0] * num_atoms + edge_index[1]).argsort()
    edge_index = edge_index[:, perm]
    edge_type = edge_type[perm]

    smiles = Chem.MolToSmiles(mol)
    data = Data(
        node_type=z,
        pos=pos,
        edge_index=edge_index,
        edge_type=edge_type,
        rdmol=copy.deepcopy(mol),
        smiles=smiles,
    )
    data.nx = to_networkx(data, to_undirected=True)
    return data
def build_bond_graph(molecule: Mol) -> Graph:
    """Build a ``Graph`` with one edge per bond of *molecule*.

    Edges connect the begin and end atom indices of each bond.  Nodes are
    only introduced through ``add_edge``, so an atom that takes part in no
    bond is not explicitly added to the graph.
    """
    bond_graph = Graph()
    for bond in molecule.GetBonds():
        bond_graph.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
    return bond_graph
def get_smallest_root_match(self, mol: Mol) -> Mol:
    """Narrow ``self.molecules`` by matching a fragment of *mol* grown from atom 0.

    The fragment starts as atom index 0 and is widened along bonds.  In each
    round the candidate set (initially all values of ``self.molecules``) is
    replaced by what ``self.find_superstructures`` returns for the widened
    fragment(s).  As soon as a round yields no candidates, the previous
    candidate set is handed to ``self.get_smallest_mol`` and that result is
    returned.

    Intermediate fragments and small candidate sets are shown through
    ``display`` / ``display_numbered``.

    NOTE(review): the line structure of this method was reconstructed from a
    whitespace-collapsed source; the nesting below is the most plausible
    reading -- confirm against the original file.
    NOTE(review): no ``return`` is visible after the ``while`` loop, so when
    every atom ends up included the method implicitly returns ``None``
    despite the ``-> Mol`` annotation -- confirm whether that is intended.
    """
    search_space: Set[Mol] = set(self.molecules.values())
    all_idxs = set(range(0, mol.GetNumAtoms()))
    # The fragment always starts at atom index 0 (the "root").
    included_idxs = {0}
    # Adjacency lists: atom index -> list of bonded atom indices.
    bonds = {}

    def register_bond(from_idx: int, to_idx: int):
        # Append to_idx to from_idx's neighbour list, creating it on first use.
        entry = bonds.get(from_idx)
        if entry is None:
            entry = []
            bonds[from_idx] = entry
        entry.append(to_idx)

    # Register every bond in both directions.
    for bond in mol.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        register_bond(begin, end)
        register_bond(end, begin)

    while len(included_idxs) < mol.GetNumAtoms():
        # One candidate fragment per frontier atom: the current fragment plus
        # a single not-yet-included neighbour of any included atom.
        # NOTE(review): bonds[from_idx] raises KeyError for an included atom
        # that has no bonds (e.g. atom 0 of a multi-atom molecule when it is
        # isolated) -- confirm inputs are always connected through atom 0.
        frontier_permutations = reduce(
            lambda perms, from_idx: perms | set(
                map(
                    lambda to_idx: frozenset([*included_idxs, to_idx]),
                    filter(lambda idx: idx not in included_idxs, bonds[from_idx])
                )
            ),
            included_idxs,
            set()
        )
        new_search_space = set()
        for perm in frontier_permutations:
            e_mol = Chem.EditableMol(mol)
            # Remove every atom outside the fragment, highest index first
            # (presumably so earlier removals do not shift the indices still
            # to be removed -- confirm against RDKit's RemoveAtom semantics).
            perm_idxs = list(all_idxs - perm)
            perm_idxs.sort(reverse=True)
            for idx in perm_idxs:
                e_mol.RemoveAtom(idx)
            display('mul')
            display_numbered(e_mol.GetMol())
            # Candidates matching ANY single-atom extension are kept (union).
            new_search_space |= set(self.find_superstructures(e_mol.GetMol(), search_space))
            # Safe to grow here: frontier_permutations is already materialised.
            included_idxs |= perm
        # No candidate survives the extension: answer from the previous set.
        if len(new_search_space) == 0:
            return self.get_smallest_mol(list(search_space))
        search_space = new_search_space
        if len(search_space) < 100:
            display("from mul")
            for s in search_space:
                display_numbered(s)
        # With several frontier atoms, additionally match the fragment that
        # contains all of them at once (included_idxs now holds them all).
        if len(frontier_permutations) > 1:
            e_mol = Chem.EditableMol(mol)
            perm_idxs = list(all_idxs - included_idxs)
            perm_idxs.sort(reverse=True)
            for idx in perm_idxs:
                e_mol.RemoveAtom(idx)
            display('single')
            display_numbered(e_mol.GetMol())
            new_search_space = set(self.find_superstructures(e_mol.GetMol(), search_space))
            if len(new_search_space) == 0:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            if len(search_space) < 100:
                # NOTE(review): label repeats "from mul" although this is the
                # 'single' step -- possibly a copy-paste leftover; unchanged.
                display("from mul")
                for s in search_space:
                    display_numbered(s)