def rdmol_to_psi4mols(rdmol: rdkit.Chem.Mol, name: str = None):
    """Build one Psi4 molecule for every conformer of an RDKit molecule.

    Each conformer is written out as an xyz-style string whose second line
    reads ``0 1 <name>_c<NNN>`` (conformers are numbered from 1) and is then
    parsed with ``psi4.core.Molecule.from_string``. Every resulting molecule
    has its molecular charge set to 0 and its multiplicity set to 1.

    Parameters
    ----------
    rdmol: rdkit.Chem.Mol
        RDKit molecule with at least one conformer
    name: str (optional)
        Molecule name; ``"Mol"`` is used when it is ``None``

    Returns
    -------
    list of :class:`psi4.core.Molecule`
        One molecule per conformer, in the order returned by
        ``rdmol.GetConformers()``
    """
    conformers = rdmol.GetConformers()
    atom_count = rdmol.GetNumAtoms()
    elements = [
        rdmol.GetAtomWithIdx(idx).GetSymbol() for idx in range(atom_count)
    ]
    label = "Mol" if name is None else name
    line_template = "{sym} {x[0]} {x[1]} {x[2]}"

    psi4_mols = []
    for number, conformer in enumerate(conformers, 1):
        # Header: atom count, then "charge multiplicity name" line.
        header = f"{atom_count}\n0 1 {label}_c{number:03d}\n"
        body = "\n".join(
            line_template.format(sym=element, x=coord)
            for element, coord in zip(elements, conformer.GetPositions())
        )
        psi4_mol = psi4.core.Molecule.from_string(header + body, dtype="xyz")
        psi4_mol.set_molecular_charge(0)
        psi4_mol.set_multiplicity(1)
        psi4_mols.append(psi4_mol)
    return psi4_mols
def _parse_atoms_from_svg(atom_elements, mol: rdkit.Chem.Mol):
    """Turn SVG atom elements into JSON-style atom records.

    The atom index is the first run of digits found in each element's
    ``class`` attribute. Elements whose index is not smaller than
    ``mol.GetNumAtoms()`` are skipped, as are elements for which a
    ``RuntimeError`` is raised while the record is being built.

    Args:
        atom_elements (list[xml.etree.ElementTree.Element]): List of
            extracted XML elements; each needs ``class``, ``cx`` and ``cy``
            attributes.
        mol (rdkit.Chem.rdchem.Mol): RDKit molecule whose atoms carry a
            ``name`` property.

    Returns:
        list[dict]: One dict per kept element with the keys ``name``,
        ``labels`` (always an empty list here), ``x`` and ``y``.
    """
    parsed = []
    for element in atom_elements:
        attributes = element.attrib
        try:
            index = int(re.search(r"\d+", attributes.get("class")).group(0))
            if index < mol.GetNumAtoms():
                parsed.append(
                    {
                        "name": mol.GetAtomWithIdx(index).GetProp("name"),
                        "labels": [],
                        "x": float(attributes.get("cx")),
                        "y": float(attributes.get("cy")),
                    }
                )
        except RuntimeError:
            # Deliberately ignored; per the original note these are H atoms
            # we do not care for.
            continue
    return parsed
def mol_to_graph(self, molecule: rdkit.Chem.Mol) -> None:
    """
    Generates the graph representation (`self.node_features` and
    `self.edge_features`) when creating a new `PreprocessingGraph`.

    The number of atoms is taken from `self.n_nodes`, which must already be
    set. Node features come from calling `self.atom_features` on each atom;
    edge features are a symmetric `int32` tensor of shape
    `[n_atoms, n_atoms, self.constants.n_edge_features]` with a 1 in the
    channel that `self.constants.bondtype_to_int` assigns to each bond's
    type. Neither array is padded.
    """
    n_atoms = self.n_nodes

    # one feature row per atom, visited in index order
    feature_rows = [
        self.atom_features(molecule.GetAtomWithIdx(idx))
        for idx in range(n_atoms)
    ]
    node_features = np.array(feature_rows, dtype=np.int32)

    # adjacency tensor with one channel per bond type
    edge_shape = [n_atoms, n_atoms, self.constants.n_edge_features]
    edge_features = np.zeros(edge_shape, dtype=np.int32)
    for bond in molecule.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        channel = self.constants.bondtype_to_int[bond.GetBondType()]
        # bonds are undirected, so mark both orientations
        for src, dst in ((begin, end), (end, begin)):
            edge_features[src, dst, channel] = 1

    self.n_nodes = n_atoms
    self.node_features = node_features  # not padded!
    self.edge_features = edge_features  # not padded!
def rdmol_as_array(rdmol: rdkit.Chem.Mol) -> np.ndarray:
    """Return the Cartesian coordinates of an rdkit molecule as an array.

    Positions are read from the conformer returned by
    ``rdmol.GetConformer(id=-1)``. The result is a float array of shape
    ``(n_atoms, 3)`` whose rows follow the order of ``rdmol.GetAtoms()``
    and hold the ``x``, ``y`` and ``z`` of each atom's position.
    """
    conformer = rdmol.GetConformer(id=-1)
    atom_seq = rdmol.GetAtoms()

    coords = np.empty((len(atom_seq), 3), dtype=float)
    for row, atom in zip(coords, atom_seq):
        # ``row`` is a view into ``coords``, so this fills the array in place
        point = conformer.GetAtomPosition(atom.GetIdx())
        row[:] = (point.x, point.y, point.z)
    return coords
def __init__(self,
             constants: namedtuple,
             molecule: rdkit.Chem.Mol,
             node_features: torch.Tensor,
             edge_features: torch.Tensor) -> None:
    """
    Stores an already-built graph: the molecule plus its node and edge
    features, exactly as given.

    The parent initialiser is called with `constants` and with every other
    argument set to `False`. `self.n_nodes` is the molecule's atom count, or
    0 when `molecule` does not provide `GetNumAtoms()`.
    """
    super().__init__(constants,
                     molecule=False,
                     node_features=False,
                     edge_features=False,
                     atom_feature_vector=False)

    # objects without `GetNumAtoms()` are treated as an empty graph
    try:
        atom_count = molecule.GetNumAtoms()
    except AttributeError:
        atom_count = 0
    self.n_nodes = atom_count

    self.molecule = molecule
    self.node_features = node_features
    self.edge_features = edge_features
def __init__(self, constants: namedtuple, molecule: rdkit.Chem.Mol) -> None:
    """
    Builds the graph representation of `molecule` in four steps: optional
    addition of explicit hydrogens, feature extraction, node remapping and
    padding.

    The parent initialiser is called with `constants` and with every other
    argument set to `False`; the attributes it leaves unset are filled in
    here.
    """
    super().__init__(constants,
                     molecule=False,
                     node_features=False,
                     edge_features=False,
                     atom_feature_vector=False)

    # filled in later by `self.node_remap()`
    self.node_ordering = None

    # hydrogens are only added when they are both explicit and not ignored
    settings = self.constants
    add_hydrogens = settings.use_explicit_H and not settings.ignore_H
    mol = rdkit.Chem.AddHs(molecule) if add_hydrogens else molecule

    self.n_nodes = mol.GetNumAtoms()

    # get the graph attributes from the `rdkit.Chem.Mol()` object
    self.mol_to_graph(molecule=mol)

    # remap the nodes using either a canonical or random node ordering
    self.node_remap(molecule=mol)

    # pad up to size of largest graph in dataset
    # (`self.constants.max_n_nodes`)
    self.pad_graph_representation()
def _connectivity_COO_format(mol: rdkit.Chem.Mol) -> np.ndarray:
    """
    Returns the connectivity of the molecular graph in COO format.

    Every bond contributes two directed edges, ``begin -> end`` and
    ``end -> begin``, stored next to each other in the order in which
    ``mol.GetBonds()`` yields the bonds.

    Parameters
    ----------
    mol: rdkit.Chem.Mol
        rdkit molecule to extract bonds from

    Returns
    -------
    np.ndarray
        integer array with the graph connectivity in COO format and shape
        ``[2, num_edges]``, where ``num_edges`` is twice the number of
        bonds; a molecule without bonds gives shape ``[2, 0]``
    """
    row, col = [], []

    # TODO: Is GetBonds() deterministic?
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row.extend((start, end))
        col.extend((end, start))

    # An explicit integer dtype keeps the bond-free case an index array;
    # numpy would otherwise infer float64 from the two empty lists.
    return np.array([row, col], dtype=int)
def convert_svg(svg_string, ccd_id, mol: rdkit.Chem.Mol):
    """Parse information from SVG depiction into object.

    The SVG is first passed through ``_fix_svg`` and then parsed. Circles
    become atoms, paths that carry a ``class`` attribute become bonds, and
    each text element is stored as the ``label`` of the atom whose centre is
    nearest to the text position. If the SVG contains no atom circles there
    is nothing to attach labels to, so text elements are ignored.

    Args:
        svg_string (str): SVG as string.
        ccd_id: Value stored unchanged under the ``ccd_id`` key of the
            result.
        mol (rdkit.Chem.Mol): RDKit mol object used for depiction. Its
            atoms must carry a ``name`` property.

    Returns:
        :obj:`dict` of :obj:`dict`: object with all the details for json
        serialization. Keys are ``ccd_id``, ``resolution`` (``x``/``y``
        taken from the width/height of the first ``rect``), ``atoms`` and
        ``bonds``.
    """
    svg_ns = '{http://www.w3.org/2000/svg}'
    result_bag = OrderedDict([('ccd_id', ccd_id), ('resolution', {}),
                              ('atoms', []), ('bonds', [])])

    svg_string = _fix_svg(svg_string)
    svg = ET.fromstring(svg_string)

    atom_elem = svg.findall(svg_ns + 'circle')
    bond_elem = svg.findall(svg_ns + 'path')
    dimensions_svg = svg.find(svg_ns + 'rect')
    label_elem = svg.findall(svg_ns + 'text')

    for atom_svg in atom_elem:
        # the atom index is the first number in the element's class name
        atom_id_str = re.search(r'\d+', atom_svg.attrib.get('class')).group(0)
        atom_id = int(atom_id_str)

        result_bag['atoms'].append({
            'name': mol.GetAtomWithIdx(atom_id).GetProp('name'),
            'label': {},
            'x': float(atom_svg.attrib.get('cx')),
            'y': float(atom_svg.attrib.get('cy'))
        })

    # KDTree rejects an empty point set, so only build it when atoms exist.
    atom_centers = [[atom['x'], atom['y']] for atom in result_bag['atoms']]
    kd_tree = KDTree(atom_centers) if atom_centers else None

    for bond_svg in bond_elem:
        # paths without a class are not bonds
        if 'class' not in bond_svg.attrib:
            continue

        bond_id = int(re.search(r'\d+', bond_svg.attrib['class']).group(0))
        bond = mol.GetBondWithIdx(bond_id)
        result_bag['bonds'].append({
            'bgn': bond.GetBeginAtom().GetProp('name'),
            'end': bond.GetEndAtom().GetProp('name'),
            'coords': bond_svg.attrib.get('d'),
            'style': bond_svg.attrib.get('style')
        })

    if kd_tree is not None:
        for label_svg in label_elem:
            temp = {
                'x': float(label_svg.attrib.get('x')),
                'y': float(label_svg.attrib.get('y')),
                'style': label_svg.attrib.get('style'),
                # tspans without text are dropped; a missing style becomes ''
                'tspans': [
                    {
                        'value': tspan.text,
                        'style': tspan.attrib.get('style', '')
                    }
                    for tspan in label_svg.findall(svg_ns + 'tspan')
                    if tspan.text is not None
                ]
            }

            # attach the label to the closest atom centre
            nearest_index = kd_tree.query([temp['x'], temp['y']])[1]
            result_bag['atoms'][nearest_index]['label'] = temp

    result_bag['resolution'] = {
        'x': float(dimensions_svg.attrib.get('width')),
        'y': float(dimensions_svg.attrib.get('height'))
    }

    return result_bag