Example #1
0
def rdmol_to_psi4mols(rdmol: rdkit.Chem.Mol, name: str = None):
    """Build one Psi4 molecule per conformer of an RDKit molecule.

    Each conformer is written out as XYZ text (atom count, a title line
    and one ``symbol x y z`` line per atom) and parsed by Psi4. The
    molecular charge and multiplicity of every resulting molecule are
    then set to 0 and 1 respectively.

    Parameters
    ----------
    rdmol: rdkit.Chem.Mol
        RDKit molecule whose conformers are converted
    name: str (optional)
        Prefix used in the title line of each conformer; ``"Mol"`` is
        used when omitted

    Returns
    -------
    list of :class:`psi4.core.Molecule`
        One entry per conformer, in conformer order; empty when the
        molecule has no conformers
    """
    label = "Mol" if name is None else name
    atom_count = rdmol.GetNumAtoms()
    elements = [
        rdmol.GetAtomWithIdx(idx).GetSymbol() for idx in range(atom_count)
    ]

    psi4mols = []
    # conformers are numbered from 1 in the title line
    for number, conformer in enumerate(rdmol.GetConformers(), 1):
        header = f"{atom_count}\n0 1 {label}_c{number:03d}\n"
        body = "\n".join(
            f"{element} {coord[0]} {coord[1]} {coord[2]}"
            for element, coord in zip(elements, conformer.GetPositions())
        )
        psi4mol = psi4.core.Molecule.from_string(header + body, dtype="xyz")
        psi4mol.set_molecular_charge(0)
        psi4mol.set_multiplicity(1)
        psi4mols.append(psi4mol)

    return psi4mols
Example #2
0
def _parse_atoms_from_svg(atom_elements, mol: rdkit.Chem.Mol):
    """Extract atoms from the SVG atom elements

    Args:
        atom_elements (list[xml.etree.ElementTree.Element]): List of extracted XML elements
        mol (rdkit.Chem.rdchem.Mol): RDkit molecule

    Returns:
        list[dict]: list of JSON-style atom representation.
    """
    result = []
    for atom_svg in atom_elements:
        try:
            atom_id_str = re.search(r"\d+",
                                    atom_svg.attrib.get("class")).group(0)
            atom_id = int(atom_id_str)

            if atom_id >= mol.GetNumAtoms():
                continue

            temp = {
                "name": mol.GetAtomWithIdx(atom_id).GetProp("name"),
                "labels": [],
                "x": float(atom_svg.attrib.get("cx")),
                "y": float(atom_svg.attrib.get("cy")),
            }
            result.append(temp)
        except RuntimeError:
            pass  # we do not care for H atoms

    return result
Example #3
0
    def mol_to_graph(self, molecule: rdkit.Chem.Mol) -> None:
        """
        Builds the unpadded node and edge feature arrays for `molecule` and
        stores them in `self.node_features` and `self.edge_features`.

        The number of atoms processed is read from `self.n_nodes`, which must
        therefore be set before this method is called.
        """
        num_nodes = self.n_nodes

        # one feature row per atom, in atom-index order
        per_atom = [
            self.atom_features(molecule.GetAtomWithIdx(idx))
            for idx in range(num_nodes)
        ]
        node_features = np.array(per_atom, dtype=np.int32)

        # [atom, atom, bond-type] array; entries stay zero where no bond exists
        edge_shape = [num_nodes, num_nodes, self.constants.n_edge_features]
        edge_features = np.zeros(edge_shape, dtype=np.int32)
        for bond in molecule.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            channel = self.constants.bondtype_to_int[bond.GetBondType()]
            # mark the bond in both directions
            edge_features[begin, end, channel] = 1
            edge_features[end, begin, channel] = 1

        # write the results back onto the instance (arrays are not padded!)
        self.n_nodes = num_nodes
        self.node_features = node_features
        self.edge_features = edge_features
Example #4
0
def rdmol_as_array(rdmol: rdkit.Chem.Mol) -> np.ndarray:
    """Return the Cartesian coordinates of an rdkit molecule as an array.

    Coordinates are read from the conformer returned by
    ``rdmol.GetConformer(id=-1)``; the result has one ``(x, y, z)`` row per
    atom, in the order the atoms are iterated.
    """
    conformer = rdmol.GetConformer(id=-1)
    atom_seq = rdmol.GetAtoms()
    n_atoms = len(atom_seq)

    def iter_coordinates():
        # flatten every atom position into a single x, y, z, x, y, z, ... stream
        for atom in atom_seq:
            point = conformer.GetAtomPosition(atom.GetIdx())
            yield point.x
            yield point.y
            yield point.z

    # the element count is known up front, so the array is allocated in one go
    coords = np.fromiter(iter_coordinates(), dtype=float, count=3 * n_atoms)
    coords.shape = (n_atoms, 3)
    return coords
Example #5
0
    def __init__(self, constants: namedtuple, molecule: rdkit.Chem.Mol,
                 node_features: torch.Tensor,
                 edge_features: torch.Tensor) -> None:
        """
        Stores an already-computed graph: the molecule and its node and edge
        features are kept as given, and `self.n_nodes` is taken from the
        molecule's atom count.
        """
        # the parent constructor only receives `False` placeholders; the
        # actual values are assigned to the instance below
        placeholders = dict.fromkeys(
            ("molecule", "node_features", "edge_features",
             "atom_feature_vector"),
            False)
        super().__init__(constants, **placeholders)

        # an object without `GetNumAtoms` (e.g. `None`) counts as zero nodes
        try:
            atom_count = molecule.GetNumAtoms()
        except AttributeError:
            atom_count = 0
        self.n_nodes = atom_count

        self.molecule = molecule
        self.node_features = node_features
        self.edge_features = edge_features
Example #6
0
    def __init__(self, constants: namedtuple,
                 molecule: rdkit.Chem.Mol) -> None:
        """
        Builds the graph representation of `molecule`: features are computed,
        nodes are remapped, and the result is padded, in that order.
        """
        # the parent constructor only receives `False` placeholders
        placeholders = dict.fromkeys(
            ("molecule", "node_features", "edge_features",
             "atom_feature_vector"),
            False)
        super().__init__(constants, **placeholders)

        # filled in later by `self.node_remap()`
        self.node_ordering = None

        # hydrogens are made explicit only if requested and not ignored
        add_hydrogens = (self.constants.use_explicit_H
                         and not self.constants.ignore_H)
        if add_hydrogens:
            molecule = rdkit.Chem.AddHs(molecule)

        # must be set before `mol_to_graph()`, which reads `self.n_nodes`
        self.n_nodes = molecule.GetNumAtoms()

        # step 1: extract node/edge features from the `rdkit.Chem.Mol()` object
        self.mol_to_graph(molecule=molecule)

        # step 2: reorder the nodes (canonical or random node ordering)
        self.node_remap(molecule=molecule)

        # step 3: pad up to the largest graph size in the dataset
        # (`self.constants.max_n_nodes`)
        self.pad_graph_representation()
Example #7
0
    def _connectivity_COO_format(mol: rdkit.Chem.Mol) -> np.ndarray:
        """
        Returns the connectivity of the molecular graph in COO format.

        Every bond is listed twice, once per direction, so ``num_edges`` is
        twice the number of bonds.

        Parameters
        ----------
        mol: rdkit.Chem.Mol
            rdkit molecule to extract bonds from

        Returns
        -------
        np.ndarray
            integer graph connectivity in COO format with shape
            ``[2, num_edges]``; shape ``[2, 0]`` for a molecule without bonds
        """

        row, col = [], []

        # TODO: Is GetBonds() deterministic?
        for bond in mol.GetBonds():
            start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
            # add both directions: start -> end and end -> start
            row.extend((start, end))
            col.extend((end, start))

        # An explicit integer dtype is required: with no bonds the lists are
        # empty and NumPy would otherwise fall back to float64, which is not
        # a valid index dtype.
        return np.array([row, col], dtype=int)
Example #8
0
def convert_svg(svg_string, ccd_id, mol: rdkit.Chem.Mol):
    """Parse information from SVG depiction into object.

    ``circle`` elements become atoms, ``path`` elements carrying a
    ``class`` attribute become bonds, ``text`` elements become labels
    attached to the atom whose centre is nearest, and the first ``rect``
    element supplies the resolution.

    Args:
        svg_string (str): SVG as string.
        ccd_id: Identifier stored unchanged under the ``ccd_id`` key.
        mol (rdkit.Chem.Mol): RDKit mol object used for depiction.

    Returns:
        :obj:`dict` of :obj:`dict`: object with all the details for
        json serialization. Keys are ``ccd_id``, ``resolution``,
        ``atoms`` and ``bonds``.
    """
    svg_ns = '{http://www.w3.org/2000/svg}'

    result_bag = OrderedDict([('ccd_id', ccd_id), ('resolution', {}),
                              ('atoms', []), ('bonds', [])])
    svg_string = _fix_svg(svg_string)
    svg = ET.fromstring(svg_string)

    atom_elem = svg.findall(svg_ns + 'circle')
    bond_elem = svg.findall(svg_ns + 'path')
    dimensions_svg = svg.find(svg_ns + 'rect')
    label_elem = svg.findall(svg_ns + 'text')

    for atom_svg in atom_elem:
        # the first run of digits in the class attribute is the atom index
        atom_id_str = re.search(r'\d+', atom_svg.attrib.get('class')).group(0)
        atom_id = int(atom_id_str)
        temp = {
            'name': mol.GetAtomWithIdx(atom_id).GetProp('name'),
            'label': {},
            'x': float(atom_svg.attrib.get('cx')),
            'y': float(atom_svg.attrib.get('cy'))
        }
        result_bag['atoms'].append(temp)

    # A KD-tree cannot be built from an empty point set, so it is only
    # created when the depiction contains at least one atom.
    atom_centers = [[atom['x'], atom['y']] for atom in result_bag['atoms']]
    kd_tree = KDTree(atom_centers) if atom_centers else None

    for bond_svg in bond_elem:
        # paths without a class are not bonds
        if 'class' not in bond_svg.attrib:
            continue

        bond_id = int(re.search(r'\d+', bond_svg.attrib['class']).group(0))
        bond = mol.GetBondWithIdx(bond_id)
        temp = {
            'bgn': bond.GetBeginAtom().GetProp('name'),
            'end': bond.GetEndAtom().GetProp('name'),
            'coords': bond_svg.attrib.get('d'),
            'style': bond_svg.attrib.get('style')
        }
        result_bag['bonds'].append(temp)

    # Without atoms there is nothing a label could be attached to.
    for label_svg in (label_elem if kd_tree is not None else []):
        tspans = [
            {
                'value': tspan.text,
                # a missing style is reported as an empty string
                'style': tspan.attrib.get('style', '')
            }
            for tspan in label_svg.findall(svg_ns + 'tspan')
            if tspan.text is not None
        ]
        temp = {
            'x': float(label_svg.attrib.get('x')),
            'y': float(label_svg.attrib.get('y')),
            'style': label_svg.attrib.get('style'),
            'tspans': tspans
        }
        # attach the label to the closest atom centre; a later label
        # replaces an earlier one on the same atom
        nearest_index = kd_tree.query([temp['x'], temp['y']])[1]
        result_bag['atoms'][nearest_index]['label'] = temp

    result_bag['resolution'] = {
        'x': float(dimensions_svg.attrib.get('width')),
        'y': float(dimensions_svg.attrib.get('height'))
    }

    return result_bag