Exemple #1
0
def rdmol_to_data(mol: Mol):
    """Convert a single-conformer RDKit molecule into a graph ``Data`` object.

    Params:
    -------
    mol: Mol
        Molecule with exactly one conformer.

    Returns:
    --------
    data: Data
        Object built with the following fields:

        * ``node_type``: long tensor of atomic numbers, one per atom.
        * ``pos``: float tensor of the conformer's atom positions.
        * ``edge_index``: long tensor of shape (2, 2 * num_bonds); every bond
          is stored in both directions and the columns are sorted by
          ``source * num_atoms + target``.
        * ``edge_type``: ``BOND_TYPES`` code of each edge, in the same order
          as ``edge_index``.
        * ``rdmol``: deep copy of ``mol``.
        * ``smiles``: SMILES string produced by ``Chem.MolToSmiles``.

        The result of ``to_networkx(data, to_undirected=True)`` is attached
        as ``data.nx``.

    Raises:
    -------
    AssertionError
        If ``mol`` does not hold exactly one conformer.
    """
    assert mol.GetNumConformers() == 1, (
        "rdmol_to_data expects exactly one conformer, got "
        f"{mol.GetNumConformers()}")
    num_atoms = mol.GetNumAtoms()

    # GetConformer() without an ID returns the molecule's only conformer even
    # when that conformer's ID is not 0.
    pos = torch.tensor(mol.GetConformer().GetPositions(), dtype=torch.float)
    z = torch.tensor([atom.GetAtomicNum() for atom in mol.GetAtoms()],
                     dtype=torch.long)

    # Store each bond twice (start->end and end->start).
    row, col, edge_type = [], [], []
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row += [start, end]
        col += [end, start]
        edge_type += 2 * [BOND_TYPES[bond.GetBondType()]]

    edge_index = torch.tensor([row, col], dtype=torch.long)
    edge_type = torch.tensor(edge_type)

    # Sort edges by (source, target) so the ordering is deterministic.
    perm = (edge_index[0] * num_atoms + edge_index[1]).argsort()
    edge_index = edge_index[:, perm]
    edge_type = edge_type[perm]

    smiles = Chem.MolToSmiles(mol)

    data = Data(node_type=z,
                pos=pos,
                edge_index=edge_index,
                edge_type=edge_type,
                rdmol=copy.deepcopy(mol),
                smiles=smiles)
    data.nx = to_networkx(data, to_undirected=True)

    return data
Exemple #2
0
def construct_pos_matrix(mol: rdchem.Mol,
                         out_size: Optional[int] = -1) -> np.ndarray:
    """Construct relative positions from each atom within the molecule.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest. If it has no conformer, one is embedded
        (hydrogens are added for the embedding and removed afterwards).

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative or
        ``None``, it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    pos_matrix: np.ndarray, shape=(size, size, 3)
        ``pos_matrix[i, j]`` is the XYZ position of atom ``i`` minus the
        position of atom ``j``. ``size`` is the number of atoms, or
        ``out_size`` when padding is requested.

    Raises:
    -------
    ValueError
        If ``out_size`` is non-negative but smaller than the number of
        atoms, or if no conformer could be embedded.

    Examples:
    ---------
    ```python
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> smiles = ('N[C@@]([H])([C@]([H])(O2)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])([C@]([H])'
    ...           '(O)C)C(=O)N[C@@]([H])(Cc1ccc(O)cc1)C(=O)2')
    >>> mol = Chem.MolFromSmiles(smiles)
    >>> mol = Chem.AddHs(mol, addCoords=True)
    >>> _ = AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    >>> mol = Chem.RemoveHs(mol)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=-1)
    >>> pos_matrix.shape
    (34, 34, 3)

    >>> pos_matrix = construct_pos_matrix(mol, out_size=49)
    >>> pos_matrix.shape
    (49, 49, 3)
    ```
    """
    # Obtain initial distance geometry between atoms, if unavailable.
    if mol.GetNumConformers() == 0:
        mol = rdmolops.AddHs(mol, addCoords=True)
        # EmbedMolecule returns a negative ID on failure; report it here
        # instead of failing later on the conformer lookup.
        if rdDistGeom.EmbedMolecule(mol, rdDistGeom.ETKDG()) < 0:
            raise ValueError('Could not embed a conformer for `mol`.')
        mol = rdmolops.RemoveHs(mol)
    coords = np.asarray(mol.GetConformer().GetPositions(),
                        dtype=float)  # shape=(N,3)
    N = mol.GetNumAtoms()

    # Determine appropriate output size so that all mols can share one shape.
    if out_size is None or out_size < 0:
        size = N
    elif out_size >= N:
        size = out_size
    else:
        raise ValueError(
            f'`out_size` (N={out_size}) is smaller than number of atoms '
            f'in mol (N={N})')

    # `np.float` was removed from NumPy; the builtin `float` is the same
    # float64 dtype.
    pos_matrix = np.zeros(shape=(size, size, 3), dtype=float)
    # Broadcasting yields every pairwise difference (center - neighbor) at
    # once; the padded rows and columns stay zero.
    pos_matrix[:N, :N] = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
    return pos_matrix
Exemple #3
0
def _get_conformer(mol: rdchem.Mol,
                   conformer: str = "min",
                   algo: str = "MMFF") -> rdchem.Mol:
    """Reduce a molecule to the single conformer selected by `conformer`.

    The molecule is modified in place: all of its conformers are removed
    and only the selected one is added back.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest, ideally with multiple conformers.

    conformer: str, optional, default="min"
        Which conformer to select for 3D coordinates. If "min" (or "max"),
        then the conformer with the min (or max) energy is selected. If
        "first" or "last", then the first or last conformer (by position,
        not by conformer ID) is selected. If "avg", then the atom positions
        are averaged over all conformers and written into the first
        conformer, which is then selected.

    algo: str, optional, default="MMFF"
        Which force field ("MMFF" or "UFF") is used to score the conformers.
        Only consulted when `conformer` is "min" or "max". Read the rdkit
        description to determine which one is best suited for your
        application.

    Returns:
    --------
    mol: rdkit.Chem.rdchem.Mol
        The same molecule object, now holding only the conformer of
        interest.

    Raises:
    -------
    ValueError
        If `conformer` is not one of the supported options, or if `mol`
        has no conformers.
    KeyError
        If `conformer` is "min" or "max" and `algo` is not a known force
        field.
    """
    available_confs = ["min", "max", "first", "last", "avg"]
    if conformer not in available_confs:
        raise ValueError(
            f"Cannot get `{conformer}` conformer. Choose from the "
            f"following {available_confs} conformer(s).")
    if mol.GetNumConformers() == 0:
        raise ValueError("Cannot select a conformer: `mol` has no conformers.")

    forcefields = {
        "MMFF": rdForceFieldHelpers.MMFFOptimizeMoleculeConfs,
        "UFF": rdForceFieldHelpers.UFFOptimizeMoleculeConfs
    }

    # Index conformers by position: conformer IDs are not guaranteed to be
    # 0..n-1, so ID-based lookups like GetConformer(0) may fail.
    confs = mol.GetConformers()

    if conformer in ("min", "max"):
        # The helper yields one (status, energy) pair per conformer, in
        # conformer order; only the energy matters for the selection.
        results = forcefields[algo](mol, maxIters=0)
        energies = [energy for _, energy in results]
        pick = np.argmin if conformer == "min" else np.argmax
        conf = confs[int(pick(energies))]
    elif conformer == "first":
        conf = confs[0]
    elif conformer == "last":
        conf = confs[-1]
    else:  # "avg"
        avgpos = np.mean([c.GetPositions() for c in confs], axis=0)
        # Set avg position as new position for all atoms
        conf = confs[0]
        for atom_idx in range(conf.GetNumAtoms()):
            x, y, z = avgpos[atom_idx]
            conf.SetAtomPosition(atom_idx,
                                 Point3D(float(x), float(y), float(z)))

    # Copy before clearing so the selected conformer does not depend on
    # storage owned by the conformers that are about to be removed.
    selected = rdchem.Conformer(conf)
    mol.RemoveAllConformers()
    mol.AddConformer(selected)
    return mol