Beispiel #1
0
    def get_multiring_atoms_bonds(self, rdk_mol: Mol, smiles):
        '''
        Count the atoms and bonds that belong to more than one ring. (Not used.)

        Ring membership is tallied from the RDKit ring info of ``rdk_mol``.
        The number of RDKit rings is then compared with the SSSR that
        Open Babel (pybel) computes for ``smiles``; on a mismatch a warning is
        printed and both counts are re-derived from Open Babel instead: the
        atom count is the number of matches of the SMARTS ``[R2]`` and the
        bond count is that number minus one.

        :param rdk_mol: RDKit molecule whose ring info is inspected
        :param smiles: SMILES string parsed by pybel for the cross-check
        :return: tuple ``(n_atoms_multiring, n_bonds_multiring)``
        '''
        # TODO GetRingInfo gives SymmetricSSSR, not TRUE SSSR
        ring_info = rdk_mol.GetRingInfo()

        # How many rings each atom participates in.
        atom_hits = [0] * rdk_mol.GetNumAtoms()
        for ring_atoms in ring_info.AtomRings():
            for atom_idx in ring_atoms:
                atom_hits[atom_idx] += 1

        # How many rings each bond participates in.
        bond_hits = [0] * rdk_mol.GetNumBonds()
        for ring_bonds in ring_info.BondRings():
            for bond_idx in ring_bonds:
                bond_hits[bond_idx] += 1

        n_atoms_multiring = sum(1 for hits in atom_hits if hits > 1)
        n_bonds_multiring = sum(1 for hits in bond_hits if hits > 1)

        py_mol = pybel.readstring('smi', smiles)
        if ri_mismatch := (ring_info.NumRings() != len(py_mol.sssr)):
            print(
                'WARNING: SymmetricSSSR not equal to TRUE SSSR in rdkit. Use Openbabel instead:',
                smiles)
            n_atoms_multiring = len(pybel.Smarts('[R2]').findall(py_mol))
            # NOTE(review): this yields -1 when no atom matches [R2] -- confirm
            # whether a negative bond count is acceptable to callers.
            n_bonds_multiring = n_atoms_multiring - 1

        return n_atoms_multiring, n_bonds_multiring
def build_position_matrix(molecule: Mol) -> np.ndarray:
    """Return the atom coordinates of ``molecule`` as an array.

    Coordinates are read from the conformer returned by
    ``molecule.GetConformer()``; row ``k`` holds ``[x, y, z]`` of atom ``k``.
    """
    conformer = molecule.GetConformer()
    rows = []
    for atom_idx in range(molecule.GetNumAtoms()):
        point = conformer.GetAtomPosition(atom_idx)
        rows.append([point.x, point.y, point.z])
    return np.array(rows)
Beispiel #3
0
def generate_conformers(lig_file, init='generate_conformers_init'):
    """Convert one ligand PDB file to SDF and write optimized conformers.

    Performs the following tasks:
        > Reads the ligand from ``lig_file`` (PDB) with RDKit
        > Writes it as an SDF file under ``init.mol_path`` for future use
        > Adds hydrogens, embeds ``init.num_conformers`` conformers and
          MMFF-optimizes each of them
        > Writes every conformer (hydrogens removed) as its own record to a
          PDB file under ``init.out_lig_path``

    Args:
        lig_file: path of the input PDB file. The ligand file name is taken
            as everything after ``init.lig_path`` plus one separator
            character.
        init: name of the settings object; it is resolved with ``eval`` and
            must provide ``lig_path``, ``out_lig_path``, ``mol_path`` and
            ``num_conformers``.

    Returns:
        ``[[pdb_file, mol_file, num_atoms]]`` where ``num_atoms`` is the atom
        count of the hydrogen-stripped molecule.

    Raises:
        ValueError: if RDKit cannot parse ``lig_file``.
    """
    # NOTE(review): eval() executes arbitrary code; this is only safe while
    # `init` is a trusted, hard-coded name. Kept because callers pass the
    # settings object by name.
    init = eval(init)

    lig_name = lig_file[len(init.lig_path) + 1:]
    pdb_file = os.path.join(init.out_lig_path, lig_name)
    mol_file = os.path.join(init.mol_path, lig_name).replace('.pdb', '.sdf')

    # use rdkit to get a mol object from the PDB
    mol = Chem.MolFromPDBFile(lig_file)
    if mol is None:
        raise ValueError(
            'Could not parse ligand PDB file: {}'.format(lig_file))

    # write the mol to a mol file for future use; close the writer so the
    # record is flushed to disk
    writer = SDWriter(mol_file)
    try:
        writer.write(mol)
    finally:
        writer.close()

    # generate and optimize conformers on the hydrogen-added molecule
    mol_with_hs = Chem.AddHs(mol)
    conf_ids = list(
        AllChem.EmbedMultipleConfs(mol_with_hs, init.num_conformers))
    for cid in conf_ids:
        AllChem.MMFFOptimizeMolecule(mol_with_hs, confId=cid)

    # RemoveHs keeps the conformers, so it only has to be done once
    mol_no_hs = Chem.RemoveHs(mol_with_hs)
    num_atoms = mol_no_hs.GetNumAtoms()

    pdb_writer = PDBWriter(pdb_file)
    try:
        for cid in conf_ids:
            # confId must be passed explicitly; otherwise the writer emits
            # the default conformer for every record
            pdb_writer.write(mol_no_hs, confId=cid)
    finally:
        pdb_writer.close()

    print('Generated conformers for one ligand')
    return [[pdb_file, mol_file, num_atoms]]
def build_adjacency_matrix(molecule: Mol) -> np.ndarray:
    """Return the symmetric atom adjacency matrix of ``molecule``.

    The result is a square float array with one row/column per atom. The
    diagonal is 1 (it starts from an identity matrix) and entry ``[i, j]``
    is 1 whenever atoms ``i`` and ``j`` share a bond.
    """
    n_atoms = molecule.GetNumAtoms()
    adjacency = np.eye(n_atoms)

    for bond in molecule.GetBonds():
        i = bond.GetBeginAtom().GetIdx()
        j = bond.GetEndAtom().GetIdx()
        adjacency[i, j] = 1
        adjacency[j, i] = 1

    return adjacency
Beispiel #5
0
def add_map_numbers(mol: Mol) -> Mol:
    """Return a re-parsed copy of ``mol`` with randomly assigned map numbers.

    The molecule is round-tripped through SMILES, then the numbers
    ``1..n_atoms`` are shuffled with NumPy's global random state and assigned
    to the atoms as atom-map numbers. The input molecule is not modified.
    """
    # A SMILES round trip makes the atom order canonical.
    canonical = Chem.MolFromSmiles(Chem.MolToSmiles(mol))

    shuffled = np.arange(1, canonical.GetNumAtoms() + 1)
    np.random.shuffle(shuffled)

    for atom, number in zip(canonical.GetAtoms(), shuffled):
        atom.SetAtomMapNum(int(number))
    return canonical
Beispiel #6
0
def add_benzene_ring(mol: Mol, start_atom_ind: int, ring_atom_maps: List[int]):
    """Build an aromatic carbon ring on ``mol`` around an existing atom.

    ``ring_atom_maps`` lists the atom-map numbers of the ring members in ring
    order. Members that already exist in ``mol`` are reused; missing ones are
    added as aromatic carbons that inherit the ``in_reactant`` and ``mol_id``
    properties of the start atom (defaulting to ``False`` and ``1``).
    Consecutive members, and finally the first and last member, are joined by
    aromatic bonds unless a bond already exists. Every touched atom and bond
    gets the boolean property ``is_edited``.

    ``mol`` is modified in place and also returned.
    """
    index_by_map = {
        atom.GetAtomMapNum(): idx for idx, atom in enumerate(mol.GetAtoms())
    }

    anchor = mol.GetAtomWithIdx(start_atom_ind)
    anchor.SetBoolProp('is_edited', True)
    anchor.SetIsAromatic(True)
    anchor_map = anchor.GetAtomMapNum()

    in_reactant = (anchor.GetBoolProp('in_reactant')
                   if anchor.HasProp('in_reactant') else False)
    mol_id = anchor.GetIntProp('mol_id') if anchor.HasProp('mol_id') else 1

    # Resolve every ring member to an atom index, creating atoms as needed.
    ring_indices = []
    for atom_map in ring_atom_maps:
        if atom_map == anchor_map:
            ring_indices.append(start_atom_ind)
            continue
        if atom_map in index_by_map:
            ring_indices.append(index_by_map[atom_map])
            continue

        next_index = mol.GetNumAtoms()  # index the new atom will receive
        carbon = Chem.Atom(6)  # benzene has only carbon atoms
        carbon.SetAtomMapNum(atom_map)
        carbon.SetIsAromatic(True)
        carbon.SetBoolProp('is_edited', True)
        carbon.SetBoolProp('in_reactant', in_reactant)
        carbon.SetIntProp('mol_id', mol_id)
        mol.AddAtom(carbon)
        ring_indices.append(next_index)

    # Consecutive neighbours first, then the ring-closing (first, last) pair.
    ring_pairs = list(zip(ring_indices, ring_indices[1:]))
    ring_pairs.append((ring_indices[0], ring_indices[-1]))

    for first, second in ring_pairs:
        bond = mol.GetBondBetweenAtoms(first, second)
        if bond is None:
            # AddBond's return value minus one is used as the new bond's index
            bond_count = mol.AddBond(first,
                                     second,
                                     order=Chem.rdchem.BondType.AROMATIC)
            bond = mol.GetBondWithIdx(bond_count - 1)
        bond.SetBoolProp('is_edited', True)

    return mol
Beispiel #7
0
def mol_to_extended_graph(molecule: Mol, seed: int = 0) -> Graph:
    """Build a ``Graph`` by embedding the molecule's nodes in a random order.

    A NumPy generator seeded with ``seed`` picks a random start atom, the
    bond graph of ``molecule`` is built, and a node sequence is produced by
    ``get_random_bf_sequence`` (presumably a randomized breadth-first
    traversal -- confirm against that helper). Each node of the sequence is
    then embedded into a fresh ``Graph`` in turn.
    """
    generator = np.random.default_rng(seed=seed)
    n_atoms = molecule.GetNumAtoms()
    first_node = generator.integers(low=0, high=n_atoms, size=1).item()

    bonds = build_bond_graph(molecule)
    visit_order = get_random_bf_sequence(graph=bonds,
                                         start=first_node,
                                         rng=generator)

    result = Graph()
    for node in visit_order:
        embed_node_in_graph(result,
                            new_node=node,
                            bond_graph=bonds,
                            rng=generator)
    return result
Beispiel #8
0
def rdmol_to_data(mol: Mol):
    """Convert a single-conformer RDKit molecule into a graph ``Data`` object.

    The returned ``Data`` carries:
      * ``node_type``  -- atomic numbers, one per atom (long tensor)
      * ``pos``        -- positions of conformer 0 (float tensor)
      * ``edge_index`` -- both directions of every bond, sorted by
        ``source * num_atoms + target``
      * ``edge_type``  -- ``BOND_TYPES`` code of each directed edge, in the
        same order as ``edge_index``
      * ``rdmol``      -- a deep copy of ``mol``
      * ``smiles``     -- SMILES string of ``mol``
      * ``nx``         -- undirected networkx view of the data

    Raises:
        AssertionError: if ``mol`` does not have exactly one conformer.
    """
    assert mol.GetNumConformers() == 1
    num_atoms = mol.GetNumAtoms()

    pos = torch.tensor(mol.GetConformer(0).GetPositions(), dtype=torch.float)
    z = torch.tensor([atom.GetAtomicNum() for atom in mol.GetAtoms()],
                     dtype=torch.long)

    # Each bond contributes two directed edges with the same bond type.
    row, col, edge_type = [], [], []
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row += [start, end]
        col += [end, start]
        edge_type += 2 * [BOND_TYPES[bond.GetBondType()]]

    edge_index = torch.tensor([row, col], dtype=torch.long)
    edge_type = torch.tensor(edge_type)

    # Sort edges by (source, target) so the edge order is deterministic.
    perm = (edge_index[0] * num_atoms + edge_index[1]).argsort()
    edge_index = edge_index[:, perm]
    edge_type = edge_type[perm]

    smiles = Chem.MolToSmiles(mol)

    data = Data(node_type=z,
                pos=pos,
                edge_index=edge_index,
                edge_type=edge_type,
                rdmol=copy.deepcopy(mol),
                smiles=smiles)
    data.nx = to_networkx(data, to_undirected=True)

    return data
Beispiel #9
0
def check_num_atoms(mol: rdchem.Mol,
                    max_num_atoms: Optional[int] = -1) -> None:
    """Check number of atoms in `mol` does not exceed `max_num_atoms`.

    If number of atoms in `mol` exceeds the number `max_num_atoms`, it
    will raise `MolFeatureExtractionError` exception.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        The molecule to check.

    max_num_atoms: int, optional, default=-1
        Maximum allowed number of atoms in a molecule. If negative or
        `None`, check passes unconditionally.

    Raises:
    -------
    MolFeatureExtractionError
        If `max_num_atoms` is non-negative and `mol` has more atoms.
    """
    num_atoms = mol.GetNumAtoms()
    # `None` is allowed by the annotation; comparing it with an int would
    # raise TypeError, so treat it like a negative limit (no limit).
    if max_num_atoms is None or max_num_atoms < 0:
        return
    if num_atoms > max_num_atoms:
        raise MolFeatureExtractionError(
            "Atoms in mol (N={}) exceeds "
            "max_num_atoms (N={}).".format(num_atoms, max_num_atoms))
Beispiel #10
0
def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Gasteiger charges and stereochemistry are computed on `mol` itself, so
    the molecule gains the `_GasteigerCharge` (and, where assigned,
    `_CIPCode`) atom properties as a side effect.

    Per atom, the features are concatenated in this order: element symbol,
    degree, hybridization, implicit valence, formal charge (all one-hot),
    Gasteiger charge (NaN replaced by 0), aromaticity, membership in rings
    of size 3-8, total number of hydrogens (one-hot), CIP code R/S,
    `_ChiralityPossible` flag, hydrogen-bond donor, hydrogen-bond acceptor,
    acidic and basic flags.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative or
        `None`, it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or
        `out_size` when padding is requested), and `m` is the number of
        feats.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    """
    if out_size is None:
        out_size = -1

    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(
        mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(
        mol)  # stored under _CIPCode, see doc for more info

    # SMARTS patterns for the pharmacophore-like atom flags
    hydrogen_donor = rdmolfiles.MolFromSmarts(
        "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    hydrogen_acceptor = rdmolfiles.MolFromSmarts(
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    acidic = rdmolfiles.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
    basic = rdmolfiles.MolFromSmarts(
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")

    def _matched_atoms(pattern):
        # Set of atom indices taking part in any match: O(1) membership
        # tests below instead of scanning a flattened tuple per atom.
        return {
            idx
            for match in mol.GetSubstructMatches(pattern) for idx in match
        }

    hydrogen_donor_match = _matched_atoms(hydrogen_donor)
    hydrogen_acceptor_match = _matched_atoms(hydrogen_acceptor)
    acidic_match = _matched_atoms(acidic)
    basic_match = _matched_atoms(basic)

    # Get ring info
    ring = mol.GetRingInfo()

    # Loop-invariant one-hot vocabulary
    hybridization_types = list(rdchem.HybridizationType.names.values())

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)

        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), [
            'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
            'Sn', 'Se', 'other'
        ])
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(), hybridization_types)
        atom_feats += one_hot(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(), [-3, -2, -1, 0, 1, 2, 3])
        g_charge = float(atom.GetProp("_GasteigerCharge"))
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]
        atom_feats += [atom.GetIsAromatic()]

        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality: atoms without an assigned CIP code get [False, False]
        if atom.HasProp('_CIPCode'):
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        else:
            cip_feats = [False, False]
        atom_feats += cip_feats + [atom.HasProp("_ChiralityPossible")]
        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    # NOTE: `np.float` was removed from NumPy; the builtin `float` is the
    # type it aliased.
    if out_size < 0:
        return np.array(mol_feats, dtype=float)
    elif out_size >= n_atoms:
        # 'empty' padding for `mol_feats`. Generate(s) feature matrix of same size for all mols
        # NOTE: len(mol_feats[0]) is the number of feats
        padded_mol_feats = np.zeros((out_size, len(mol_feats[0])),
                                    dtype=float)
        padded_mol_feats[:n_atoms] = np.array(mol_feats, dtype=float)
        return padded_mol_feats
    else:
        raise ValueError(
            '`out_size` (N={}) must be negative or larger than or '
            'equal to the number of atoms in the input molecules (N={}).'.
            format(out_size, n_atoms))
Beispiel #11
0
def construct_pos_matrix(mol: rdchem.Mol,
                         out_size: Optional[int] = -1) -> np.ndarray:
    """Construct relative positions from each atom within the molecule.

    If `mol` has no conformer, one is generated on a copy (hydrogens are
    added, coordinates embedded with ETKDG, hydrogens removed again); the
    caller's molecule is not given the new conformer.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative or
        `None`, it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    pos_matrix: np.ndarray, shape=(n,n,3)
        Relative position (XYZ) coordinates from one atom to the others in
        the mol: `pos_matrix[i, j]` is the position of atom `i` minus the
        position of atom `j`.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms,
        or if no conformer could be embedded.

    Examples:
    ---------
    ```python
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> smiles = 'N[C@@]([H])([C@]([H])(O2)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])([C@]([H])' \
                 '(O)C)C(=O)N[C@@]([H])(Cc1ccc(O)cc1)C(=O)2'
    >>> mol = Chem.MolFromSmiles(smiles)
    >>> mol = Chem.AddHs(mol, addCoords=True)
    >>> AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    >>> mol = Chem.RemoveHs(mol)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=-1)
    >>> pos_matrix.shape
    (34,34,3)

    >>> pos_matrix = construct_pos_matrix(mol, out_size=49)
    >>> pos_matrix.shape
    (49,49,3)
    ```
    """
    if out_size is None:
        out_size = -1

    # Obtain initial distance geometry between atoms, if unavailable
    if mol.GetNumConformers() == 0:
        mol = rdmolops.AddHs(mol, addCoords=True)
        # EmbedMolecule returns a negative id when embedding fails
        if rdDistGeom.EmbedMolecule(mol, rdDistGeom.ETKDG()) < 0:
            raise ValueError('Could not embed a conformer for the molecule.')
        mol = rdmolops.RemoveHs(mol)
    coords = np.asarray(mol.GetConformer().GetPositions(),
                        dtype=float)  # shape=(N,3)
    N = mol.GetNumAtoms()

    # Determine appropriate output size to generate feature matrix of same size for all mols.
    if out_size < 0:
        size = N
    elif out_size >= N:
        size = out_size
    else:
        raise ValueError(
            '`out_size` (N={}) is smaller than number of atoms in mol (N={})'.
            format(out_size, N))

    # NOTE: `np.float` was removed from NumPy; the builtin `float` is the
    # type it aliased.
    pos_matrix = np.zeros(shape=(size, size, 3), dtype=float)
    # Broadcast (N,1,3) - (1,N,3) -> (N,N,3): entry [i, j] = coords[i] - coords[j]
    pos_matrix[:N, :N] = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
    return pos_matrix
				merge_cols=['cryst_lig_file'],
				order=pair_idx)

# Extract filepaths to PDB and Mol files
# List every table name in the SQLite database at `db_path`.
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
# NOTE(review): this unpacking requires exactly five tables and keeps the
# last one; any other table count raises ValueError.
_, _, _, _, out_table_generate = cursor.fetchall()
# Each fetched row is a 1-tuple; keep just the table name.
out_table_generate = out_table_generate[0]
conn.close()

# Fetch the 'bind_lig_files' and 'mol_files' columns of that table.
# NOTE(review): the '{}<100000' entry presumably filters on run_idx --
# confirm against db_master.retrieve.
table_data = db_master.retrieve(out_table_generate, ['bind_lig_files', 'mol_files'], {'run_idx': '{}<100000'})
# Convert the retrieved values to ASCII, dropping characters that cannot be encoded.
all_pdb_files = [table_data[0][i].encode('ascii','ignore') for i in range(len(table_data[0]))]
# Indexed by the length of the first column, so both lists have equal length.
all_mol_files = [table_data[1][i].encode('ascii','ignore') for i in range(len(table_data[0]))]
# Load every Mol file with RDKit and record its atom count.
all_mols = [Chem.MolFromMolFile(all_mol_files[i]) for i in range(len(all_mol_files))]
all_num_atoms = [Mol.GetNumAtoms(all_mols[i]) for i in range(len(all_mols))]

# Set up the shared inputs and limits used by the 'get_decoys' worker.
GetDecoysInit(all_pdb_files, all_mol_files, all_mols, all_num_atoms,
	max_atom_dif=2, max_substruct=4, max_num_decoys=10)

# Run 'get_decoys' over all ligands; outputs are stored under the
# 'bind_lig_files' and 'decoy_ligs' names.
afdb.run_multithread(func='get_decoys',
	arg_types=[str, str, int],
	arg_lists=[all_pdb_files, all_mol_files, all_num_atoms],
	out_types=[str, str],
	out_names=['bind_lig_files', 'decoy_ligs'],
	num_threads=100, commit_sec=1)

# Report elapsed time since `start` (set earlier in the script).
print '\nGot decoys for each ligand in:', str(time.time()-start), 'seconds\n'

"""Generate conformers for all the decoy ligands---------------------------------"""
    def get_smallest_root_match(self, mol: Mol) -> Mol:
        search_space: Set[Mol] = set(self.molecules.values())

        all_idxs = set(range(0, mol.GetNumAtoms()))
        included_idxs = {0}

        bonds = {}

        def register_bond(from_idx: int, to_idx: int):
            entry = bonds.get(from_idx)
            if entry is None:
                entry = []
                bonds[from_idx] = entry
            entry.append(to_idx)

        for bond in mol.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            register_bond(begin, end)
            register_bond(end, begin)

        while len(included_idxs) < mol.GetNumAtoms():
            frontier_permutations = reduce(
                lambda perms, from_idx: perms | set(
                    map(
                        lambda to_idx: frozenset([*included_idxs, to_idx]),
                        filter(lambda idx: idx not in included_idxs, bonds[from_idx])
                    )
                ),
                included_idxs,
                set()
            )

            new_search_space = set()
            for perm in frontier_permutations:
                e_mol = Chem.EditableMol(mol)
                perm_idxs = list(all_idxs - perm)
                perm_idxs.sort(reverse=True)
                for idx in perm_idxs:
                    e_mol.RemoveAtom(idx)
                display('mul')
                display_numbered(e_mol.GetMol())
                new_search_space |= set(self.find_superstructures(e_mol.GetMol(), search_space))
                included_idxs |= perm

            if len(new_search_space) == 0:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            if len(search_space) < 100:
                display("from mul")
                for s in search_space:
                    display_numbered(s)

            if len(frontier_permutations) > 1:
                e_mol = Chem.EditableMol(mol)
                perm_idxs = list(all_idxs - included_idxs)
                perm_idxs.sort(reverse=True)
                for idx in perm_idxs:
                    e_mol.RemoveAtom(idx)
                display('single')
                display_numbered(e_mol.GetMol())
                new_search_space = set(self.find_superstructures(e_mol.GetMol(), search_space))

            if len(new_search_space) == 0:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            if len(search_space) < 100:
                display("from mul")
                for s in search_space:
                    display_numbered(s)