Esempio n. 1
0
    def get_multiring_atoms_bonds(self, rdk_mol: Mol, smiles):
        '''
        Count the atoms and bonds that are members of more than one ring.

        Marked "Not used" by the original author.

        The counts are first derived from the RDKit ring info. When the
        number of RDKit rings differs from the length of the Open Babel
        ``sssr`` list for the same SMILES, a warning is printed and the
        counts are replaced: atoms matching the SMARTS ``[R2]`` are counted
        with Open Babel and the bond count is set to that number minus one.

        :param rdk_mol: RDKit molecule to inspect
        :param smiles: SMILES string parsed again with pybel
        :return: tuple ``(n_atoms_multiring, n_bonds_multiring)``
        '''
        # TODO GetRingInfo gives SymmetricSSSR, not TRUE SSSR
        ring_info = rdk_mol.GetRingInfo()

        # How many rings each atom / bond takes part in.
        atom_hits = [0] * rdk_mol.GetNumAtoms()
        for ring_atoms in ring_info.AtomRings():
            for atom_idx in ring_atoms:
                atom_hits[atom_idx] += 1

        bond_hits = [0] * rdk_mol.GetNumBonds()
        for ring_bonds in ring_info.BondRings():
            for bond_idx in ring_bonds:
                bond_hits[bond_idx] += 1

        n_atoms_multiring = sum(1 for hits in atom_hits if hits > 1)
        n_bonds_multiring = sum(1 for hits in bond_hits if hits > 1)

        py_mol = pybel.readstring('smi', smiles)
        if ring_info.NumRings() != len(py_mol.sssr):
            print(
                'WARNING: SymmetricSSSR not equal to TRUE SSSR in rdkit. Use Openbabel instead:',
                smiles)
            n_atoms_multiring = len(pybel.Smarts('[R2]').findall(py_mol))
            n_bonds_multiring = n_atoms_multiring - 1

        return n_atoms_multiring, n_bonds_multiring
Esempio n. 2
0
def mark_reactants(source_mol: Mol, target_mol: Mol):
    """Flag the atoms of `source_mol` whose map number occurs in `target_mol`.

    Every matching atom gets the boolean property 'in_target' set to True;
    all other atoms are left untouched. Nothing is returned.
    """
    target_map_nums = {
        atom.GetAtomMapNum() for atom in reversed(target_mol.GetAtoms())
    }
    for atom in source_mol.GetAtoms():
        if atom.GetAtomMapNum() in target_map_nums:
            atom.SetBoolProp('in_target', True)
def build_position_matrix(molecule: Mol) -> np.ndarray:
    """Return the atom coordinates of the molecule's default conformer.

    The result has one ``[x, y, z]`` row per atom, in atom-index order.
    """
    conf = molecule.GetConformer()
    rows = []
    for k in range(molecule.GetNumAtoms()):
        # Look the position up once per atom rather than once per coordinate.
        position = conf.GetAtomPosition(k)
        rows.append([position.x, position.y, position.z])
    return np.array(rows)
Esempio n. 4
0
def update_feat_values(mol: Mol, atom_props: dict, bond_props: dict):
    """Record the feature values observed in `mol`.

    For every atom and every key of `atom_props`, the value returned by
    `try_get_atom_feature` is added to ``atom_props[key]``; bonds are
    handled the same way with `bond_props` and `try_get_bond_feature`.
    The dict values must support ``.add`` (e.g. sets) and are modified
    in place.
    """
    for atom in mol.GetAtoms():
        for key, seen_values in atom_props.items():
            seen_values.add(try_get_atom_feature(atom, key))

    for bond in mol.GetBonds():
        for key, seen_values in bond_props.items():
            seen_values.add(try_get_bond_feature(bond, key))
def build_adjacency_matrix(molecule: Mol) -> np.ndarray:
    """Return the symmetric atom adjacency matrix of `molecule`.

    The matrix starts as an identity matrix (so the diagonal is 1) and gets
    a 1 at both ``[i, j]`` and ``[j, i]`` for every bond between atoms
    ``i`` and ``j``.
    """
    adjacency = np.eye(molecule.GetNumAtoms())

    bonded_pairs = [(bond.GetBeginAtom().GetIdx(), bond.GetEndAtom().GetIdx())
                    for bond in molecule.GetBonds()]
    if bonded_pairs:
        begins, ends = (list(side) for side in zip(*bonded_pairs))
        adjacency[begins, ends] = 1
        adjacency[ends, begins] = 1

    return adjacency
Esempio n. 6
0
def add_map_numbers(mol: Mol) -> Mol:
    """Return a re-parsed copy of `mol` with randomly assigned map numbers.

    The molecule is written to SMILES and parsed back (which makes the atom
    order canonical), then the numbers ``1..N`` are shuffled with
    ``np.random.shuffle`` and assigned to the atoms in order.
    """
    # SMILES round trip makes the atom order canonical
    canonical = Chem.MolFromSmiles(Chem.MolToSmiles(mol))

    shuffled_maps = np.arange(1, canonical.GetNumAtoms() + 1)
    np.random.shuffle(shuffled_maps)

    for atom, map_num in zip(canonical.GetAtoms(), shuffled_maps):
        atom.SetAtomMapNum(int(map_num))
    return canonical
Esempio n. 7
0
def add_benzene_ring(mol: Mol, start_atom_ind: int, ring_atom_maps: List[int]):
    """Attach an aromatic carbon ring to `mol`, in place, and return `mol`.

    `ring_atom_maps` lists the atom map numbers of the ring in ring order.
    The atom at `start_atom_ind` is the ring member that already exists; it
    is marked as aromatic and 'is_edited'. Ring members whose map number is
    already present in `mol` are reused, the others are created as aromatic
    carbons that copy the start atom's 'in_reactant' (default False) and
    'mol_id' (default 1) properties. Consecutive ring members, and finally
    the first and last member, are connected with aromatic bonds where no
    bond exists yet; every ring bond is marked 'is_edited'.
    """
    map_to_index = {
        atom.GetAtomMapNum(): idx for idx, atom in enumerate(mol.GetAtoms())
    }

    start_atom = mol.GetAtomWithIdx(start_atom_ind)
    start_atom.SetBoolProp('is_edited', True)
    start_atom.SetIsAromatic(True)
    start_atom_map = start_atom.GetAtomMapNum()

    in_reactant = (start_atom.GetBoolProp('in_reactant')
                   if start_atom.HasProp('in_reactant') else False)
    mol_id = (start_atom.GetIntProp('mol_id')
              if start_atom.HasProp('mol_id') else 1)

    # Atom index of every ring member, in ring order.
    ring_indices = []
    for atom_map in ring_atom_maps:
        if atom_map == start_atom_map:
            ring_indices.append(start_atom_ind)
        elif atom_map in map_to_index:
            ring_indices.append(map_to_index[atom_map])
        else:
            # The new atom is appended, so its index is the current atom count.
            next_index = mol.GetNumAtoms()
            carbon = Chem.Atom(6)  # benzene has only carbon atoms
            carbon.SetAtomMapNum(atom_map)
            carbon.SetIsAromatic(True)
            carbon.SetBoolProp('is_edited', True)
            carbon.SetBoolProp('in_reactant', in_reactant)
            carbon.SetIntProp('mol_id', mol_id)
            mol.AddAtom(carbon)
            ring_indices.append(next_index)

    def _mark_ring_bond(begin: int, end: int) -> None:
        # Reuse the bond if it exists, otherwise create an aromatic one.
        bond = mol.GetBondBetweenAtoms(begin, end)
        if bond is None:
            # AddBond's return value minus one is used as the new bond's index.
            new_bond_idx = mol.AddBond(
                begin, end, order=Chem.rdchem.BondType.AROMATIC) - 1
            bond = mol.GetBondWithIdx(new_bond_idx)
        bond.SetBoolProp('is_edited', True)

    for begin, end in zip(ring_indices, ring_indices[1:]):
        _mark_ring_bond(begin, end)
    # Close the ring between the first and the last member.
    _mark_ring_bond(ring_indices[0], ring_indices[-1])

    return mol
Esempio n. 8
0
def find_rings(mol: Mol) -> List[List[int]]:
    """Return the rings of `mol` expressed as lists of atom map numbers.

    The rings come from ``mol.GetRingInfo().AtomRings()``; each atom index
    is replaced by the map number of the atom at that index.
    """
    atom_rings = mol.GetRingInfo().AtomRings()
    index_to_map = {
        idx: atom.GetAtomMapNum() for idx, atom in enumerate(mol.GetAtoms())
    }
    return [[index_to_map[idx] for idx in ring] for ring in atom_rings]
Esempio n. 9
0
def fix_incomplete_mappings(sub_mol: Mol, prod_mol: Mol) -> Tuple[Mol, Mol]:
    """Give every unmapped atom of both molecules a fresh atom map number.

    Atoms whose map number is ``None`` or smaller than 1 are numbered
    consecutively, starting one above the highest map number found in
    either molecule (substrate atoms first, then product atoms). Both
    molecules are modified in place and returned as a tuple.

    A molecule without atoms is accepted; if neither molecule has atoms the
    numbering would start at 1.
    """
    # `default=0` keeps max() from raising ValueError on atom-less molecules.
    max_map = max(
        (a.GetAtomMapNum()
         for mol in (sub_mol, prod_mol) for a in mol.GetAtoms()),
        default=0,
    )

    for mol in (sub_mol, prod_mol):
        for a in mol.GetAtoms():
            map_num = a.GetAtomMapNum()
            if map_num is None or map_num < 1:
                max_map += 1
                a.SetAtomMapNum(max_map)
    return sub_mol, prod_mol
Esempio n. 10
0
def rdmol_to_data(mol: Mol):
    """Convert an RDKit molecule with exactly one conformer into a `Data` object.

    The returned object carries:
      * ``node_type``  - atomic numbers (long tensor),
      * ``pos``        - conformer coordinates (float tensor),
      * ``edge_index`` - both directions of every bond, sorted by
        ``source * N + target``,
      * ``edge_type``  - ``BOND_TYPES`` code of each edge, in the same order,
      * ``rdmol``      - a deep copy of `mol`,
      * ``smiles``     - ``Chem.MolToSmiles(mol)``,
      * ``nx``         - the undirected networkx graph built from the data.

    Raises AssertionError if `mol` does not have exactly one conformer.
    """
    assert mol.GetNumConformers() == 1
    N = mol.GetNumAtoms()

    pos = torch.tensor(mol.GetConformer(0).GetPositions(), dtype=torch.float)
    z = torch.tensor([atom.GetAtomicNum() for atom in mol.GetAtoms()],
                     dtype=torch.long)

    # Every bond contributes two directed edges of the same type.
    row, col, edge_type = [], [], []
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row += [start, end]
        col += [end, start]
        edge_type += 2 * [BOND_TYPES[bond.GetBondType()]]

    edge_index = torch.tensor([row, col], dtype=torch.long)
    edge_type = torch.tensor(edge_type)

    # Sort the edges by (source, target).
    perm = (edge_index[0] * N + edge_index[1]).argsort()
    edge_index = edge_index[:, perm]
    edge_type = edge_type[perm]

    # Computed before the deep copy below, as in the original statement order.
    smiles = Chem.MolToSmiles(mol)

    data = Data(node_type=z,
                pos=pos,
                edge_index=edge_index,
                edge_type=edge_type,
                rdmol=copy.deepcopy(mol),
                smiles=smiles)
    data.nx = to_networkx(data, to_undirected=True)

    return data
Esempio n. 11
0
def generate_conformers(lig_file, init='generate_conformers_init'):                                                     # option pdb_mol
	"""Convert one PDB ligand to SDF and write its generated conformers to a PDB file.

	Steps:
		> Reads the ligand from `lig_file` with RDKit
		> Writes it to an .sdf file under init.mol_path for future use
		> Adds hydrogens, embeds init.num_conformers conformers, MMFF-optimizes
		  each one, strips the hydrogens again and appends every conformer to
		  a PDB file under init.out_lig_path

	`init` is the NAME of an object in scope that provides lig_path,
	out_lig_path, mol_path and num_conformers.

	Returns [[pdb_file, mol_file, num_atoms]], where num_atoms is the atom
	count of the last molecule written (without the added hydrogens)."""

	# NOTE(review): eval() executes whatever string is passed in; only call
	# this with trusted, hard-coded names.
	init = eval(init)

	lig_name = lig_file[len(init.lig_path)+1: ]

	# use rdkit to get a mol object from the PDB
	pdb_file = os.path.join(init.out_lig_path, lig_name)
	mol_file = os.path.join(init.mol_path, lig_name).replace('.pdb', '.sdf')

	# write the mol to a mol file for future use
	mol = Chem.MolFromPDBFile(lig_file)
	writer = SDWriter(mol_file)
	try:
		writer.write(mol)
	finally:
		# close the writer so the SDF is flushed before anything reads it
		writer.close()

	# generate conformers and get the number of atoms of the molecule
	mol2 = Chem.AddHs(mol)																								# addHs
	pdb_writer = PDBWriter(pdb_file)
	try:
		conf_ids = AllChem.EmbedMultipleConfs(mol2, init.num_conformers)												# PDB has hydrogens
		for cid in conf_ids:
			AllChem.MMFFOptimizeMolecule(mol2, confId=cid)
			mol = Chem.RemoveHs(mol2)
			# write conformer `cid`; without confId the default conformer
			# would be written on every iteration
			pdb_writer.write(mol, confId=cid)
	finally:
		pdb_writer.close()																								# also has hydrogens
	num_atoms = Mol.GetNumAtoms(mol)

	print('Generated conformers for one ligand')
	return [[pdb_file, mol_file, num_atoms]]
Esempio n. 12
0
def find_added_benzene_rings(source_mol: Mol,
                             target_mol: Mol) -> List[List[int]]:
    """
    Find benzene rings that were added in the process of reaction generation.

    A ring of `target_mol` (as returned by `find_rings`, i.e. a list of atom
    map numbers) is reported when none of its map numbers occurs in
    `source_mol` and `is_benzene_ring` accepts its atoms.
    """
    target_rings = find_rings(target_mol)

    atom_by_map = {atom.GetAtomMapNum(): atom for atom in target_mol.GetAtoms()}
    source_maps = {atom.GetAtomMapNum() for atom in source_mol.GetAtoms()}

    def _is_new_benzene(ring):
        # A ring sharing any atom with the source was not added.
        if any(map_num in source_maps for map_num in ring):
            return False
        return is_benzene_ring([atom_by_map[map_num] for map_num in ring])

    return [ring for ring in target_rings if _is_new_benzene(ring)]
Esempio n. 13
0
def fix_explicit_hs(mol: Mol) -> Mol:
    """Return a sanitized copy of `mol` after an add/remove hydrogen round trip.

    The "no implicit" flag is cleared on every atom of the input (in place),
    then ``Chem.AddHs(..., explicitOnly=True)`` and ``Chem.RemoveHs`` are
    applied and the result is sanitized.
    """
    for atom in mol.GetAtoms():
        atom.SetNoImplicit(False)

    cleaned = Chem.RemoveHs(Chem.AddHs(mol, explicitOnly=True))
    Chem.SanitizeMol(cleaned)
    return cleaned
Esempio n. 14
0
def filter_reactants(sub_mols: List[Mol], prod_mol: Mol) -> Mol:
    """Combine the substrates that share a map number with the product.

    A substrate is kept when at least one of its atoms carries a map number
    that also occurs in `prod_mol`. The kept molecules are joined into one
    dot-separated SMILES, which is parsed and returned as a single molecule.
    """
    product_maps = {atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}
    reactants = [
        mol for mol in sub_mols
        if any(atom.GetAtomMapNum() in product_maps for atom in mol.GetAtoms())
    ]
    joined_smiles = '.'.join(Chem.MolToSmiles(mol) for mol in reactants)
    return Chem.MolFromSmiles(joined_smiles)
Esempio n. 15
0
    def get_labels(
            self,
            mol: rdchem.Mol,
            label_names: Optional[Union[str, List[str]]] = None
    ) -> List[Optional[float]]:
        """Extract numeric label values from the molecule's properties.

        Params:
        -------
        mol: rdkit.Chem.rdchem.Mol
            Molecule of interest.

        label_names: str or list of str or None, optional, default=None
            Name of label(s). A single string is treated as a one-element
            list.

        Returns:
        --------
        labels: list of float or None
            One entry per name in `label_names`, in the same order: the
            property value converted to float, or None when the molecule
            has no property of that name. Empty when `label_names` is None.

        Raises:
        -------
        ValueError
            If a property exists but its value cannot be converted to float.
        """
        if label_names is None:
            return []

        # Convert str to list for proper parsing
        if isinstance(label_names, str):
            label_names = [label_names]

        return [
            float(mol.GetProp(name)) if mol.HasProp(name) else None
            for name in label_names
        ]
Esempio n. 16
0
def mol_to_extended_graph(molecule: Mol, seed: int = 0) -> Graph:
    """Build a graph by embedding the molecule's atoms one at a time.

    A generator seeded with `seed` picks the start atom index, and
    `get_random_bf_sequence` turns the bond graph into a node sequence from
    that start. Each node of the sequence is then passed to
    `embed_node_in_graph`, which fills the returned graph.
    """
    rng = np.random.default_rng(seed=seed)
    n_atoms = molecule.GetNumAtoms()
    start_index = rng.integers(low=0, high=n_atoms, size=1).item()

    bonds = build_bond_graph(molecule)
    node_order = get_random_bf_sequence(graph=bonds, start=start_index, rng=rng)

    extended = Graph()
    for node in node_order:
        embed_node_in_graph(extended, new_node=node, bond_graph=bonds, rng=rng)
    return extended
Esempio n. 17
0
def remove_bridge(molecule: Mol, root_pattern_smiles: str,
                  removal_indices: List[int]) -> Optional[Mol]:
    """Delete selected atoms of the first pattern match and keep the largest fragment.

    `root_pattern_smiles` is parsed and matched against `molecule`. Returns
    None when there is no match. Otherwise the entries of `removal_indices`
    are positions within the first match; the matched atoms at those
    positions are removed and the result of `get_largest_fragment` on the
    remaining molecule is returned.
    """
    pattern = Chem.MolFromSmiles(root_pattern_smiles)
    matches = molecule.GetSubstructMatches(pattern)
    if not matches:
        return None
    first_match = matches[0]

    editable = Chem.EditableMol(molecule)
    # Highest index first, so the atoms still to be removed keep their indices.
    doomed = sorted((first_match[i] for i in removal_indices), reverse=True)
    for atom_idx in doomed:
        editable.RemoveAtom(atom_idx)
    return get_largest_fragment(editable.GetMol())
Esempio n. 18
0
def get_bridge_idty(ligand: Mol, class_pattern: str) -> Optional[List[str]]:
    """Return the SMILES of the small multi-attachment side chains of `ligand`.

    Nitro groups (``[N+](=O)[O-]``) are deleted from the ligand, then the
    core given by `class_pattern` is replaced via ``Chem.ReplaceCore``.
    Returns None when ReplaceCore yields nothing. Otherwise the remaining
    fragments are converted to SMILES, sorted by string length, and those
    with fewer than 20 atoms and more than one ``*`` attachment point are
    returned with their numbered attachment labels (``[1*]``, ``[12*]``,
    ...) normalized to a plain ``*``.
    """
    nitro = Chem.MolFromSmiles("[N+](=O)[O-]", sanitize=False)
    ligand = Chem.DeleteSubstructs(ligand, nitro)
    root_pattern = Chem.MolFromSmiles(class_pattern, sanitize=False)
    chains = Chem.ReplaceCore(ligand, root_pattern)
    if chains is None:
        return None

    pieces = Chem.GetMolFrags(chains, asMols=True)
    fragment_smiles = sorted((Chem.MolToSmiles(x, True) for x in pieces),
                             key=len)
    bridge = []
    for smiles in fragment_smiles:
        if (Chem.MolFromSmiles(smiles).GetNumAtoms() < 20
                and smiles.count("*") > 1):
            # `\d+` also covers attachment labels with two or more digits.
            bridge.append(re.sub(r"\[\d+\*\]", "*", smiles))
    return bridge
Esempio n. 19
0
def check_num_atoms(mol: rdchem.Mol,
                    max_num_atoms: Optional[int] = -1) -> None:
    """Check number of atoms in `mol` does not exceed `max_num_atoms`.

    If number of atoms in `mol` exceeds the number `max_num_atoms`, it 
    will raise `MolFeatureExtractionError` exception.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        The molecule to check.

    max_num_atoms: int or None, optional, default=-1
        Maximum allowed number of atoms in a molecule. If negative or 
        None, check passes unconditionally.

    Raises:
    -------
    MolFeatureExtractionError
        If the molecule has more than `max_num_atoms` atoms.
    """
    # None is permitted by the annotation; treat it like "no limit" instead
    # of failing on the comparison below.
    if max_num_atoms is None or max_num_atoms < 0:
        return

    num_atoms = mol.GetNumAtoms()
    if num_atoms > max_num_atoms:
        raise MolFeatureExtractionError("Atoms in mol (N={}) exceeds " \
            "num_max_atoms (N={}).".format(num_atoms, max_num_atoms))
Esempio n. 20
0
    def __init__(self,
                 source_mol: RWMol,
                 target_mol: Mol,
                 action_vocab: dict,
                 forward: bool = False,
                 action_order: str = 'dfs'):
        """Set up the state needed to generate actions from source to target.

        :param source_mol: editable molecule the generation starts from
        :param target_mol: molecule to be reached
        :param action_vocab: vocabulary dict; its 'prop2oh' entry must hold
            the 'atom' and 'bond' mappings passed to `get_graph`
        :param forward: stored as-is on the instance
        :param action_order: ordering mode; the substrings 'random',
            'randat' and 'bfs' switch the flags set below
        """
        self.source_mol = source_mol
        self.target_mol = target_mol

        # Randomization flags derived from the ordering mode.
        is_fully_random = action_order == 'random'
        self.randomize_action_types = 'random' in action_order
        self.randomize_map_atom_order = is_fully_random or 'randat' in action_order
        self.randomize_next_atom = is_fully_random

        self.action_order = action_order

        # In 'bfs' modes the stack starts with all target map numbers, sorted.
        if 'bfs' in self.action_order:
            self.atoms_stack = sorted(
                atom.GetAtomMapNum() for atom in target_mol.GetAtoms())
        else:
            self.atoms_stack = []

        mark_reactants(source_mol, target_mol)

        self.edited_atoms = set()
        self.forward = forward
        self.action_vocab = action_vocab
        self.prop_dict = action_vocab['prop2oh']

        benzene_rings = find_added_benzene_rings(source_mol=source_mol,
                                                 target_mol=target_mol)
        self.added_rings = {'benzene': benzene_rings}
        self.current_step = 0
        self.current_mol_graph = get_graph(self.source_mol,
                                           ravel=False,
                                           to_array=True,
                                           atom_prop2oh=self.prop_dict['atom'],
                                           bond_prop2oh=self.prop_dict['bond'])
Esempio n. 21
0
def build_bond_graph(molecule: Mol) -> Graph:
    """Return a graph with one edge per bond, keyed by atom index.

    Atoms that take part in no bond do not appear in the graph.
    """
    bond_graph = Graph()
    for bond in molecule.GetBonds():
        bond_graph.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
    return bond_graph
Esempio n. 22
0
def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Gasteiger charges and stereochemistry are computed on `mol` as a side
    effect (stored in the atom properties `_GasteigerCharge` / `_CIPCode`).

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int or None, optional, default=-1
        The size of the returned array. If this option is negative or 
        None, it does not take any effect. Otherwise, it must be larger 
        than or equal to the number of atoms in the input molecule. If 
        so, the end of the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or 
        `out_size` when padding is requested), and `m` is the number of 
        feats.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    """
    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(
        mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(
        mol)  # stored under _CIPCode, see doc for more info

    # SMARTS patterns for the pharmacophore-like flags
    hydrogen_donor = rdmolfiles.MolFromSmarts(
        "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    hydrogen_acceptor = rdmolfiles.MolFromSmarts(
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    acidic = rdmolfiles.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
    basic = rdmolfiles.MolFromSmarts(
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")

    def _matched_atoms(pattern) -> set:
        # Indices of all atoms taking part in any match of `pattern`;
        # a set makes the per-atom membership tests below O(1).
        return {
            idx
            for match in mol.GetSubstructMatches(pattern) for idx in match
        }

    hydrogen_donor_match = _matched_atoms(hydrogen_donor)
    hydrogen_acceptor_match = _matched_atoms(hydrogen_acceptor)
    acidic_match = _matched_atoms(acidic)
    basic_match = _matched_atoms(basic)

    # Get ring info
    ring = mol.GetRingInfo()

    # Loop-invariant one-hot vocabulary
    hybridization_types = list(rdchem.HybridizationType.names.values())

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)

        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), [
            'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
            'Sn', 'Se', 'other'
        ])
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(), hybridization_types)
        atom_feats += one_hot(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(), [-3, -2, -1, 0, 1, 2, 3])
        # A NaN Gasteiger charge is replaced by 0
        g_charge = float(atom.GetProp("_GasteigerCharge"))
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]
        atom_feats += [atom.GetIsAromatic()]

        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality
        try:
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        except Exception:
            # Best effort, as before: atoms without a usable _CIPCode get an
            # all-False encoding. Unlike a bare `except:`, this does not
            # swallow KeyboardInterrupt/SystemExit.
            cip_feats = [False, False]
        atom_feats += cip_feats + [atom.HasProp("_ChiralityPossible")]
        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    # NOTE: `np.float` was only an alias of the builtin `float` and has been
    # removed from NumPy, so the builtin is used directly.
    if out_size is None or out_size < 0:
        return np.array(mol_feats, dtype=float)
    elif out_size >= n_atoms:
        # 'empty' padding for `mol_feats`. Generate(s) feature matrix of same size for all mols
        # NOTE: len(mol_feats[0]) is the number of feats
        padded_mol_feats = np.zeros((out_size, len(mol_feats[0])),
                                    dtype=float)
        padded_mol_feats[:n_atoms] = np.array(mol_feats, dtype=float)
        return padded_mol_feats
    else:
        raise ValueError(
            '`out_size` (N={}) must be negative or larger than or '
            'equal to the number of atoms in the input molecules (N={}).'.
            format(out_size, n_atoms))
Esempio n. 23
0
def construct_pos_matrix(mol: rdchem.Mol,
                         out_size: Optional[int] = -1) -> np.ndarray:
    """Construct relative positions from each atom within the molecule.

    If `mol` has no conformer, one is embedded first (hydrogens are added 
    for the embedding and removed again afterwards).

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest. 

    out_size: int or None, optional, default=-1
        The size of the returned array. If this option is negative or 
        None, it does not take any effect. Otherwise, it must be larger 
        than or equal to the number of atoms in the input molecule. If 
        so, the end of the array is padded with zeros.

    Returns:
    --------
    pos_matrix: np.ndarray, shape=(n,n,3)
        Relative position (XYZ) coordinates from one atom the others in 
        the mol: entry ``[i, j]`` is ``coords[i] - coords[j]``.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.

    Examples:
    ---------
    ```python
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> smiles = 'N[C@@]([H])([C@]([H])(O2)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])([C@]([H])' \
                 '(O)C)C(=O)N[C@@]([H])(Cc1ccc(O)cc1)C(=O)2'
    >>> mol = Chem.MolFromSmiles(smiles)
    >>> mol = Chem.AddHs(mol, addCoords=True)
    >>> AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    >>> mol = Chem.RemoveHs(mol)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=-1)
    >>> pos_matrix.shape
    (34,34,3)

    >>> pos_matrix = construct_pos_matrix(mol, out_size=49)
    >>> pos_matrix.shape
    (49,49,3)
    ```
    """
    # Obtain initial distance geometry between atoms, if unavailable
    if mol.GetNumConformers() == 0:
        mol = rdmolops.AddHs(mol, addCoords=True)
        rdDistGeom.EmbedMolecule(mol, rdDistGeom.ETKDG())
        mol = rdmolops.RemoveHs(mol)
    coords = np.asarray(mol.GetConformer().GetPositions(),
                        dtype=float)  # shape=(N,3)
    N = mol.GetNumAtoms()

    # Determine appropriate output size to generate feature matrix of same size for all mols.
    if out_size is None or out_size < 0:
        size = N
    elif out_size >= N:
        size = out_size
    else:
        raise ValueError(
            '`out_size` (N={}) is smaller than number of atoms in mol (N={})'.
            format(out_size, N))

    # NOTE: `np.float` has been removed from NumPy; it was an alias of `float`.
    pos_matrix = np.zeros(shape=(size, size, 3), dtype=float)
    if N > 0:
        # Broadcasting computes every pairwise difference at once:
        # result[i, j] = coords[i] - coords[j]
        pos_matrix[:N, :N] = (coords[:, np.newaxis, :] -
                              coords[np.newaxis, :, :])
    return pos_matrix
Esempio n. 24
0
# Re-create every unique COCONUT natural product from its InChI and check
# that the InChIKey of the re-created molecule matches the stored one.
# Matching InChIs go to converted.txt (and the molecule to converted.sdf),
# mismatching ones to converted_not_match.txt.
repo = COCONUT()
# print(repo.count())
i = 0
converted, not_converted = 0, 0

# Context managers / try-finally make sure all three outputs are flushed and
# closed, also when the loop is interrupted by an error.
with open("out/inchi_valid_check/converted.txt",
          mode="w",
          encoding="utf-8") as converted_list_file, \
        open("out/inchi_valid_check/converted_not_match.txt",
             mode="w",
             encoding="utf-8") as converted_not_match_file:
    w = SDWriter("out/inchi_valid_check/converted.sdf")
    try:
        # NOTE(review): the loop variable is named `np`; it would shadow a
        # `numpy as np` import if this module has one.
        np: Unique_NP
        for np in repo.get_unique_stream():
            try:
                mol = Chem.MolFromInchi(inchi=np.inchi,
                                        treatWarningAsError=True)
                # Raises AttributeError when the conversion returned None.
                mol.SetProp("coconut_id", np.coconut_id)
            except Exception:
                # Conversion failed: count it and skip the key comparison,
                # so failures are not reported as "converted but not matching".
                not_converted += 1
                continue

            mol_inchikey = inchi.MolToInchiKey(mol)

            if np.inchikey == mol_inchikey:
                converted_list_file.write(np.inchi + "\n")
                w.write(mol)
                converted += 1
            else:
                converted_not_match_file.write(np.inchi + "\n")
    finally:
        w.close()
Esempio n. 25
0
def display_numbered(mol: Mol):
    """Display a copy of `mol` whose atom map numbers equal the atom indices.

    The input molecule itself is not modified.
    """
    numbered = deepcopy(mol)
    for atom in numbered.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx())
    display(numbered)
Esempio n. 26
0
def renumber_atoms_for_mapping(mol: Mol) -> Mol:
    """Return `mol` with its atoms reordered by ascending atom map number.

    The new order is the argsort of the atoms' map numbers, passed to
    `RenumberAtoms` as a list of plain ints.
    """
    map_nums = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
    atom_order = np.argsort(map_nums).tolist()
    return RenumberAtoms(mol, atom_order)
Esempio n. 27
0
def get_atom_ind(mol: Mol, atom_map: int) -> int:
    """Return the position of the first atom whose map number is `atom_map`.

    Raises ValueError when no atom of `mol` carries that map number.
    """
    position = next(
        (idx for idx, atom in enumerate(mol.GetAtoms())
         if atom.GetAtomMapNum() == atom_map),
        None,
    )
    if position is None:
        raise ValueError(f'No atom with map number: {atom_map}')
    return position
				merge_cols=['cryst_lig_file'],
				order=pair_idx)

# Extract filepaths to PDB and Mol files
# (Python 2 script section; relies on db_path, db_master, afdb and start
# being defined earlier in the file.)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# List the names of all tables in the database.
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
# The unpacking requires exactly five result rows and keeps the fifth one;
# any other number of tables raises ValueError here.
_, _, _, _, out_table_generate = cursor.fetchall()
# Each row is a 1-tuple; take the table name out of it.
out_table_generate = out_table_generate[0]
conn.close()

# Fetch the 'bind_lig_files' and 'mol_files' columns of that table.
# NOTE(review): the {'run_idx': '{}<100000'} argument looks like a row filter
# template - confirm against db_master.retrieve.
table_data = db_master.retrieve(out_table_generate, ['bind_lig_files', 'mol_files'], {'run_idx': '{}<100000'})
# Encode the paths as ASCII, silently dropping non-ASCII characters.
all_pdb_files = [table_data[0][i].encode('ascii','ignore') for i in range(len(table_data[0]))]
all_mol_files = [table_data[1][i].encode('ascii','ignore') for i in range(len(table_data[0]))]
# Load every Mol file with RDKit and record its atom count.
all_mols = [Chem.MolFromMolFile(all_mol_files[i]) for i in range(len(all_mol_files))]
all_num_atoms = [Mol.GetNumAtoms(all_mols[i]) for i in range(len(all_mols))]

# Hand the loaded ligands and the decoy-selection limits to GetDecoysInit
# before the worker function is run below.
GetDecoysInit(all_pdb_files, all_mol_files, all_mols, all_num_atoms,
	max_atom_dif=2, max_substruct=4, max_num_decoys=10)

# Run 'get_decoys' over the three argument lists; the outputs are named
# 'bind_lig_files' and 'decoy_ligs'.
afdb.run_multithread(func='get_decoys',
	arg_types=[str, str, int],
	arg_lists=[all_pdb_files, all_mol_files, all_num_atoms],
	out_types=[str, str],
	out_names=['bind_lig_files', 'decoy_ligs'],
	num_threads=100, commit_sec=1)

# Report the seconds elapsed since `start` (Python 2 print statement).
print '\nGot decoys for each ligand in:', str(time.time()-start), 'seconds\n'

"""Generate conformers for all the decoy ligands---------------------------------"""
def build_atom_features_matrix(mol: Mol) -> np.ndarray:
    """Stack the result of `get_atom_features` for every atom into one array.

    Rows follow the order of ``mol.GetAtoms()``.
    """
    feature_rows = [get_atom_features(atom) for atom in mol.GetAtoms()]
    return np.array(feature_rows)
    def get_smallest_root_match(self, mol: Mol) -> Mol:
        search_space: Set[Mol] = set(self.molecules.values())

        all_idxs = set(range(0, mol.GetNumAtoms()))
        included_idxs = {0}

        bonds = {}

        def register_bond(from_idx: int, to_idx: int):
            entry = bonds.get(from_idx)
            if entry is None:
                entry = []
                bonds[from_idx] = entry
            entry.append(to_idx)

        for bond in mol.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            register_bond(begin, end)
            register_bond(end, begin)

        while len(included_idxs) < mol.GetNumAtoms():
            frontier_permutations = reduce(
                lambda perms, from_idx: perms | set(
                    map(
                        lambda to_idx: frozenset([*included_idxs, to_idx]),
                        filter(lambda idx: idx not in included_idxs, bonds[from_idx])
                    )
                ),
                included_idxs,
                set()
            )

            new_search_space = set()
            for perm in frontier_permutations:
                e_mol = Chem.EditableMol(mol)
                perm_idxs = list(all_idxs - perm)
                perm_idxs.sort(reverse=True)
                for idx in perm_idxs:
                    e_mol.RemoveAtom(idx)
                display('mul')
                display_numbered(e_mol.GetMol())
                new_search_space |= set(self.find_superstructures(e_mol.GetMol(), search_space))
                included_idxs |= perm

            if len(new_search_space) == 0:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            if len(search_space) < 100:
                display("from mul")
                for s in search_space:
                    display_numbered(s)

            if len(frontier_permutations) > 1:
                e_mol = Chem.EditableMol(mol)
                perm_idxs = list(all_idxs - included_idxs)
                perm_idxs.sort(reverse=True)
                for idx in perm_idxs:
                    e_mol.RemoveAtom(idx)
                display('single')
                display_numbered(e_mol.GetMol())
                new_search_space = set(self.find_superstructures(e_mol.GetMol(), search_space))

            if len(new_search_space) == 0:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            if len(search_space) < 100:
                display("from mul")
                for s in search_space:
                    display_numbered(s)