Example #1
0
def atom_mapper3D(reactant, products):
    """Prepare an atom-mapped reactant and stereo-enumerated products.

    Written by Mads Koerstz

    The reactant is labelled, replaced by the first stereoisomer yielded by
    ``EnumerateStereoisomers`` and re-indexed. Every product is labelled and
    re-indexed, then handed to ``set_chirality`` together with the prepared
    reactant.

    Returns
    -------
    tuple
        ``(reactant, new_products)`` where ``new_products`` holds one
        ``set_chirality`` result per entry of ``products``, in input order.
    """

    def _assign_stereo(mol):
        # Forced re-perception of stereochemistry, flagging possible centers.
        rdmolops.AssignStereochemistry(mol,
                                       cleanIt=True,
                                       flagPossibleStereoCenters=True,
                                       force=True)

    # Pick one concrete stereoisomer of the labelled reactant.
    reactant = label_atoms(reactant)
    enumeration_opts = StereoEnumerationOptions(onlyUnassigned=False,
                                                unique=False)
    _assign_stereo(reactant)
    reactant = next(EnumerateStereoisomers(reactant,
                                           options=enumeration_opts))

    # Prepare reactant: make graph atom idx = SMILES atom mapped idx, then
    # assign stereochemistry again (assigns _CIPCode).
    reactant = reassign_atom_idx(reactant)
    _assign_stereo(reactant)

    # Prepare products: label, re-index (graph atom idx = SMILES atom mapped
    # idx) and enumerate isomers consistent with the reactant.
    new_products = [
        set_chirality(reassign_atom_idx(label_atoms(product)), reactant)
        for product in products
    ]

    return reactant, new_products
Example #2
0
def set_chirality(product, reactant):
    """ Written by Mads Koerstz
    Produce all combinations of isomers (R/S and cis/trans). But force
    product atoms with unchanged neighbors to the same label chirality as
    the reactant

    Parameters
    ----------
    product, reactant
        Molecules passed through ``reassign_atom_idx`` and sanitized here;
        atom indices are compared directly between the two.

    Returns
    -------
    list
        Product stereoisomer molecules, de-duplicated by their SMILES and
        kept in enumeration order.
    """

    # TODO move these somewhere it makes more sense.
    product = reassign_atom_idx(product)
    reactant = reassign_atom_idx(reactant)

    Chem.SanitizeMol(product)
    Chem.SanitizeMol(reactant)

    # Find chiral atoms - including label chirality
    chiral_atoms_product = Chem.FindMolChiralCenters(product,
                                                     includeUnassigned=True)

    # A center counts as unchanged when its neighbor indices are the same in
    # product and reactant.
    unchanged_atoms = []
    for atom, _ in chiral_atoms_product:
        product_neighbors = sorted(
            a.GetIdx() for a in product.GetAtomWithIdx(atom).GetNeighbors())
        reactant_neighbors = sorted(
            a.GetIdx() for a in reactant.GetAtomWithIdx(atom).GetNeighbors())

        if product_neighbors == reactant_neighbors:
            unchanged_atoms.append(atom)

    # make combinations of isomers.
    opts = StereoEnumerationOptions(onlyUnassigned=False, unique=False)
    rdmolops.AssignStereochemistry(product,
                                   cleanIt=True,
                                   flagPossibleStereoCenters=True,
                                   force=True)

    # The reactant is not modified below, so its CIP labels are read once
    # instead of once per enumerated isomer.
    # NOTE(review): GetProp is expected to raise KeyError when an atom has no
    # '_CIPCode' - confirm callers always pass a reactant with assigned labels.
    reactant_tags = [(atom, reactant.GetAtomWithIdx(atom).GetProp('_CIPCode'))
                     for atom in unchanged_atoms]

    seen_smiles = set()  # O(1) duplicate detection
    product_isomers_mols = []
    for product_isomer in EnumerateStereoisomers(product, options=opts):
        rdmolops.AssignStereochemistry(product_isomer, force=True)
        for atom, reactant_global_tag in reactant_tags:
            # TODO make sure that the _CIPRank is the same for atom in reactant and product.
            isomer_atom = product_isomer.GetAtomWithIdx(atom)
            if reactant_global_tag != isomer_atom.GetProp('_CIPCode'):
                isomer_atom.InvertChirality()

        # Generate the SMILES once and reuse it for the test and the record.
        isomer_smiles = Chem.MolToSmiles(product_isomer)
        if isomer_smiles not in seen_smiles:
            seen_smiles.add(isomer_smiles)
            product_isomers_mols.append(product_isomer)

    return product_isomers_mols
def write_xyz_file(mol, file_name):
    """
    Embeds a mol object to get 3D coordinates which are written to an .xyz file

    The molecule is sanitized, embedded and MMFF-optimized in place. The
    file starts with the atom count and an empty comment line, followed by
    one ``symbol x y z`` line per atom. When the formal charge is non-zero a
    ``$set`` / ``chrg <charge>`` / ``$end`` block is appended.

    Parameters
    ----------
    mol
        Molecule to embed; modified in place.
    file_name
        Path of the file to write.

    Raises
    ------
    ValueError
        If no 3D conformer could be generated for ``mol``.
    """

    n_atoms = mol.GetNumAtoms()
    charge = Chem.GetFormalCharge(mol)
    symbols = [a.GetSymbol() for a in mol.GetAtoms()]

    Chem.SanitizeMol(mol)
    rdmolops.AssignStereochemistry(mol)

    # EmbedMolecule reports failure through a negative conformer id; fail
    # here with a clear message rather than later on a missing conformer.
    if AllChem.EmbedMolecule(mol, maxAttempts=10000) < 0:
        raise ValueError(
            "could not embed molecule; no 3D conformer was generated")
    AllChem.MMFFOptimizeMolecule(mol, ignoreInterfragInteractions=False)

    # Fetch the conformer once instead of once per atom.
    conformer = mol.GetConformers()[0]

    with open(file_name, 'w') as _file:
        _file.write(str(n_atoms)+'\n\n')
        for atom, symbol in enumerate(symbols):
            coord = conformer.GetAtomPosition(atom)
            line = " ".join((symbol, str(coord.x), str(coord.y), str(coord.z),
                             "\n"))
            _file.write(line)
        if charge != 0:
            _file.write("$set\n")
            _file.write("chrg "+str(charge)+"\n")
            _file.write("$end")
def extract_smiles(xyz_file, charge, allow_charge=True, check_ac=False):
    """
    uses xyz2mol to extract smiles with as much 3d structural information as
    possible

    Parameters
    ----------
    xyz_file
        Path handed to ``xyz2mol_local.read_xyz_file``.
    charge
        Total charge passed to ``xyz2mol``.
    allow_charge
        Forwarded to ``xyz2mol`` as ``allow_charged_fragments``.
    check_ac
        When true, compare the adjacency matrix of the perceived molecule
        with the module-level ``AC`` and set the global ``AC_SAME`` to
        ``False`` if they differ.

    Returns
    -------
    tuple
        ``(smiles, formal_charge, res_status)`` where ``res_status`` is the
        flag returned by ``choose_resonance_structure``.
    """
    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    # Arguments shared by the Hueckel attempt and the fallback.
    xyz2mol_kwargs = dict(charge=charge,
                          use_graph=True,
                          allow_charged_fragments=allow_charge,
                          use_atom_maps=True,
                          embed_chiral=True)
    try:
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=True, **xyz2mol_kwargs)
    except Exception:
        # Retry without Hueckel on any ordinary failure. `Exception` rather
        # than a bare `except` so KeyboardInterrupt/SystemExit still
        # propagate.
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=False, **xyz2mol_kwargs)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    if check_ac:
        global AC_SAME
        # NOTE(review): `AC` is not defined in this function; it is assumed
        # to be a module-level reference adjacency matrix - confirm.
        ac = Chem.GetAdjacencyMatrix(input_mol)
        if not np.all(AC == ac):
            AC_SAME = False
            print("change in AC: stopping")

    return structure_smiles, GetFormalCharge(structure_mol), res_status
def choose_resonance_structure(mol):
    """
    Enumerate the resonance structures of ``mol`` (charge separation
    allowed), count the rotatable bonds of each one and keep the structure
    with the fewest (the most 'locked' structure). On ties the structure
    encountered first is kept.

    Returns ``(new_mol, res_status)``. ``res_status`` is ``False`` when the
    resonance supplier is empty and the input mol itself is used, ``True``
    otherwise. Stereochemistry is re-perceived on the chosen mol before it
    is returned.
    """
    candidates = rdchem.ResonanceMolSupplier(
        mol, rdchem.ResonanceFlags.ALLOW_CHARGE_SEPARATION)

    if candidates:
        res_status = True
        new_mol = None
    else:
        # Nothing to choose from: fall back to the input structure.
        print("using input mol")
        res_status = False
        new_mol = mol

    fewest_rot_bonds = None
    for candidate in candidates:
        Chem.SanitizeMol(candidate)
        rot_bond_count = Chem.rdMolDescriptors.CalcNumRotatableBonds(candidate)
        # The first candidate is always taken; later ones only when strictly
        # better.
        if new_mol is None or rot_bond_count < fewest_rot_bonds:
            new_mol = candidate
            fewest_rot_bonds = rot_bond_count

    Chem.DetectBondStereochemistry(new_mol, -1)
    rdmolops.AssignStereochemistry(new_mol,
                                   flagPossibleStereoCenters=True,
                                   force=True)
    Chem.AssignAtomChiralTagsFromStructure(new_mol, -1)
    return new_mol, res_status
def chiral_tags(mol):
    """
    Tag methylene and methyl groups with a chiral tag whose priority is
    defined by the atom index of the hydrogens.

    Selected hydrogens are temporarily given atomic number 9, chiral tags
    and stereochemistry are assigned, and the atoms are then reset to atomic
    number 1. ``mol`` is modified in place and also returned.
    """
    relabeled = []  # indices of hydrogens whose atomic number is swapped

    # Methylene: relabel the hydrogen with the higher atom index.
    ch2_pattern = Chem.MolFromSmarts('[!#1][*]([#1])([#1])([!#1])')
    for match in mol.GetSubstructMatches(ch2_pattern):
        top_h = max(match[2:4])
        relabeled.append(top_h)
        mol.GetAtoms()[top_h].SetAtomicNum(9)

    # Methyl: relabel the two hydrogens with the highest atom indices. The
    # search runs after the methylene hydrogens have been relabeled.
    ch3_pattern = Chem.MolFromSmarts('[!#1][*]([#1])([#1])([#1])')
    for match in mol.GetSubstructMatches(ch3_pattern):
        *_, second_h, first_h = sorted(match[2:])
        relabeled.extend((first_h, second_h))
        mol.GetAtoms()[first_h].SetAtomicNum(9)
        mol.GetAtoms()[second_h].SetAtomicNum(9)

    Chem.AssignAtomChiralTagsFromStructure(mol, -1)
    rdmolops.AssignStereochemistry(mol)

    # Restore every relabeled atom to hydrogen.
    for h_idx in relabeled:
        mol.GetAtoms()[h_idx].SetAtomicNum(1)

    return mol
def extract_smiles(xyz_file, charge, allow_charge=True):
    """
    uses xyz2mol to extract smiles with as much 3d structural information as
    possible

    Parameters
    ----------
    xyz_file
        Path handed to ``xyz2mol_local.read_xyz_file``.
    charge
        Total charge passed to ``xyz2mol``.
    allow_charge
        Forwarded to ``xyz2mol`` as ``allow_charged_fragments``.

    Returns
    -------
    tuple
        ``(smiles, formal_charge, res_status)`` where ``res_status`` is the
        flag returned by ``choose_resonance_structure``.
    """
    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    # Arguments shared by the Hueckel attempt and the fallback.
    xyz2mol_kwargs = dict(charge=charge,
                          use_graph=True,
                          allow_charged_fragments=allow_charge,
                          use_atom_maps=True,
                          embed_chiral=True)
    try:
        input_mol = xyz2mol_local.xyz2mol(atoms,
                                          xyz_coordinates,
                                          use_huckel=True,
                                          **xyz2mol_kwargs)
    except Exception:
        # Retry without Hueckel on any ordinary failure. `Exception` rather
        # than a bare `except` so KeyboardInterrupt/SystemExit still
        # propagate.
        input_mol = xyz2mol_local.xyz2mol(atoms,
                                          xyz_coordinates,
                                          use_huckel=False,
                                          **xyz2mol_kwargs)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    return structure_smiles, GetFormalCharge(structure_mol), res_status
Example #8
0
def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest. Gasteiger charges and stereochemistry are
        computed on it in place.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative, it
        does not take any effect. Otherwise, it must be larger than or
        equal to the number of atoms in the input molecule. If so, the
        end of the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or
        `out_size` when padding is requested), and `m` is the number of
        feats.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    """

    def _matched_atoms(pattern):
        # Flatten all substructure matches into a set of atom indices so the
        # per-atom membership tests below are O(1).
        return {
            idx for match in mol.GetSubstructMatches(pattern) for idx in match
        }

    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(
        mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(
        mol)  # stored under _CIPCode, see doc for more info

    # Retrieve atom index locations of matches
    HYDROGEN_DONOR = rdmolfiles.MolFromSmarts(
        "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    HYDROGEN_ACCEPTOR = rdmolfiles.MolFromSmarts(
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    ACIDIC = rdmolfiles.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
    BASIC = rdmolfiles.MolFromSmarts(
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")
    hydrogen_donor_match = _matched_atoms(HYDROGEN_DONOR)
    hydrogen_acceptor_match = _matched_atoms(HYDROGEN_ACCEPTOR)
    acidic_match = _matched_atoms(ACIDIC)
    basic_match = _matched_atoms(BASIC)

    # Get ring info
    ring = mol.GetRingInfo()

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)

        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), [
            'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
            'Sn', 'Se', 'other'
        ])
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(),
                              list(rdchem.HybridizationType.names.values()))
        atom_feats += one_hot(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(), [-3, -2, -1, 0, 1, 2, 3])
        g_charge = float(atom.GetProp("_GasteigerCharge"))
        # A NaN partial charge is replaced by 0.
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]
        atom_feats += [atom.GetIsAromatic()]

        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality
        try:
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        except Exception:
            # Atoms without a '_CIPCode' property get neither label. Catching
            # `Exception` rather than using a bare `except` keeps the
            # best-effort fallback while letting KeyboardInterrupt/SystemExit
            # through.
            cip_feats = [False, False]
        atom_feats += cip_feats + [atom.HasProp("_ChiralityPossible")]
        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    # `np.float` was only an alias of the builtin `float` and is gone from
    # current NumPy, so the builtin is used for the dtype.
    if out_size < 0:
        return np.array(mol_feats, dtype=float)
    elif out_size >= n_atoms:
        # 'empty' padding for `mol_feats`. Generate(s) feature matrix of same size for all mols
        # NOTE: len(mol_feats[0]) is the number of feats
        padded_mol_feats = np.zeros((out_size, len(mol_feats[0])),
                                    dtype=float)
        padded_mol_feats[:n_atoms] = np.array(mol_feats, dtype=float)
        return padded_mol_feats
    else:
        raise ValueError(
            '`out_size` (N={}) must be negative or larger than or '
            'equal to the number of atoms in the input molecules (N={}).'.
            format(out_size, n_atoms))
    smiles_set = set()
    props = Properties()
    n_vec = props.GetPropertyNames()

    rdk_mols = []
    for row in table:
        mol = filter_extract_mol(row, headers_dict)
        if (mol is None):
            continue
        row[smiles_idx] = Chem.MolToSmiles(mol)
        if (row[smiles_idx] in smiles_set):
            continue
        smiles_set.add(row[smiles_idx])
        mol.SetProp('_Name', row[chemblid_idx])
        rdmolops.AssignStereochemistry(mol)
        p_vec = props.ComputeProperties(mol)
        too_flexible = False
        for name, value in zip(n_vec, p_vec):
            if (name == 'NumRotatableBonds' and value > 5):
                too_flexible = True
                break
            mol.SetProp('RDK{0:s}'.format(name), '{0:.3f}'.format(value))
        if (too_flexible):
            continue
        for i in range(pchembl_idx + 1):
            mol.SetProp(headers[i], row[i])
        rdDepictor.Compute2DCoords(mol)
        p.ligands.append(mol)
        rdk_mols.append(mol)