Exemple #1
0
def edit_mol(rmol, edits, tatoms):
    """Apply bond edits to a copy of ``rmol`` and return the product SMILES.

    Args:
        rmol: RDKit molecule whose atoms carry atom-map numbers.
        edits: iterable of 4-tuples ``(x, y, t, v)``. ``x`` and ``y`` are
            zero-based atom-map indices (map number minus one); ``t`` is the
            key into ``BOND_FLOAT_TO_TYPE`` for the bond to create (no bond
            is created unless ``t > 0``). The fourth element is not used.
        tatoms: set of zero-based atom-map indices; a fragment is kept only
            if it contains at least one of them.

    Returns:
        The SMILES of the kept fragments (atom-map numbers cleared), sorted
        and joined with ``'.'``.
    """
    editable = Chem.RWMol(rmol)
    for atom in editable.GetAtoms():
        atom.SetNumExplicitHs(0)

    # Zero-based atom-map index -> atom index inside the molecule.
    amap = {atom.GetAtomMapNum() - 1: atom.GetIdx()
            for atom in rmol.GetAtoms()}

    for x, y, t, _ in edits:
        begin, end = amap[x], amap[y]
        # Any existing bond between the pair is dropped before the new one
        # (if any) is added.
        if editable.GetBondBetweenAtoms(begin, end) is not None:
            editable.RemoveBond(begin, end)
        if t > 0:
            editable.AddBond(begin, end, BOND_FLOAT_TO_TYPE[t])

    # Round-trip every fragment through SMILES; fragments that fail to parse
    # or that contain none of the target atoms are discarded.
    kept_smiles = []
    for frag_smiles in Chem.MolToSmiles(editable.GetMol()).split('.'):
        frag = Chem.MolFromSmiles(frag_smiles)
        if frag is None:
            continue
        frag_maps = {atom.GetAtomMapNum() - 1 for atom in frag.GetAtoms()}
        if not (frag_maps & tatoms):
            continue
        for atom in frag.GetAtoms():
            atom.SetAtomMapNum(0)
        kept_smiles.append(Chem.MolToSmiles(frag))

    kept_smiles.sort()
    return '.'.join(kept_smiles)
Exemple #2
0
    def __init__(self, smiles=None, rdk=None, conv_enabled=False):
        """Constructor
        Keyword Arguments:
            smiles {str} -- SMILES representation of a molecule (default: {None})
            rdk {rdkit Mol} -- molecule as an RDKit object (default: {None})
            conv_enabled {bool} -- whether to set both smiles and graph
               arguments here or lazily defer until called
               (default: {False}). When False, the arguments are stored
               as given without any validation.
        Raises:
            ValueError -- if conv_enabled is set and neither a correct
                smiles string or a rdkit mol are provided
        """
        if conv_enabled:
            if isinstance(smiles, str):
                # A string SMILES takes precedence over any rdk argument and
                # is parsed eagerly. An explicit raise (rather than assert)
                # keeps the check alive under `python -O` and matches the
                # documented ValueError contract.
                rdk = Chem.MolFromSmiles(smiles)
                if rdk is None:
                    raise ValueError(
                        "Invalid SMILES string: {!r}".format(smiles))
            elif rdk is not None:
                smiles = Chem.MolToSmiles(rdk)
            else:
                raise ValueError("Invalid arguments")

        self.smiles = smiles
        self.rdk = rdk
        self.graph = None  # should be obtained from rdk when needed
        self.synthesis_path = []  # list of Reactions
        self.begin_flag = True
Exemple #3
0
def processMols(mols):
    """Print a tab-separated table of SMILES, name and score per molecule.

    A header row is always printed first. ``None`` entries in ``mols`` are
    skipped. For every other molecule the row holds its SMILES, its
    ``'_Name'`` property and the value returned by ``calculateScore``.

    Args:
        mols: iterable of RDKit molecules (``None`` entries allowed).
    """
    print('smiles\tName\tsa_score')
    for mol in mols:
        if mol is not None:
            score = calculateScore(mol)
            fields = (Chem.MolToSmiles(mol), "\t", mol.GetProp('_Name'),
                      "\t%3f" % score)
            print("".join(fields))
 def sanitize_smiles(smi, largest_fragment=False):
     """Return a standardized SMILES for ``smi``, best effort.

     If ``smi`` cannot be parsed it is returned unchanged. Otherwise the
     molecule is passed through ``standardizer``; any exception raised there
     is swallowed and whatever state the molecule reached is written back
     out as SMILES.

     Args:
         smi: input SMILES string.
         largest_fragment: when true, also apply
             ``standardizer.largest_fragment`` between the standardize and
             uncharge steps.
     """
     parsed = Chem.MolFromSmiles(smi)
     if parsed is not None:
         try:
             # standardize functional group reps
             parsed = standardizer.standardize(parsed)
             if largest_fragment:
                 # remove product counterions/salts/etc.
                 parsed = standardizer.largest_fragment(parsed)
             # neutralize, e.g., carboxylic acids
             parsed = standardizer.uncharge(parsed)
         except Exception:
             pass
         return Chem.MolToSmiles(parsed)
     return smi
Exemple #5
0
 def to_smiles(self):
     """Return this object's SMILES string, computing it on first use.

     If ``self.smiles`` is ``None`` it is generated from ``self.rdk`` and
     stored back on the instance, so later calls reuse the stored value.
     """
     if self.smiles is None:
         self.smiles = Chem.MolToSmiles(self.rdk)
     return self.smiles
def edit_mol(rmol, edits):
    """Apply bond edits to ``rmol`` and return the resulting fragment SMILES.

    Works on an ``RWMol`` copy of ``rmol``: bonds are removed/added as given
    by ``edits``, explicit-H counts and formal charges are patched with
    element-specific heuristics, and each resulting fragment is round-tripped
    through SMILES and run through the ``clean_rxns_postsani`` reactions.

    Args:
        rmol: RDKit molecule; every atom's ``'molAtomMapNumber'`` integer
            property is read to build the map-number -> atom-index table.
        edits: iterable of ``(x, y, t)``. ``x`` and ``y`` are atom-map
            numbers; ``t`` is the key into ``BOND_TYPE`` for the new bond.
            Any existing bond between the pair is removed first, and a new
            bond is added only when ``t > 0``.

    Returns:
        A list of SMILES strings, one per ``'.'``-separated fragment that
        could be parsed. Unparseable fragments are logged at debug level
        and left out.
    """
    new_mol = Chem.RWMol(rmol)

    # Keep track of aromatic nitrogens, might cause explicit hydrogen issues
    aromatic_nitrogen_idx = set()
    # aromatic C idx -> idx of the adjacent aromatic N (with 1 explicit H),
    # recorded when that carbon has at least one bond of order 2
    aromatic_carbonyl_adj_to_aromatic_nH = {}
    # aromatic C idx -> idx of the adjacent aromatic N (with 0 explicit H),
    # recorded when that carbon has exactly three bonds
    aromatic_carbondeg3_adj_to_aromatic_nH0 = {}
    for a in new_mol.GetAtoms():
        if a.GetIsAromatic() and a.GetSymbol() == 'N':
            aromatic_nitrogen_idx.add(a.GetIdx())
            for nbr in a.GetNeighbors():
                if a.GetNumExplicitHs() == 1 and nbr.GetSymbol(
                ) == 'C' and nbr.GetIsAromatic() and any(
                        b.GetBondTypeAsDouble() == 2 for b in nbr.GetBonds()):
                    aromatic_carbonyl_adj_to_aromatic_nH[
                        nbr.GetIdx()] = a.GetIdx()
                elif a.GetNumExplicitHs() == 0 and nbr.GetSymbol(
                ) == 'C' and nbr.GetIsAromatic() and len(nbr.GetBonds()) == 3:
                    aromatic_carbondeg3_adj_to_aromatic_nH0[
                        nbr.GetIdx()] = a.GetIdx()
        else:
            # Every atom that is not an aromatic nitrogen loses its explicit Hs
            a.SetNumExplicitHs(0)
    new_mol.UpdatePropertyCache()

    # Atom-map number -> atom index
    amap = {}
    for atom in rmol.GetAtoms():
        amap[atom.GetIntProp('molAtomMapNumber')] = atom.GetIdx()

    # Apply the edits as predicted
    for x, y, t in edits:
        bond = new_mol.GetBondBetweenAtoms(amap[x], amap[y])
        a1 = new_mol.GetAtomWithIdx(amap[x])
        a2 = new_mol.GetAtomWithIdx(amap[y])
        if bond is not None:
            new_mol.RemoveBond(amap[x], amap[y])

            # Are we losing a bond on an aromatic nitrogen?
            # Only one endpoint is adjusted: x is checked first, y only if x
            # is not an aromatic nitrogen.
            if bond.GetBondTypeAsDouble() == 1.0:
                if amap[x] in aromatic_nitrogen_idx:
                    if a1.GetTotalNumHs() == 0:
                        a1.SetNumExplicitHs(1)
                    elif a1.GetFormalCharge() == 1:
                        a1.SetFormalCharge(0)
                elif amap[y] in aromatic_nitrogen_idx:
                    if a2.GetTotalNumHs() == 0:
                        a2.SetNumExplicitHs(1)
                    elif a2.GetFormalCharge() == 1:
                        a2.SetFormalCharge(0)

            # Are we losing a c=O bond on an aromatic ring? If so, remove H from adjacent nH if appropriate
            if bond.GetBondTypeAsDouble() == 2.0:
                if amap[x] in aromatic_carbonyl_adj_to_aromatic_nH:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbonyl_adj_to_aromatic_nH[
                            amap[x]]).SetNumExplicitHs(0)
                elif amap[y] in aromatic_carbonyl_adj_to_aromatic_nH:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbonyl_adj_to_aromatic_nH[
                            amap[y]]).SetNumExplicitHs(0)

        if t > 0:
            new_mol.AddBond(amap[x], amap[y], BOND_TYPE[t])

            # Special alkylation case?
            # New single bond on an aromatic nitrogen: drop its one H if it
            # has exactly one, otherwise mark it as positively charged.
            if t == 1:
                if amap[x] in aromatic_nitrogen_idx:
                    if a1.GetTotalNumHs() == 1:
                        a1.SetNumExplicitHs(0)
                    else:
                        a1.SetFormalCharge(1)
                elif amap[y] in aromatic_nitrogen_idx:
                    if a2.GetTotalNumHs() == 1:
                        a2.SetNumExplicitHs(0)
                    else:
                        a2.SetFormalCharge(1)

            # Are we getting a c=O bond on an aromatic ring? If so, add H to adjacent nH0 if appropriate
            if t == 2:
                if amap[x] in aromatic_carbondeg3_adj_to_aromatic_nH0:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbondeg3_adj_to_aromatic_nH0[
                            amap[x]]).SetNumExplicitHs(1)
                elif amap[y] in aromatic_carbondeg3_adj_to_aromatic_nH0:
                    new_mol.GetAtomWithIdx(
                        aromatic_carbondeg3_adj_to_aromatic_nH0[
                            amap[y]]).SetNumExplicitHs(1)

    # Tried:
    # bonds_to_remove.sort(key=lambda x: x[0], reverse=True)
    # for (idx, bond) in bonds_to_remove:
    #     start = bond.GetBeginAtomIdx()
    #     end = bond.GetEndAtomIdx()
    #     new_mol.RemoveBond(start, end)
    # pred_mol = new_mol.GetMol()

    pred_mol = new_mol.GetMol()

    # Clear formal charges to make molecules valid
    # Note: because S and P (among others) can change valence, be more flexible
    # Each atom takes the first matching branch of the chain below;
    # "bond_vals" is the sum (or list) of bond orders on that atom.
    for atom in pred_mol.GetAtoms():
        atom.ClearProp('molAtomMapNumber')
        if atom.GetSymbol() == 'N' and atom.GetFormalCharge(
        ) == 1:  # exclude negatively-charged azide
            bond_vals = sum(
                [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])
            if bond_vals <= 3:
                atom.SetFormalCharge(0)
        elif atom.GetSymbol() == 'N' and atom.GetFormalCharge(
        ) == -1:  # handle negatively-charged azide addition
            bond_vals = sum(
                [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])
            if bond_vals == 3 and any(
                [nbr.GetSymbol() == 'N' for nbr in atom.GetNeighbors()]):
                atom.SetFormalCharge(0)
        elif atom.GetSymbol() == 'N':
            # Remaining nitrogens (formal charge neither +1 nor -1)
            bond_vals = sum(
                [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])
            if bond_vals == 4 and not atom.GetIsAromatic(
            ):  # and atom.IsInRingSize(5)):
                atom.SetFormalCharge(1)
        elif atom.GetSymbol() == 'C' and atom.GetFormalCharge() != 0:
            atom.SetFormalCharge(0)
        elif atom.GetSymbol() == 'O' and atom.GetFormalCharge() != 0:
            # Oxygen is the only branch that counts explicit Hs as well
            bond_vals = sum(
                [bond.GetBondTypeAsDouble()
                 for bond in atom.GetBonds()]) + atom.GetNumExplicitHs()
            if bond_vals == 2:
                atom.SetFormalCharge(0)
        elif atom.GetSymbol() in ['Cl', 'Br', 'I', 'F'
                                  ] and atom.GetFormalCharge() != 0:
            bond_vals = sum(
                [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])
            if bond_vals == 1:
                atom.SetFormalCharge(0)
        elif atom.GetSymbol() == 'S' and atom.GetFormalCharge() != 0:
            bond_vals = sum(
                [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])
            if bond_vals in [2, 4, 6]:
                atom.SetFormalCharge(0)
        elif atom.GetSymbol(
        ) == 'P':  # quaternary phosphorus should be pos. charge with 0 H
            bond_vals = [
                bond.GetBondTypeAsDouble() for bond in atom.GetBonds()
            ]
            if sum(bond_vals) == 4 and len(bond_vals) == 4:
                atom.SetFormalCharge(1)
                atom.SetNumExplicitHs(0)
            elif sum(bond_vals) == 3 and len(
                    bond_vals) == 3:  # make sure neutral
                atom.SetFormalCharge(0)
        elif atom.GetSymbol(
        ) == 'B':  # quaternary boron should be neg. charge with 0 H
            bond_vals = [
                bond.GetBondTypeAsDouble() for bond in atom.GetBonds()
            ]
            if sum(bond_vals) == 4 and len(bond_vals) == 4:
                atom.SetFormalCharge(-1)
                atom.SetNumExplicitHs(0)
        elif atom.GetSymbol() in ['Mg', 'Zn']:
            # Mg/Zn with exactly one single bond is given a +1 charge
            bond_vals = [
                bond.GetBondTypeAsDouble() for bond in atom.GetBonds()
            ]
            if sum(bond_vals) == 1 and len(bond_vals) == 1:
                atom.SetFormalCharge(1)
        elif atom.GetSymbol() == 'Si':
            # Si with only single bonds: set explicit Hs to fill up to four
            bond_vals = [
                bond.GetBondTypeAsDouble() for bond in atom.GetBonds()
            ]
            if sum(bond_vals) == len(bond_vals):
                atom.SetNumExplicitHs(max(0, 4 - len(bond_vals)))

    # Bounce to/from SMILES to try to sanitize
    pred_smiles = Chem.MolToSmiles(pred_mol)  # <--- TODO: error occurs here
    pred_list = pred_smiles.split('.')
    pred_mols = [Chem.MolFromSmiles(pred_smiles) for pred_smiles in pred_list]

    for i, mol in enumerate(pred_mols):
        # Check if we failed/succeeded in previous step
        if mol is None:
            logging.debug('##### Unparseable mol: {}'.format(pred_list[i]))
            continue

        # Else, try post-sanitization fixes in structure
        mol = Chem.MolFromSmiles(Chem.MolToSmiles(mol))
        if mol is None:
            # pred_mols[i] still holds the molecule from the first parse
            continue
        # NOTE(review): `mol` is not reassigned inside this loop, so every
        # reaction runs on the same input and a later match overwrites
        # pred_mols[i] instead of building on it -- confirm this is intended.
        for rxn in clean_rxns_postsani:
            out = rxn.RunReactants((mol, ))
            if out:
                try:
                    # Only the first product of the first outcome is used
                    Chem.SanitizeMol(out[0][0])
                    pred_mols[i] = Chem.MolFromSmiles(
                        Chem.MolToSmiles(out[0][0]))
                except Exception as e:
                    print(e)
                    print('Could not sanitize postsani reaction product: {}'.
                          format(Chem.MolToSmiles(out[0][0])))
                    print('Original molecule was: {}'.format(
                        Chem.MolToSmiles(mol)))
    # Entries that are None (failed parses) are filtered out here
    pred_smiles = [
        Chem.MolToSmiles(pred_mol) for pred_mol in pred_mols
        if pred_mol is not None
    ]

    return pred_smiles
            thisrow.append(r)
            thisrow.append(p)

            # Save pbond information
            pbonds = {}
            for bond in pmol.GetBonds():
                a1 = idxfunc(bond.GetBeginAtom())
                a2 = idxfunc(bond.GetEndAtom())
                t = bond_types.index(bond.GetBondType())
                pbonds[(a1, a2)] = pbonds[(a2, a1)] = t + 1

            for atom in pmol.GetAtoms():
                atom.ClearProp('molAtomMapNumber')

            psmiles = Chem.MolToSmiles(pmol)
            psmiles_sani = set(sanitize_smiles(psmiles, True).split('.'))
            psmiles = set(psmiles.split('.'))

            thisrow.append('.'.join(psmiles))
            thisrow.append('.'.join(psmiles_sani))

            ########### Use *true* edits to try to recover product

            if opts.bonds_as_doubles:
                cbonds = []
                for gedit in gedits.split(';'):
                    x, y, t = gedit.split('-')
                    x, y, t = int(x), int(y), float(t)
                    cbonds.append((x, y, bond_types_as_double[t]))
            else: