Exemple #1
0
def all_join_on_attach_point(mol1, mol2):
    """Yield every way of joining two molecules at their attachment points.

    For each input molecule, the non-ring atoms with atomic number 0 (dummy
    atoms) are visited in decreasing atom-index order and every neighbor of
    such an atom is tagged with a fresh atom-map number, starting at 100. The
    numbering keeps increasing across both molecules. The tagging is done on
    the atoms of the molecules that were passed in.

    Each molecule is then passed through `dm.fix_mol`, and the indices of the
    atoms whose map number is at least 100 are collected as attachment points.

    Arguments
    ---------
        mol1: <Chem.Mol>
            input molecule 1
        mol2: <Chem.Mol>
            input molecule 2

    Returns:
        iterator over the result of `random_fragment_add` for every pair
        (attachment point of molecule 1, attachment point of molecule 2).
        This is a generator: nothing runs until it is first iterated.
    """
    map_offset = 100
    # A single counter is shared by both molecules so labels never collide.
    next_label = itertools.count(map_offset)
    prepared = []
    anchor_sets = []

    for source in (mol1, mol2):
        dummies = sorted(
            ((atom.GetIdx(), atom) for atom in source.GetAtoms()
             if not atom.IsInRing() and atom.GetAtomicNum() == 0),
            key=operator.itemgetter(0),
            reverse=True,
        )
        for _, dummy in dummies:
            for neighbor in dummy.GetNeighbors():
                neighbor.SetAtomMapNum(next(next_label))

        fixed = dm.fix_mol(source)
        prepared.append(fixed)
        anchor_sets.append([
            atom.GetIdx() for atom in fixed.GetAtoms()
            if atom.GetAtomMapNum() >= map_offset
        ])

    left, right = prepared
    for anchor1, anchor2 in itertools.product(*anchor_sets):
        # Copies are handed over so each join starts from the same inputs.
        yield random_fragment_add(copy.copy(left), copy.copy(right),
                                  anchor1, anchor2)
def _preprocess(i, row):
    """Clean one row's molecule and attach identifiers plus a Morgan fingerprint.

    The SMILES is read from `row[smiles_column]` (`smiles_column` is a
    module-level name), converted to a molecule, then passed through datamol's
    fix / sanitize / standardize steps. The row gains the columns
    `standard_smiles`, `selfies`, `inchi`, `inchikey` and `onbits_fp` (the
    on-bit indices of a radius-2, 8192-bit, chirality-aware Morgan bit vector).

    Args:
        i: not used by the body.
        row: mapping-like record; it is modified in place and returned.

    Returns:
        The same `row` object with the new columns set.
    """
    clean = dm.to_mol(str(row[smiles_column]), ordered=True)
    clean = dm.fix_mol(clean)
    clean = dm.sanitize_mol(clean, sanifix=True, charge_neutral=False)
    clean = dm.standardize_mol(
        clean,
        disconnect_metals=False,
        normalize=True,
        reionize=True,
        uncharge=False,
        stereo=True,
    )

    morgan_bits = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        clean,
        radius=2,
        nBits=8192,
        invariants=[],
        fromAtoms=[],
        useChirality=True,
        useBondTypes=True,
        useFeatures=False,
    )

    # Evaluated lazily and in order, so a failure part-way through leaves the
    # earlier columns already written.
    derived_columns = (
        ("standard_smiles", lambda: dm.standardize_smiles(dm.to_smiles(clean))),
        ("selfies", lambda: dm.to_selfies(clean)),
        ("inchi", lambda: dm.to_inchi(clean)),
        ("inchikey", lambda: dm.to_inchikey(clean)),
        ("onbits_fp", lambda: list(morgan_bits.GetOnBits())),
    )
    for column, compute in derived_columns:
        row[column] = compute()

    return row
Exemple #3
0
def recap(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Fragment the molecule using the recap algorithm.

    The keys returned by `Recap.RecapDecompose(mol).GetAllChildren()` are
    converted to molecules with `dm.to_mol`, optionally fixed and sanitized,
    and entries that end up as `None` are dropped.

    Args:
        mol: a molecule.
        remove_parent: When True, return only the fragments; otherwise `mol`
            is placed in front of them.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of molecules.
    """
    hierarchy = Recap.RecapDecompose(mol)
    pieces = list(map(dm.to_mol, hierarchy.GetAllChildren().keys()))

    if fix:
        pieces = list(map(dm.fix_mol, pieces))
    if sanitize:
        pieces = list(map(dm.sanitize_mol, pieces))

    pieces = [piece for piece in pieces if piece is not None]

    return pieces if remove_parent else [mol, *pieces]
Exemple #4
0
def frag(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Generate all possible fragmentation of a molecule.

    Every entry produced by `FraggleSim.generate_fraggle_fragmentation` is
    split on "." and the stripped pieces are de-duplicated, sorted in reverse
    order, and converted to molecules. Fragments are then optionally fixed
    and sanitized, and entries that end up as `None` are dropped.

    Args:
        mol: a molecule.
        remove_parent: When True, return only the fragments; otherwise `mol`
            is placed in front of them.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of molecules.
    """
    unique_smiles = {
        token.strip()
        for fragmentation in FraggleSim.generate_fraggle_fragmentation(mol)
        for token in fragmentation.split(".")
    }

    # Reverse-sorted so the output order does not depend on set iteration.
    pieces = [dm.to_mol(smi) for smi in sorted(unique_smiles, reverse=True)]

    if fix:
        pieces = list(map(dm.fix_mol, pieces))
    if sanitize:
        pieces = list(map(dm.sanitize_mol, pieces))

    pieces = [piece for piece in pieces if piece is not None]

    return pieces if remove_parent else [mol, *pieces]
Exemple #5
0
def brics(
    mol: Chem.Mol,
    singlepass: bool = True,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Run BRICS on the molecule and optionally fix/sanitize the fragments.

    Args:
        mol: a molecule.
        singlepass: Single pass for `BRICSDecompose`.
        remove_parent: When True, the first fragment of the list is dropped
            (it is treated as the parent). This happens before `None`
            entries are filtered out.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of fragment molecules with `None` entries removed.
    """
    pieces = list(
        BRICS.BRICSDecompose(mol, returnMols=True, singlePass=singlepass)
    )

    if fix:
        pieces = list(map(dm.fix_mol, pieces))
    if sanitize:
        pieces = list(map(dm.sanitize_mol, pieces))
    if remove_parent:
        pieces.pop(0)

    return [piece for piece in pieces if piece is not None]
Exemple #6
0
def compute_reaction_product(out, single_output=True):
    """Compute the product of a reaction.

    Args:
        out: iterable of product groups; only the first element of each group
            is used, after `dm.fix_mol(..., n_iter=0)`.
        single_output: when True, return a single product chosen by
            `dm.sanitize_first` from a random permutation of the candidates;
            when False, return every candidate after `dm.sanitize_mol`.

    Returns:
        One product or a list of products, depending on `single_output`.
    """
    candidates = [dm.fix_mol(group[0], n_iter=0) for group in out]

    if single_output:
        # Picking by a reward-based ranking would be an alternative to the
        # random order used here; random sampling is the cheaper choice.
        shuffled = np.random.permutation(candidates)
        return dm.sanitize_first(shuffled)

    return [dm.sanitize_mol(candidate) for candidate in candidates]
Exemple #7
0
def test_fixmol():
    """Exercise `dm.fix_mol` with its `n_iter`, `largest_only` and
    `remove_singleton` options on a multi-fragment, unsanitized molecule."""
    # Four disconnected fragments; the last one carries an atom-mapped nitrogen.
    sm = "C.Cl.CC.[H][N:1]1(C)=CC(O)=CC2CCCCC12"
    mol = Chem.MolFromSmiles(sm, sanitize=False)
    # mol.UpdatePropertyCache(False)
    # Chem.Kekulize(mol)
    res = dm.fix_mol(mol, n_iter=1)  # presumably returns a copy: `mol` is reused below

    # after one fixing iteration no incorrect valence should be reported
    assert not dm.incorrect_valence(res)

    res2 = dm.fix_mol(mol, n_iter=2)
    # a second iteration is not expected to change anything: res == res2
    assert Chem.MolToSmiles(res) == Chem.MolToSmiles(res2)

    # only the largest fragment is expected here
    res_largest = dm.fix_mol(mol, largest_only=True)

    # NOTE(review): the result of this call is discarded; it only checks that
    # combining both options does not raise - confirm this is intended.
    dm.fix_mol(mol, remove_singleton=True, largest_only=True)
    assert len(Chem.rdmolops.GetMolFrags(res_largest)) == 1

    # compare through standardize_smiles so both sides use the same form
    expected_largest_fix = dm.standardize_smiles("OC1=CC2CCCCC2[N:1]=C1")
    assert dm.standardize_smiles(
        Chem.MolToSmiles(res_largest)) == expected_largest_fix

    res_no_singleton = dm.fix_mol(mol, n_iter=2, remove_singleton=True)
    # NOTE(review): the next assertion repeats the earlier check on res_largest
    assert len(Chem.rdmolops.GetMolFrags(res_largest)) == 1
    # two fragments are expected to remain once singletons are removed
    assert len(Chem.rdmolops.GetMolFrags(res_no_singleton)) == 2
def _preprocess(i, row):
    """Clean one row's molecule and attach identifiers and stereoisomer SMILES.

    The SMILES is read from `row[smiles_column]` (`smiles_column` is a
    module-level name), converted to a molecule, then passed through datamol's
    fix / sanitize / standardize steps. Up to 20 unique stereoisomers are
    enumerated with a fixed random seed and their isomeric SMILES are stored
    sorted.

    Args:
        i: not used by the body.
        row: mapping-like record; it is modified in place and returned.

    Returns:
        The same `row` object with `standard_smiles`, `selfies`, `inchi`,
        `inchikey` and `enumerated_smiles` set. If a `ValueError` is raised
        while processing, the string columns are set to ``"dropped"`` and
        `enumerated_smiles` to ``["dropped"]``.
    """
    try:
        mol = dm.to_mol(str(row[smiles_column]), ordered=True)
        mol = dm.fix_mol(mol)
        mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
        mol = dm.standardize_mol(mol,
                                 disconnect_metals=False,
                                 normalize=True,
                                 reionize=True,
                                 uncharge=False,
                                 stereo=True)
        opts = StereoEnumerationOptions(unique=True,
                                        maxIsomers=20,
                                        rand=0xf00d)
        isomers = EnumerateStereoisomers(mol, options=opts)
        # `sorted` already returns a fresh list, so no extra copy is needed.
        smiles_list = sorted(
            Chem.MolToSmiles(y, isomericSmiles=True) for y in isomers)

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["selfies"] = dm.to_selfies(mol)
        row["inchi"] = dm.to_inchi(mol)
        row["inchikey"] = dm.to_inchikey(mol)
        row["enumerated_smiles"] = smiles_list

        return row

    except ValueError:
        row["standard_smiles"] = 'dropped'
        row["selfies"] = 'dropped'
        row["inchi"] = 'dropped'
        row["inchikey"] = 'dropped'
        # A one-element list: `list('dropped')` would split the marker into
        # its individual characters.
        row["enumerated_smiles"] = ['dropped']
        return row
Exemple #9
0
def trim_side_chain(mol: Chem.rdchem.Mol, core, unwanted_side_chains):
    """Trim list of side chain from a molecule.

    Hydrogens are added to `mol` and `core` is matched against it. For every
    core atom whose atom-map number also appears on an atom of one of the
    unwanted patterns, the bond between the matched molecule atom and its
    first neighbor that is also part of the match is cut, and a dummy atom
    ("*") carrying the same map number is bonded to that neighbor with the
    original bond type. Hydrogens are then removed, fragments matching each
    pattern (after `dm.fix_mol`) are deleted, and the largest remaining
    fragment is returned.

    Args:
        mol: molecule to trim.
        core: query molecule; its atom-map numbers mark attachment points.
        unwanted_side_chains: iterable of pattern molecules whose atom-map
            numbers select which attachment points are trimmed.

    Returns:
        The result of `dm.keep_largest_fragment` on the trimmed molecule.
    """

    mol = Chem.AddHs(mol)

    match = mol.GetSubstructMatch(core)
    matched_idxs = set(match)  # O(1) membership instead of scanning the tuple

    # De-duplicate the patterns while keeping their order; the iterable is
    # consumed once here because it is needed again after the edit step.
    patterns = list(dict.fromkeys(unwanted_side_chains))
    unwanted_map_nums = {
        a.GetAtomMapNum()
        for patt in patterns
        for a in patt.GetAtoms()
        if a.GetAtomMapNum()
    }

    # molecule atom index -> (map number, neighbors that belong to the match)
    attachment_points = {}
    for atom in core.GetAtoms():
        num = atom.GetAtomMapNum()
        if num and num in unwanted_map_nums:
            mol_atom_idx = match[atom.GetIdx()]
            nei_atoms = mol.GetAtomWithIdx(mol_atom_idx).GetNeighbors()
            attachment_points[mol_atom_idx] = (
                num,
                [n.GetIdx() for n in nei_atoms if n.GetIdx() in matched_idxs],
            )

    emol = Chem.EditableMol(mol)
    for atom_idx, (atom_map, neighbors) in attachment_points.items():
        if not neighbors:
            # No matched neighbor to re-attach a dummy to: nothing to cut.
            continue
        # Atom index 0 is a valid neighbor, so emptiness is tested above
        # rather than the truthiness of the index itself.
        nei_idx = neighbors[0]
        dummy = Chem.rdchem.Atom("*")
        dummy.SetAtomMapNum(atom_map)
        bond = mol.GetBondBetweenAtoms(atom_idx, nei_idx)
        emol.RemoveBond(atom_idx, nei_idx)
        new_ind = emol.AddAtom(dummy)
        emol.AddBond(nei_idx, new_ind, bond.GetBondType())

    mol = emol.GetMol()
    mol = Chem.RemoveHs(mol)
    for patt in patterns:
        cur_frag = dm.fix_mol(patt)
        mol = Chem.DeleteSubstructs(mol, cur_frag, onlyFrags=True)

    return dm.keep_largest_fragment(mol)
Exemple #10
0
def break_mol(
    mol: Chem.Mol,
    minFragmentSize: int = 1,
    silent: bool = True,
    onlyUseReactions: list = [],
    randomize: bool = False,
    mode: str = "brics",
    returnTree: bool = False,
):
    """Breaks a molecule into a list of fragments.

    Starting from `mol`, reactions are applied to one pending molecule at a
    time. For each pending molecule the first reaction that yields an accepted
    product sequence is used; its products become child nodes of the molecule
    in a directed graph and are queued for further breaking when their SMILES
    has not been seen before.

    Args:
        mol: molecule to break.
        minFragmentSize: size threshold used both when choosing which product
            set of a reaction to keep and when validating the products.
        silent: when False, print the number of product sets and the selected
            index for every reaction that produced something.
        onlyUseReactions: when non-empty, reactions whose type is not in this
            list are skipped. NOTE(review): the default is a mutable list; it
            is only read in this function.
        randomize: try the reactions in a random order
            (`np.random.permutation`).
        mode: "brics" uses `ALL_BRICS`, "rxn" uses `ALL_RXNS`, any other value
            uses both (case-insensitive).
        returnTree: also return the graph.

    Returns:
        `(leaves_smiles, allNodes)`, or `(leaves_smiles, allNodes, G)` when
        `returnTree` is True. `leaves_smiles` lists the SMILES of graph nodes
        that have a parent and no children; `allNodes` is the set of SMILES
        seen (the input molecule plus every distinct product).
    """

    # Select the reaction set and the parallel list of reaction types.
    if mode.lower() == "brics":
        all_reactions = ALL_BRICS
        all_reactions_type = ALL_BRICS_TYPE
    elif mode.lower() == "rxn":
        all_reactions = ALL_RXNS
        all_reactions_type = ALL_RXNS_TYPE
    else:
        all_reactions = ALL_BRICS + ALL_RXNS
        all_reactions_type = ALL_BRICS_TYPE + ALL_RXNS_TYPE
    if randomize:
        # The same permutation is applied to both lists to keep them aligned.
        p = np.random.permutation(len(all_reactions))
        all_reactions = [all_reactions[ind] for ind in p]
        all_reactions_type = [all_reactions_type[ind] for ind in p]

    # presumably the networkx module, obtained through a datamol helper
    nx = dm.graph._get_networkx()
    mSmi = Chem.MolToSmiles(mol, isomericSmiles=True)
    G = nx.DiGraph()
    node_num = 0
    # Node 0 is the input molecule.
    G.add_node(node_num, smiles=mSmi, mol=mol)
    allNodes = set()
    # Molecules still to be broken: SMILES -> node id.
    activePool = {mSmi: node_num}
    allNodes.add(mSmi)
    while activePool:
        # Take the first pending entry.
        nSmi = list(activePool.keys())[0]
        parent = activePool.pop(nSmi)
        node = G.nodes[parent]
        mol = node["mol"]
        for rxnIdx, reaction in zip(all_reactions_type, all_reactions):
            if onlyUseReactions and rxnIdx not in onlyUseReactions:
                continue
            ps = reaction.RunReactants((mol,))
            if ps:

                # For each product set: do all products have more than
                # minFragmentSize explicit atoms?
                all_pass = [
                    all([prod.GetNumAtoms(onlyExplicit=True) > minFragmentSize for prod in p_])
                    for p_ in ps
                ]
                # Index of the first product set that passes.
                nz_i = 0
                while nz_i < len(all_pass) and not all_pass[nz_i]:
                    nz_i += 1
                if not silent:
                    print(nSmi, "->", len(ps), "products and selected ", nz_i)
                    # display(MolsToGridImage(list(itertools.chain(*list(ps))), molsPerRow=2))
                # When no set passes, nz_i == len(all_pass) and the modulo
                # falls back to the first product set.
                prodSeq = ps[nz_i % len(all_pass)]
                seqOk = True
                # we want to disqualify small fragments, so sort the product sequence by size
                prodSeq = [(prod.GetNumAtoms(onlyExplicit=True), prod) for prod in prodSeq]
                prodSeq.sort(key=lambda x: x[0])
                for _, prod in prodSeq:
                    prod.sanitized = True
                    try:
                        Chem.SanitizeMol(prod)
                    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit.
                    except:
                        if dm.sanitize_mol(prod) is None:
                            seqOk = False
                            break
                        # NOTE(review): on this path `prod.sanitized` stays True
                        # and `prod.pSmi` is never set, so the loop below would
                        # fail on `prod.pSmi` - confirm the intended behavior.
                        continue
                    pSmi = Chem.MolToSmiles(prod, isomericSmiles=True)
                    # The SMILES must also parse back into a molecule.
                    seqOk = seqOk and (dm.to_mol(pSmi) is not None)

                    # Count the atoms that are not dummy ("*") atoms.
                    notDummies = sum([atm.GetSymbol() != "*" for atm in prod.GetAtoms()])
                    # nDummies = pSmi.count('*')
                    # if minFragmentSize > 0 and (nats - nDummies < minFragmentSize):
                    # NOTE(review): this check uses `<` on non-dummy atoms while
                    # the product-set selection above uses `>` on explicit atoms.
                    if minFragmentSize > 0 and notDummies < minFragmentSize:
                        seqOk = False
                        break
                    prod.pSmi = pSmi

                if seqOk:
                    for _, prod in prodSeq:
                        if not prod.sanitized:
                            continue
                        pSmi = prod.pSmi
                        node_num += 1
                        # The node SMILES comes from the fixed molecule, while
                        # the node stores the unfixed product.
                        usmi = Chem.MolToSmiles(dm.fix_mol(prod), isomericSmiles=True)
                        G.add_node(node_num, smiles=usmi, mol=prod)
                        G.add_edge(parent, node_num)
                        # Only queue products whose SMILES is new; the pool is
                        # keyed by the pre-fix SMILES.
                        if usmi not in allNodes:
                            activePool[pSmi] = node_num
                            allNodes.add(usmi)
                    # Record which reaction type broke this node.
                    G.nodes[parent]["rxn"] = rxnIdx
                    break  # at least one reaction matches

    # Leaves: nodes with a parent and no children (the root is excluded).
    leaves_smiles = [
        G.nodes[n]["smiles"] for n in G.nodes() if G.in_degree(n) != 0 and G.out_degree(n) == 0
    ]
    if returnTree:
        return leaves_smiles, allNodes, G
    return leaves_smiles, allNodes
Exemple #11
0
def _preprocess(i, row):
    """Clean one row's molecule, enumerate stereoisomers and fingerprint them.

    The SMILES is read from `row[smiles_column]` (`smiles_column` is a
    module-level name), converted to a molecule, then passed through datamol's
    fix / sanitize / standardize steps. Up to 20 unique stereoisomers are
    enumerated with a fixed random seed. Each isomer is cleaned the same way
    and fingerprinted twice with a radius-2, 8192-bit Morgan bit vector: once
    with chirality and once without.

    The row-level `standard_smiles`, `smarts` and `selfies` columns describe
    the standardized input molecule, not one of the enumerated isomers.

    Args:
        i: not used by the body.
        row: mapping-like record; it is modified in place and returned.

    Returns:
        The same `row` object with `standard_smiles`, `smarts`, `selfies`,
        `enumerated_smiles`, `achiral_fp` and `chiral_fp` set; the last three
        are parallel lists with one entry per isomer. If a `ValueError` is
        raised while processing, the string columns are set to ``"dropped"``
        and the list columns to ``["dropped"]``.
    """
    try:
        mol = dm.to_mol(str(row[smiles_column]), ordered=True)
        mol = dm.fix_mol(mol)
        mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
        mol = dm.standardize_mol(mol, disconnect_metals=False, normalize=True,
                                 reionize=True, uncharge=False, stereo=True)
        opts = StereoEnumerationOptions(unique=True, maxIsomers=20, rand=0xf00d)
        isomers = EnumerateStereoisomers(mol, options=opts)
        enum_smiles = sorted(
            Chem.MolToSmiles(y, isomericSmiles=True) for y in isomers)

        # Loop-invariant fingerprint settings, built once.
        fingerprint_function = rdMolDescriptors.GetMorganFingerprintAsBitVect
        chiral_pars = {
            "radius": 2,
            "nBits": 8192,
            "invariants": [],
            "fromAtoms": [],
            "useChirality": True,
            "useBondTypes": True,
            "useFeatures": False,
        }
        achiral_pars = {**chiral_pars, "useChirality": False}

        smiles_list = []
        achiral_fp_lis = []
        chiral_fp_lis = []

        for smi in enum_smiles:
            # A separate name keeps `mol` bound to the input molecule, which
            # the row-level columns below are computed from.
            isomer = dm.to_mol(smi, ordered=True)
            isomer = dm.fix_mol(isomer)
            isomer = dm.sanitize_mol(isomer, sanifix=True, charge_neutral=False)
            isomer = dm.standardize_mol(isomer, disconnect_metals=False,
                                        normalize=True, reionize=True,
                                        uncharge=False, stereo=True)

            chiral_fp = fingerprint_function(isomer, **chiral_pars)
            achiral_fp = fingerprint_function(isomer, **achiral_pars)

            smiles_list.append(dm.standardize_smiles(smi))
            achiral_fp_lis.append(list(achiral_fp.GetOnBits()))
            chiral_fp_lis.append(list(chiral_fp.GetOnBits()))

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["smarts"] = dm.to_smarts(mol)
        row["selfies"] = dm.to_selfies(mol)
        row["enumerated_smiles"] = smiles_list
        row["achiral_fp"] = achiral_fp_lis
        row["chiral_fp"] = chiral_fp_lis

        return row

    except ValueError:
        row["standard_smiles"] = 'dropped'
        row["smarts"] = 'dropped'
        row["selfies"] = 'dropped'
        # One-element lists: `list('dropped')` would split the marker into
        # its individual characters.
        row["enumerated_smiles"] = ['dropped']
        row["achiral_fp"] = ['dropped']
        row["chiral_fp"] = ['dropped']
        return row