def get_sgs(record_dict, n_min, n_max, method="exhaustive"):
    """Collect subgraphs of ``record_dict["mol"]`` with the chosen strategy.

    Args:
        record_dict: mapping whose ``"mol"`` entry is the parent molecule.
        n_min: lower size bound, forwarded to the enumeration / size filter.
        n_max: upper size bound, forwarded to the enumeration / size filter.
        method: ``"exhaustive"`` enumerates subgraphs directly with RDKit;
            ``"RECAP"`` and ``"BRICS"`` derive them from the respective
            fragmentation of the parent molecule.

    Returns:
        The result of ``FindAllSubgraphsOfLengthMToN`` for ``"exhaustive"``,
        the result of ``subset_sgs_sizes`` for ``"RECAP"`` / ``"BRICS"``, and
        ``None`` for any other ``method`` value.
    """
    if method not in ("exhaustive", "RECAP", "BRICS"):
        return None

    parent = record_dict["mol"]
    if method == "exhaustive":
        return Chem.rdmolops.FindAllSubgraphsOfLengthMToN(parent, n_min, n_max)

    # Lazily yield one fragment molecule at a time so that the per-fragment
    # work below is interleaved exactly as in two separate loops.
    if method == "RECAP":
        nodes = Recap.RecapDecompose(parent).GetAllChildren().values()
        pieces = (node.mol for node in nodes)
    else:
        pieces = (Chem.MolFromSmiles(frag_smiles)
                  for frag_smiles in BRICS.BRICSDecompose(parent))

    bond_idx_sets = []
    for piece in pieces:
        # Strip the dummy atoms (atomic number 0) that mark the cut points.
        stripped = Chem.DeleteSubstructs(piece, Chem.MolFromSmarts('[#0]'))
        # presumably the parent's bond indices covered by the fragment, or
        # None when it cannot be located -- confirm against the helper.
        bond_idxs = get_substructure_bond_idx(stripped, parent)
        if bond_idxs is not None:
            bond_idx_sets.append(bond_idxs)
    return subset_sgs_sizes([bond_idx_sets], n_min, n_max)
Beispiel #2
0
    def get_fragments(self):
        """Return Murcko-scaffold SMILES of the RECAP fragments and the molecule.

        The molecule is only processed when none of ``is_mol()``,
        ``is_small()`` and ``has_2_rings()`` returns ``False``; all three are
        always evaluated.

        Returns:
            ``None`` when one of the three checks fails. Otherwise a list of
            scaffold SMILES: first one per direct RECAP child that has at
            least two SSSR rings, then the scaffold of the molecule itself.
            Scaffolds that cannot be computed or re-parsed are skipped.
        """
        checks = [self.is_mol(), self.is_small(), self.has_2_rings()]
        if False in checks:
            return None

        def ring_count(mol):
            # Chem.GetSSSR returned the ring count in older RDKit releases
            # and returns the rings themselves in newer ones; accept both.
            sssr = Chem.GetSSSR(mol)
            return sssr if isinstance(sssr, int) else len(sssr)

        fragments = []

        # 1st add scf of the fragments
        hierarch = Recap.RecapDecompose(self.get_mol())
        for node in hierarch.children.values():
            m = node.mol
            if m is None or ring_count(m) < 2:
                continue
            # Fragments into scaffolds conversion
            try:
                core = MurckoScaffold.GetScaffoldForMol(m)
            except ValueError:  # scf calculation not possible
                continue
            smiles_scf = Chem.MolToSmiles(core)
            # Keep only scaffolds whose SMILES round-trips through the parser.
            if Chem.MolFromSmiles(smiles_scf) is None:
                continue
            fragments.append(smiles_scf)

        # 2nd add scf of itself
        try:
            core = MurckoScaffold.GetScaffoldForMol(self.get_mol())
            smiles_scf = Chem.MolToSmiles(core)
            if Chem.MolFromSmiles(smiles_scf) is not None:
                fragments.append(smiles_scf)
        except ValueError:  # scf calculation not possible
            pass

        return fragments
Beispiel #3
0
    def __init__(self,
                 smi,
                 lib_smiles,
                 core_smi=None,
                 lib_file=None,
                 fam_sub=True):
        """Parse the input molecule, load the fragment library and expand it.

        Args:
            smi: SMILES of the molecule; the parsed molecule is stored on
                ``self.smi``.
            lib_smiles: SMILES used to generate the library via ``gen_lib``
                when no ``lib_file`` is given.
            core_smi: optional core SMILES, only used when ``fam_sub`` is
                false.
            lib_file: optional library file; when given it is loaded with
                ``load_lib`` instead of generating the library.
            fam_sub: when true, the library is regrouped into families
                before the expansion; when false, a stripped core is
                computed and the plain expansion is run.
        """
        self.smi = Chem.MolFromSmiles(smi)
        self.lib = (load_lib(lib_file) if lib_file is not None
                    else gen_lib(lib_smiles))

        if not fam_sub:
            self.core = Expand.get_stripped_core(self, core_smi)
            self.expanded = Expand.add(self)
            return

        # Family-based path: pool the library with the molecule's own direct
        # RECAP fragments, group them, then expand using the family map.
        parent_frags = Recap.RecapDecompose(self.smi).children.keys()
        pooled = copy.deepcopy(self.lib)
        pooled.extend(list(parent_frags))
        affinities = Expand.build_affin_matrix(self, pooled)
        grouped = Expand.build_families(self, pooled, affinities)
        self.lib = Expand.get_family_map(self, grouped, parent_frags)
        self.expanded = Expand.fam_add(self)
        for entry in self.expanded:
            print(entry)
Beispiel #4
0
def pair_frags(fname, out, method='Recap', is_mf=True):
    """Build (fragment combination, molecule) SMILES pairs and write a TSV.

    Args:
        fname: tab-separated input table with a ``Smiles`` column.
        out: path of the tab-separated output with columns ``Frags`` and
            ``Smiles``.
        method: fragmentation method. ``'recap'`` (compared
            case-insensitively, so the default ``'Recap'`` selects it) uses
            the RECAP leaves; any other value uses BRICS.
        is_mf: when true, every combination of the kept fragments is
            considered; otherwise only single fragments.

    Notes:
        Relies on a module-level ``voc`` object whose ``n_frags`` attribute
        caps the number of fragments kept per molecule.
    """
    smiles = pd.read_table(fname).Smiles.dropna()
    # The comparison used to be case-sensitive, which made the default value
    # 'Recap' silently fall through to the BRICS branch.
    use_recap = str(method).lower() == 'recap'
    # Loop-invariant helper molecules: the dummy atom and its replacement.
    du, hy = Chem.MolFromSmiles('*'), Chem.MolFromSmiles('[H]')
    pairs = []
    for smile in tqdm(smiles):
        smile = utils.clean_mol(smile)
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            # Unparsable SMILES cannot be fragmented; skip the row.
            continue
        if use_recap:
            frags = np.array(sorted(Recap.RecapDecompose(mol).GetLeaves().keys()))
        else:
            frags = BRICS.BRICSDecompose(mol)
            # Collapse numbered BRICS dummies such as [4*] to a plain '*'.
            frags = np.array(sorted({re.sub(r'\[\d+\*\]', '*', f) for f in frags}))
        if len(frags) == 1:
            continue
        subs = np.array([Chem.MolFromSmiles(f) for f in frags])
        # Replace every dummy atom by hydrogen, then drop the hydrogens.
        subs = np.array([Chem.RemoveHs(Chem.ReplaceSubstructs(f, du, hy, replaceAll=True)[0]) for f in subs])
        subs = np.array([m for m in subs if m.GetNumAtoms() > 1])
        # match[i][j] is True when fragment j is a substructure of fragment i;
        # a column sum of 1 means fragment j is contained only in itself.
        match = np.array([[m.HasSubstructMatch(f) for f in subs] for m in subs])
        frags = subs[match.sum(axis=0) == 1]
        # Largest fragments first, capped at voc.n_frags.
        frags = sorted(frags, key=lambda x: -x.GetNumAtoms())[:voc.n_frags]
        frags = [Chem.MolToSmiles(Chem.RemoveHs(f)) for f in frags]

        max_comb = len(frags) if is_mf else 1
        for ix in range(1, max_comb + 1):
            for comb in combinations(frags, ix):
                joined = '.'.join(comb)
                if len(joined) > len(smile):
                    continue
                if mol.HasSubstructMatch(Chem.MolFromSmarts(joined)):
                    pairs.append([joined, smile])
    df = pd.DataFrame(pairs, columns=['Frags', 'Smiles'])
    df.to_csv(out, sep='\t',  index=False)
Beispiel #5
0
def recap(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Fragment the molecule using the recap algorithm.

    Args:
        mol: a molecule.
        remove_parent: Remove parent from the fragments.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of fragment molecules built from every node of the RECAP
        hierarchy, with ``None`` entries dropped. Unless ``remove_parent`` is
        set, ``mol`` itself is the first element.
    """
    hierarchy = Recap.RecapDecompose(mol)
    fragments = list(map(dm.to_mol, hierarchy.GetAllChildren().keys()))

    # Each optional clean-up step is applied to the whole list in turn.
    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))

    kept = [frag for frag in fragments if frag is not None]
    return kept if remove_parent else [mol] + kept
def main(name, argv):
    """Write RECAP-derived fragments and scaffolds of one molecule to a file.

    Args:
        name: program name, passed to ``print_usage`` on bad arguments.
        argv: ``[input_path, output_path]``. The first whitespace-separated
            token of the first line of ``input_path`` is read as a SMILES.

    The candidates are every node of the RECAP hierarchy plus the input
    SMILES itself. A candidate with more than 7 and at most 25 heavy atoms
    contributes its own SMILES if it has at most 5 rotatable bonds, and its
    Murcko scaffold if that scaffold has at most 5 rotatable bonds and more
    than 7 heavy atoms. The distinct non-empty results are written one per
    line as ``<smiles>\\t<running number>``.
    """
    if len(argv) != 2:
        print_usage(name)
        return

    with open(argv[0], 'r') as f:
        smile = f.readline().split()[0]
    mol = Chem.MolFromSmiles(smile)
    hierarch = Recap.RecapDecompose(mol)
    children = []
    # dict.keys() is a view on Python 3 and cannot be concatenated with a
    # list directly, so materialise it first.
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        # NOTE(review): only the bracketed dummy spelling '[*]' is replaced;
        # confirm this matches the key format of the RDKit version in use.
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if not (7 < new_size <= 25):
            continue
        if rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))
        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        if rdMolDescriptors.CalcNumRotatableBonds(
                core) <= 5 and core.GetNumHeavyAtoms() > 7:
            children.append(core_smile)
    with open(argv[1], 'w') as f:
        i = 1
        for m in set(children):
            if len(m) > 0:
                f.write(m + '\t' + str(i) + '\n')
                i += 1
Beispiel #7
0
def make_feat_data(mol, offset=1):
    """Group the atoms of ``mol`` by the RECAP leaf fragments they match.

    Args:
        mol: the molecule whose atoms are grouped.
        offset: value added to every atom index before it is stringified.

    Returns:
        A list of rows. Each row starts with a label followed by atom
        indices (plus ``offset``) as strings. When RECAP yields no usable
        leaf, a single row labelled with the molecule's SMILES lists every
        atom. Otherwise each row is labelled ``<pattern>_<match number>`` and
        lists the matched atoms followed by whatever ``get_neighbor_h``
        returns for each of them; matches overlapping already claimed atoms
        are skipped.
    """
    recap_tree = Recap.RecapDecompose(Chem.RemoveHs(mol))

    # Turn the leaf keys into patterns: drop dummy markers and the empty
    # branches they leave behind, discard a bare hydrogen, strip '[H]'.
    patterns = []
    for key in recap_tree.GetLeaves().keys():
        cleaned = key.replace('*', '').replace('()', '')
        if cleaned != '[H]':
            patterns.append(cleaned.replace('[H]', ''))

    if not patterns:
        every_atom = [str(idx + offset) for idx in range(mol.GetNumAtoms())]
        return [[Chem.MolToSmiles(mol)] + every_atom]

    # Largest patterns first so they claim their atoms before smaller ones.
    patterns.sort(key=lambda smarts: Chem.MolFromSmarts(smarts).GetNumAtoms(),
                  reverse=True)

    rows = []
    claimed = set()
    for smarts in patterns:
        query = Chem.MolFromSmarts(smarts)
        for hit_no, hit in enumerate(mol.GetSubstructMatches(query)):
            atoms = list(hit)
            if claimed & set(atoms):
                continue
            # Only the matched atoms are claimed, not the neighbours added
            # below.
            claimed |= set(atoms)
            for atom_idx in hit:
                atoms += get_neighbor_h(atom_idx, mol)
            label = smarts + '_' + str(hit_no)
            rows.append([label] + [str(a + offset) for a in atoms])
    return rows
Beispiel #8
0
def get_frag_list(mol):
    """Return the RECAP leaf fragments of ``mol`` as parsed molecules.

    The decomposition is run with ``minFragmentSize=5``; every leaf SMILES is
    parsed with ``Chem.MolFromSmiles`` and the results are returned as a
    list in the iteration order of the leaves.
    """
    leaf_smiles = Recap.RecapDecompose(mol, minFragmentSize=5).GetLeaves().keys()
    return [Chem.MolFromSmiles(smi) for smi in leaf_smiles]
Beispiel #9
0
    def get_stripped_core(self, core_smi):
        """Return the core of ``self.smi`` with its side chains replaced.

        Args:
            core_smi: SMILES of the core, or ``None`` to derive the core from
                the Murcko scaffold of ``self.smi``.

        Returns:
            The result of ``Chem.ReplaceSidechains(self.smi, core)`` for the
            given or derived core. When no core is given and the Murcko
            scaffold is identical to the molecule, the longest-SMILES direct
            RECAP fragment is returned instead.
        """
        if core_smi is None:
            print('No core provided generating core with Murcko Scaffold')
            core = MurckoScaffold.GetScaffoldForMol(self.smi)
            if Chem.MolToSmiles(core) == Chem.MolToSmiles(self.smi):
                print(
                    'Murcko Scaffold failed selecting largest fragment as core'
                )
                hierarch = Recap.RecapDecompose(self.smi).children.keys()
                # NOTE(review): max() raises ValueError when the molecule has
                # no RECAP children -- confirm whether callers rely on that.
                return Chem.MolFromSmiles(max(hierarch, key=len))
        else:
            core = Chem.MolFromSmiles(core_smi)

        # Previously a usable Murcko scaffold left the result unassigned and
        # the return raised UnboundLocalError; it is now handled like a core
        # supplied by the caller.
        return Chem.ReplaceSidechains(self.smi, core)
def get_bicyclic(mol):
    """Return scaffold SMILES for the parts of ``mol`` with two or more rings.

    The RECAP leaves of ``mol`` are examined; when there are none, ``mol``
    itself is examined instead. Every candidate with at least two SSSR rings
    is passed through ``Chem.MurckoDecompose`` and the result is converted to
    SMILES.

    Returns:
        A list of SMILES strings, possibly empty.
    """

    def ring_count(candidate):
        # Chem.GetSSSR returned the ring count in older RDKit releases and
        # returns the rings themselves in newer ones; accept both.
        sssr = Chem.GetSSSR(candidate)
        return sssr if isinstance(sssr, int) else len(sssr)

    leaves = Recap.RecapDecompose(mol).GetLeaves()
    if leaves:
        candidates = (Chem.MolFromSmiles(fragment) for fragment in leaves.keys())
    else:
        candidates = (mol,)

    bicyclic = []
    for candidate in candidates:
        if ring_count(candidate) >= 2:
            scaffold = Chem.MurckoDecompose(candidate)
            bicyclic.append(Chem.MolToSmiles(scaffold))
    return bicyclic
Beispiel #11
0
def build_library(in_smile,
                  frags,
                  lib,
                  rules=os.environ["COVALIB"] +
                  "/Code/Covalentizer/numbered_reaction.re",
                  linker_lib=False,
                  linker_smiles=''):
    """Write the fragment file for one molecule and run the reactions on it.

    Args:
        in_smile: path of a file whose first line starts with a SMILES.
        frags: path of the fragment file to write
            (``<smiles>\\t<running number>`` per line).
        lib: path forwarded, together with ``rules`` and ``frags``, to the
            reaction step.
        rules: reaction rules file; the default is built from the
            ``COVALIB`` environment variable when the function is defined.
        linker_lib: when true, ``multi_linkers`` is run instead of
            ``multi_react``.
        linker_smiles: forwarded to ``multi_linkers``.

    Returns:
        ``None``. Nothing is written when the SMILES cannot be parsed.

    The candidates are every node of the RECAP hierarchy plus the input
    SMILES itself. A candidate with more than 7 and at most 25 heavy atoms
    contributes its own SMILES if it has at most 5 rotatable bonds, and its
    Murcko scaffold if that scaffold has at most 5 rotatable bonds and more
    than 7 heavy atoms.
    """
    argv = [in_smile, frags, lib]
    with open(argv[0], 'r') as f:
        smile = f.readline().split()[0]
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        return
    hierarch = Recap.RecapDecompose(mol)
    children = []
    # dict.keys() is a view on Python 3 and cannot be concatenated with a
    # list directly, so materialise it first.
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if new_size <= 7:
            continue
        if new_size <= 25 and rdMolDescriptors.CalcNumRotatableBonds(
                new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))
        # The scaffold is computed for every candidate above 7 heavy atoms,
        # but only kept when the candidate also has at most 25.
        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        if new_size <= 25 and rdMolDescriptors.CalcNumRotatableBonds(
                core) <= 5 and core.GetNumHeavyAtoms() > 7:
            children.append(core_smile)
    with open(argv[1], 'w') as f:
        i = 1
        for m in set(children):
            if len(m) > 0:
                f.write(m + '\t' + str(i) + '\n')
                i += 1

    if not linker_lib:
        multi_react([rules, argv[1], argv[2]])
    else:
        multi_linkers([rules, argv[1], argv[2]], linker_smiles)
Beispiel #12
0
    ts.append(t2 - t1)

# Benchmark 10: time converting every molecule in `mols` to a mol block.
# The elapsed wall-clock time is logged and appended to `ts`.
if tests[10]:
    logger.info('Writing: Mol blocks')
    t1 = time.time()
    for mol in mols:
        # The mol block is not used in the visible code; only the time is.
        mb = Chem.MolToMolBlock(mol)
    t2 = time.time()
    logger.info('Results10: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

# Benchmark 11: time the RECAP decomposition of every molecule.
if tests[11]:
    logger.info('RECAP decomposition')
    t1 = time.time()
    for mol in mols:
        # The hierarchy is not used in the visible code; only the time is.
        d = Recap.RecapDecompose(mol)
    t2 = time.time()
    logger.info('Results11: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

# Benchmark 12: time 2D coordinate generation for every molecule.
if tests[12]:
    logger.info('Generate 2D coords')
    t1 = time.time()
    for mol in mols:
        AllChem.Compute2DCoords(mol)
    t2 = time.time()
    logger.info('Results12: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

if tests[13]:
    logger.info('Generate 3D coords')
    '/drug_development/studyRdkit/st_rdcit/data/2d.rxn')
# RunReactants takes all reactants as ONE tuple; passing them as separate
# positional arguments would hand the second molecule to `maxProducts`.
# assumes `acid` and `base` are RDKit molecules defined earlier -- confirm.
ps = rxn.RunReactants((acid, base))
print('the num of reactions', len(ps))

# Every entry of `ps` is a tuple of product molecules for one outcome, so the
# products are converted one by one and joined into a single SMILES string.
for reaction in ps:
    reactions_smi = '.'.join(Chem.MolToSmiles(product) for product in reaction)
    print('this reaction is', reactions_smi)

# 3. Splitting a molecule into fragments
# 3.1 The Recap method
# Recap mimics chemical reactions commonly used in the lab to break a
# molecule into chemically reasonable fragments.
# Recap returns data organised as a tree of nodes.
# > Note: fragment SMILES usually start with a '*', e.g. '*c1ccccc1'

m = Chem.MolFromSmiles('c1ccccc1OCCOC(=O)CC')
hierarch = Recap.RecapDecompose(m)
# Bare expression: has no effect in a script, it only displays the type in an
# interactive session.
type(hierarch)

# The original molecule at the root of the hierarchy
print('smi=', hierarch.smiles)  # smi= CCC(=O)OCCOc1ccccc1

# Each node tracks its children in a dictionary keyed by SMILES
ks = hierarch.children.keys()
print(sorted(ks))
# ['*C(=O)CC', '*CCOC(=O)CC', '*CCOc1ccccc1', '*OCCOc1ccccc1', '*c1ccccc1']
# 3.2 The BRICS method
# RDKit also provides another way to fragment molecules: BRICS. BRICS cuts
# bonds that are synthetically accessible, so it returns the set of fragments
# derived from the molecule; the dummy atoms (*) show where the bonds were cut.

# BRICS decomposition of the molecule shown in the figure below
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Beispiel #14
0
def gen_lib(smiles):
    """Return the direct RECAP fragment SMILES of every input molecule.

    Args:
        smiles: iterable of SMILES strings.

    Returns:
        A flat list with the keys of each molecule's ``children`` mapping,
        in input order; duplicates are kept.
    """
    # Parse everything first, then decompose, as two separate passes.
    parsed = list(map(Chem.MolFromSmiles, smiles))
    fragments = []
    for mol in parsed:
        fragments.extend(Recap.RecapDecompose(mol).children.keys())
    return fragments