Ejemplo n.º 1
0
    def test_AlignDepict(self):
        """Check AlignDepict's ValueError paths, then two calls that must not raise.

        With the molecule 'CNC' and the core 'CC', every pattern in the loop
        below is expected to make AlignDepict raise ValueError.  Afterwards the
        core is given 2D coordinates and AlignDepict is called once with a
        'CCC' molecule and once with 'CNC' plus ``acceptFailure=True``; both
        calls are expected to return without raising.
        """
        mol = Chem.MolFromSmiles('CNC')
        core = Chem.MolFromSmiles('CC')

        # 'CC' appears twice, reproducing the original sequence of four checks.
        for smarts in ('CCC', 'CN', 'CC', 'CC'):
            pattern = Chem.MolFromSmarts(smarts)
            with self.assertRaises(ValueError):
                AlignDepict(mol, core, pattern)

        # From here on `pattern` is the 'CC' query left over from the loop.
        mol = Chem.MolFromSmiles('CCC')
        Chem.rdDepictor.Compute2DCoords(core)
        AlignDepict(mol, core, pattern)

        mol = Chem.MolFromSmiles('CNC')
        AlignDepict(mol, core, pattern, acceptFailure=True)
                                 'name', 'alternate_names', 'mol', 'smiles',
                                 'name_rt', 'alternate_names_rt', 'mol_rt', 'smiles_rt']]


# In[73]:


# Report the rows of `name_conflicts`: for each row, draw the two molecules
# side by side with their common substructure highlighted, caption each with
# its name / alternate names / identifier, then print both SMILES strings.
if not len(name_conflicts):
    display(HTML('<h2 style="color: green">No name conflicts!</h2>'))
for _, r in name_conflicts.iterrows():
    mol1, mol2 = r['mol'], r['mol_rt']

    # Build a query molecule from the FindMCS result and give it 2D
    # coordinates so that both depictions can be aligned onto it.
    mcs_result = rdFMCS.FindMCS(
        [mol1, mol2],
        ringMatchesRingOnly=True,
        matchValences=True,
    )
    mcs = AllChem.MolFromSmarts(mcs_result.smartsString)
    AllChem.Compute2DCoords(mcs)
    AlignDepict(mol1, mcs)
    AlignDepict(mol2, mcs)

    # Parenthesised, comma-separated alternate names; empty when there are none.
    alt = ''
    if r['alternate_names']:
        alt = '(' + ', '.join(r['alternate_names']) + ')'
    alt_rt = ''
    if r['alternate_names_rt']:
        alt_rt = '(' + ', '.join(r['alternate_names_rt']) + ')'

    # Atom indices to highlight in each drawing.
    mol1_match = mol1.GetSubstructMatch(mcs)
    mol2_match = mol2.GetSubstructMatch(mcs)
    std_args = dict(highlightColor=[0, 0.6, 0.6], kekulize=False, size=(500, 300))

    png1 = Draw.MolToImage(mol1, highlightAtoms=mol1_match, **std_args)._repr_png_()
    caption1 = '{name} {alt} : {broad_id}'.format(alt=alt, **r)
    png2 = Draw.MolToImage(mol2, highlightAtoms=mol2_match, **std_args)._repr_png_()
    caption2 = '{name_rt} {alt} : {lincs_id}'.format(alt=alt_rt, **r)

    display(Box([
        VBox([Image(value=png1), Label(caption1)]),
        VBox([Image(value=png2), Label(caption2)]),
    ]))
    print(r['smiles'])
    print(r['smiles_rt'])
Ejemplo n.º 3
0
def _buildingBlockName(bbMol, fallbackIdx):
    """Return the molecule's ``_Name`` property, or ``str(fallbackIdx)`` if it has none."""
    if bbMol.HasProp('_Name'):
        return bbMol.GetProp('_Name')
    return str(fallbackIdx)


def _exploder(mol,
              depth,
              sidechains,
              core,
              chainIndices,
              autoNames=True,
              templateName='',
              resetCounter=True,
              do3D=False,
              useTethers=False):
    """Recursively enumerate products by substituting sidechains into ``mol``.

    At each level the dummy-atom pattern ``[<depth + 1>*]`` is replaced in
    ``mol`` by every sidechain in ``sidechains[depth]``.  Intermediate levels
    recurse; at the last level each product is sanitized, given coordinates,
    kekulized, annotated with properties and yielded.

    This is a generator: nothing (including the counter reset) happens until
    it is iterated.

    Arguments:
      - mol: molecule to substitute into at this level.
      - depth: index into ``sidechains`` for this level.
      - sidechains: sequence (one entry per attachment point) of sequences of
        ``(chainIdx, chainMol)`` pairs.
      - core: molecule used for depiction alignment (2D) or as the
        constrained-embedding reference (3D).
      - chainIndices: list of ``(position, chainIdx)`` pairs chosen at the
        shallower levels; it is copied, never modified.
      - autoNames: if true, products are named ``TemplateEnum: Mol_<n>``;
        otherwise ``templateName`` followed by ``_<building block name>`` for
        each building block.
      - templateName: name prefix used when ``autoNames`` is false.
      - resetCounter: if true, the module-level ``nDumped`` counter is reset
        to 0 when iteration starts.
      - do3D: if true, hydrogens are added and ``AllChem.ConstrainedEmbed`` is
        used instead of 2D depiction.
      - useTethers: forwarded to ``AllChem.ConstrainedEmbed``.

    Yields:
      Product molecules carrying the properties ``_Name``, ``seq_num``,
      ``reagent_indices``, ``building_block_<k>`` and
      ``building_block_<k>_<prop>`` for every property of building block k.

    Products that fail sanitization are reported via a traceback and skipped.
    Failures of ``AlignDepict`` are reported but the product is still yielded.
    """
    # Local import: only needed for error reporting, and the file's top-level
    # imports are not visible from this block.
    import traceback

    global nDumped
    if resetCounter:
        nDumped = 0
    ourChains = sidechains[depth]
    patt = Chem.MolFromSmiles('[%d*]' % (depth + 1))
    isLastLevel = depth >= len(sidechains) - 1
    for i, (chainIdx, chain) in enumerate(ourChains):
        tchain = chainIndices[:]
        tchain.append((i, chainIdx))
        rs = Chem.ReplaceSubstructs(mol, patt, chain, replaceAll=True)
        if not rs:
            continue
        r = rs[0]

        if not isLastLevel:
            # More attachment points to fill: recurse without touching the
            # shared counter.
            for entry in _exploder(r,
                                   depth + 1,
                                   sidechains,
                                   core,
                                   tchain,
                                   autoNames=autoNames,
                                   templateName=templateName,
                                   resetCounter=False,
                                   do3D=do3D,
                                   useTethers=useTethers):
                yield entry
            continue

        try:
            Chem.SanitizeMol(r)
        except ValueError:
            traceback.print_exc()
            continue

        if do3D:
            r = Chem.AddHs(r)
            AllChem.ConstrainedEmbed(r, core, useTethers)
        elif r.HasSubstructMatch(core):
            try:
                AlignDepict(r, core)
            except Exception:
                # Best effort: report the failure and still emit the product.
                # ``Exception`` (not a bare ``except``) so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                traceback.print_exc()
                print(Chem.MolToSmiles(r), file=sys.stderr)
        else:
            print('>>> no match', file=sys.stderr)
            AllChem.Compute2DCoords(r)
        Chem.Kekulize(r)

        # Resolve each building block and its display name once; both the
        # product name and the per-block properties use them.
        bbMols = [sidechains[bbI][bb[0]][1] for bbI, bb in enumerate(tchain)]
        bbNames = [_buildingBlockName(bbMol, bb[1]) for bbMol, bb in zip(bbMols, tchain)]

        if autoNames:
            tName = "TemplateEnum: Mol_%d" % (nDumped + 1)
        else:
            tName = templateName + ''.join('_' + bbNm for bbNm in bbNames)

        r.SetProp("_Name", tName)
        r.SetProp('seq_num', str(nDumped + 1))
        r.SetProp('reagent_indices', '_'.join(str(x[1]) for x in tchain))
        for bbI, (bbMol, bbNm) in enumerate(zip(bbMols, bbNames), 1):
            r.SetProp('building_block_%d' % bbI, bbNm)
            for propN in bbMol.GetPropNames():
                r.SetProp('building_block_%d_%s' % (bbI, propN), bbMol.GetProp(propN))

        nDumped += 1
        if not nDumped % 100:
            logger.info('Done %d molecules', nDumped)
        yield r