Exemple #1
0
 def append(self, newms):
     """Optimise extra molecules and rebuild `self.ms` with them included.

     Every entry of `newms` is wrapped in `crc.RDMol` (using the force
     field stored in `self.ff`), run through `optg()` followed by
     `optg2()`, and its `ats` attribute is collected.  `self.ms` is then
     reassigned to `self.a1 + self.a2 + <new entries> + self.ts`.

     The new entries are only reachable through `self.ms`; they are not
     stored separately on the instance.
     """

     def _optimised_ats(mol):
         # Wrap the molecule, run both optimisation passes in order and
         # hand back the resulting `ats`.
         wrapped = crc.RDMol(mol, forcefield=self.ff)
         wrapped.optg()
         wrapped.optg2()
         return wrapped.ats

     extra = [_optimised_ats(mol) for mol in newms]
     self.ms = self.a1 + self.a2 + extra + self.ts
Exemple #2
0
def diagnose1(fa, fq):
    """Print angle data of one amon next to the matching data of the query.

    `fa` (amon) and `fq` (query) are each loaded with `crk.RDMol`, and
    angles and dihedral angles are computed on both.  The amon's
    'smiles_indigo' property is turned into a pattern with `crk.smi2patt`
    and matched against both molecules.  `get_angles34` is then applied to
    each molecule's `dangs` together with its substructure matches.

    For every query match the values are re-ordered so that they follow
    the type order of the first amon match; the amon values, the
    re-ordered query values and their differences are printed.

    Nothing is returned.
    """
    amon = crk.RDMol(fa)
    amon.get_angles(wH=F, key='ia')
    amon.get_dihedral_angles(wH=F, key='ia')

    query = crk.RDMol(fq)
    query.get_angles(wH=F, key='ia')
    query.get_dihedral_angles(wH=F, key='ia')

    pattern = crk.smi2patt(amon.prop['smiles_indigo'])
    print('patt=', pattern)
    submol = Chem.MolFromSmarts(pattern)
    matches_amon = amon.m.GetSubstructMatches(submol)
    matches_query = query.m.GetSubstructMatches(submol)

    types_a, vals_a = get_angles34(amon.dangs, matches_amon)
    types_q, raw_vals_q = get_angles34(query.dangs, matches_query)
    n_query = len(types_q)

    # Re-order each query row to follow the key order of the first amon row.
    reordered = []
    for idx in range(n_query):
        by_type = dict(zip(types_q[idx], raw_vals_q[idx]))
        reordered.append([by_type[name] for name in types_a[0]])

    print(' amon:')
    print(types_a[0])
    print(list(vals_a[0]))

    print(' query:')
    for row in reordered:
        print(row)

    print(' Difference:')
    for row in reordered:
        # `row` is a plain list, so this relies on `vals_a[0]` supporting
        # reflected subtraction (presumably a numpy array -- TODO confirm).
        print(list(row - vals_a[0]))
Exemple #3
0
 def __init__(self, fs, rsmi, props=['HF'], istart=0):
     """Collect the conformers listed in `fs` and their properties.

     Parameters
     ----------
     fs : list of str
         File names.  The last three characters select the reader:
         'log'/'out' files are parsed with `GR0`, 'mol'/'sdf' files with
         `crk.RDMol`.  Files with any other ending are skipped silently.
     rsmi : str
         Reference SMILES.  A log/out file is accepted only if `rsmi`
         equals the SMILES derived from its geometry (`co.can`) or the
         string returned by `get_alternative(co.can)`.  mol/sdf files are
         not checked against it.
     props : list of str
         Property keys to extract for every accepted file.  The default
         list is only read, never modified.
     istart : int
         Forwarded to `GR0`.

     Attributes set
     --------------
     nc0 : number of input files
     fsc : accepted file names
     ms : `atoms(...)` objects of the accepted files
     cso : `cmm.Mol` objects (log/out) or 'smiles_indigo' strings (mol/sdf)
     ys : per-file properties -- a dict keyed by property for log/out
         files, a list ordered like `props` for mol/sdf files
     nc : number of accepted files
     fs_diss : log/out files rejected (SMILES mismatch or failed
         conversion)
     fs_redundant : initialised to an empty list; not filled here
     """
     self.nc0 = len(fs)
     fsc = []
     cso = []  # mol objects
     ms = []  # ase mols
     ys = []
     self.fs_diss = []
     self.fs_redundant = []
     for f in fs:
         fmt = f[-3:]
         if fmt in ['log', 'out']:  # G09 output file
             dic = GR0(f, istart=istart)[-1]
             # `np.int` was removed in NumPy 1.24; the builtin `int` is the
             # exact type it used to alias.
             zs = np.array(dic['Atomic_numbers'], int)
             coords = np.array(dic['Positions'])
             m = atoms(zs, coords)
             try:
                 co = cmm.Mol(zs, coords, ican=True)
                 can2 = get_alternative(co.can)
                 if rsmi not in [co.can, can2]:
                     print("#ERROR: %s has a SMILES %s, differs from %s" %
                           (f, co.can, rsmi))
                     self.fs_diss.append(f)
                     continue
                 _ys = {}
                 for key in props:
                     _ys[key] = get_val(dic, key)
             except Exception:
                 # Best-effort: any failure while perceiving the graph or
                 # reading the properties discards the file.  `Exception`
                 # (rather than a bare except) lets KeyboardInterrupt and
                 # SystemExit propagate.
                 print("#ERROR: this is a radical!")
                 self.fs_diss.append(f)
                 continue
             ys.append(_ys)
             ms.append(m)
             cso.append(co)
             fsc.append(f)
         elif fmt in ['mol', 'sdf']:
             oo = crk.RDMol(f)
             m = atoms(oo.zs, oo.coords)
             ms.append(m)
             cso.append(oo.prop['smiles_indigo'])
             fsc.append(f)
             ys.append([oo.prop[k] for k in props])
     self.cso = cso
     self.ms = ms
     self.fsc = fsc
     self.nc = len(cso)
     self.ys = ys
Exemple #4
0
 def query(self, fq, idQ=None, k=7):
     """Load the query molecule `fq` and cache its data on the instance.

     Parameters
     ----------
     fq : str
         Passed to `crk.RDMol` to load the query molecule; also stored as
         `self.fq`.
     idQ : optional
         Must not be None when `self.cmaps[0]` is truthy; in that case it
         is stored as `self.idQ`.
     k : int
         Forwarded as `k` to `coa.ParentMols`.  (It used to be ignored in
         favour of a hard-coded 7; the default keeps that behaviour.)

     Attributes set
     --------------
     fq, amons_q, ys_q, zs_q, nsheav_q, coords_q, objq (and idQ, see above).
     `ys_q` has one row holding the values of `self.props`; a property
     missing from the molecule is stored as NaN.

     Raises
     ------
     AssertionError
         If conformer maps are in use (`self.cmaps[0]`) and `idQ` is None.
     """
     if self.cmaps[0]:
         assert idQ is not None, '#ERROR: `idQ not specified!'
         self.idQ = idQ
     self.fq = fq

     objq = crk.RDMol(fq)
     zs, coords, ydic = objq.zs, objq.coords, objq.prop
     smi = ydic['smiles_%s' % self.ctk]

     # One row of property values, NaN standing in for absent keys.
     _ys = [ydic[key] if key in ydic.keys() else np.nan
            for key in self.props]
     ys_q = np.array([_ys])
     nheav = (np.array(zs) > 1).sum()  # atoms with atomic number > 1

     reduce_namons = T
     ao = coa.ParentMols([smi], reduce_namons, wg=F, imap=T, k=k)

     self.amons_q = ao.cans
     self.ys_q = ys_q
     # `np.int` was removed in NumPy 1.24; the builtin `int` is equivalent.
     self.zs_q = np.array(zs, int)
     self.nsheav_q = [nheav]
     self.coords_q = np.array(coords)

     objq.get_atypes()
     objq.get_angles(wH=F, key='ia')
     objq.get_dihedral_angles(wH=F, key='ia')
     self.objq = objq
Exemple #5
0
 def __init__(self, obj, addh=True):
     """Build `crk.RDMol` objects from SMILES strings and/or SMILES files.

     Parameters
     ----------
     obj : str, or list/tuple of str
         Each entry is either the path of an existing file, which is read
         line by line (one whitespace-stripped entry per line), or a
         string that is used as-is.  Any other type yields no molecules.
     addh : bool
         Accepted for interface compatibility; not used in this method.

     Attributes set
     --------------
     nm : number of collected strings
     objs : list of `crk.RDMol`, one per collected string
     """

     def _expand(entry):
         # An existing path contributes its lines; anything else is taken
         # to be a single SMILES string.
         if os.path.exists(entry):
             # `open` in a `with` block replaces the Python-2-only `file()`
             # builtin and closes the handle deterministically.
             with open(entry) as fid:
                 return [line.strip() for line in fid]
         return [entry]

     if isinstance(obj, str):
         entries = [obj]
     elif isinstance(obj, (list, tuple)):
         entries = obj
     else:
         entries = []

     smis = []
     for entry in entries:
         # Read each listed file itself (the original re-read `obj`, the
         # whole list, for every file entry).
         smis += _expand(entry)

     self.nm = len(smis)
     self.objs = [crk.RDMol(si) for si in smis]
Exemple #6
0
    def __init__(self, objs, wd='targets/', reduce_namons=T, wg=F,
                 i3d=T, a1=T, level=2, exta=0, k=7, k2=7,
                 ff='mmff94', owt=F):
        """
        Generate geometries for the target molecules and for their amons.

        vars
        ==============
        objs: one target or a list of targets; each is passed to
              `crc.RDMol` and to `coo.smi2oem`
        wd: folder in which the target xyz/sdf files are written
            (created if missing)
        reduce_namons, wg, i3d, k, k2: forwarded to `coa.ParentMols`
        a1: if true, generate the generic amons of the targets
        level: extended amons level, 1 or 2
        exta: maximal N_I of extended amons (0 disables them)
        ff: force field name handed to `crc.RDMol` and `coa.ParentMols`
        owt: overwrite target (when writing sdf file for target)

        attributes set
        ==============
        ff: the force field name
        ts: `ats` of the targets
        a1: `ats` of the generic amons (empty if the `a1` flag is false)
        a2: `ats` of the extended amons (empty if `exta` is 0)
        ms: a1 + a2 + ts
        """
        if isinstance(objs, str): objs = [objs]
        if not os.path.exists(wd): os.makedirs(wd)

        self.ff = ff

        ts = []
        fts = []
        # first get 3d geom of targets
        for obj in objs:
            # NOTE(review): the geometry is optimised even when cached files
            # are reused below.
            om = crc.RDMol(obj, forcefield=ff)
            om.optg()
            om.optg2()
            ats = om.ats
            can = coo.oem2can(coo.smi2oem(obj)[1])
            # NOTE(review): `can` is used verbatim as a file stem; a string
            # containing '/' would point into a sub-directory.
            f1 = os.path.join(wd, '%s.xyz' % can)
            f2 = os.path.join(wd, '%s.sdf' % can)
            if owt or (not (os.path.exists(f2) and os.path.exists(f1))):
                om.write_sdf(f2)
                om.write_xyz(f1)
            else:
                # both files already exist: reuse the stored xyz geometry
                ats = cc.obj2mol(f1, ['E'])
            fts.append(f2)
            ts.append(ats)
        self.ts = ts

        # The result list needs its own name: calling it `a1` shadowed the
        # `a1` flag with an empty list, so this branch could never run.
        amons1 = []  # generic amons
        if a1:
            imap = F if len(fts) == 1 else T
            oa = coa.ParentMols(fts, reduce_namons, label=None,
                                imap=imap, fixGeom=F, i3d=i3d, wg=wg,
                                k=k, k2=k2, iprt=T, forcefield=ff,
                                thresh=0.01, debug=F)
            for mi in oa.ms:
                om = crc.RDMol(mi, forcefield=ff)
                om.iFFOpt = T
                om.optg2()
                amons1.append(om.ats)
        self.a1 = amons1

        amons2 = []
        assert exta >= 0, '#ERROR: `exta: N_I of extended amons shoud be >= 0'
        if exta:
            # add extended amons (derived from the first target only)
            oa2 = coae.transform(objs[0])
            oa2.get_newolds()
            # return value is not needed here; the call itself is kept
            oa2.T(level=level)
            oa2.get_amons_extended(k=exta)
            for mi in oa2.amons_extended:
                om = crc.RDMol(mi, forcefield=ff)
                om.optg()
                om.optg2()
                amons2.append(om.ats)
        self.a2 = amons2
        self.ms = amons1 + amons2 + ts
Exemple #7
0
                "OC12C3C4C1N4C32"] # the last one is highly strained, may have problem in acquring g0
    elif n == 1:
        f = args[0]
        if f[-3:] in ['smi','can']:
            objs = [ si.strip() for si in file(f).readlines() ]
        else:  # either an xyz file or a SMILES string
            objs = args
    else:
        objs = args

    isf = False
    nobj = len(objs)
    for i,obj in enumerate(objs):
        if not os.path.isfile(obj):
            f = tpf.NamedTemporaryFile(dir='/tmp').name + '.xyz'
            m0 = cir.RDMol(obj, doff=True)
            m0.write_xyz(f)
        else:
            isf = True
            f = obj
        o = cc.molecule(f, isimple=T)
        can = 'None'
        iok = T
        if nobj > 1:
            if trial:
                try:
                    m = Mol(o.zs, o.coords, ican=True)
                    can = m.can
                except:
                    iok = F #print(' conversion failed!')#pass
            else:
Exemple #8
0
    def __init__(self, fd, fd2=None, fcanr=None, h5f=None,
                 imb=False, props=['E'], ctk='oechem'):
        """
        Load the conformers stored as `frag*.sdf` in `fd` and index them.

        fd: folder holding the `frag*.sdf` files (a trailing '/' is dropped)
        fd2: optional second folder; its conformers are appended when their
             SMILES is one of the known amons, and skipped otherwise
        fcanr: file containing canonical SMILES for reference (from which
               conformers were generated), one per line
        h5f: optional file read with `dd.io.load`; its 'maps' entry holds
             conformer indices (entries > -1 are treated as valid)
        imb: if true, compute angles and dihedral angles of every conformer
        props: property keys read from each conformer's `prop`
        ctk: cheminformatics toolkit name (rdkit, indigo, openbabel or
             oechem); selects the 'smiles_<ctk>' property

        attributes set
        ==============
        ctk, fda, fs, amons, c2amap, a2cmap, cmaps, nmt, nct, nas, zs,
        coords, nsheav, objsc, ics1, ics2, props, ys

        raises
        ==============
        AssertionError: no sdf file found, or file names of unequal length
        RuntimeError: the number of amons in `fcanr` differs from the number
                      of unique SMILES found, or max(maps) != #sdf files - 1
        """
        self.ctk = ctk

        if fd[-1] == '/': fd = fd[:-1]
        self.fda = fd

        fs = io2.cmdout('ls %s/frag*.sdf' % fd)
        self.fs = fs
        nf = len(fs)
        # Test for an empty listing first, so it is reported as such and
        # not as a filename-length problem.
        assert nf > 0, '#ERROR: no *.sdf found'
        ns = [len(f) for f in fs]
        assert len(np.unique(ns)) == 1, '#ERROR: filename length may differ'

        objs = []
        s1 = []  # unique smiles strings, in order of first appearance
        _ncs = [0] * nf  # _ncs[im]: #conformers of the im-th unique smiles
        for f in fs:
            obj = crk.RDMol(f)
            objs.append(obj)
            smi = obj.prop['smiles_%s' % ctk]
            if smi not in s1:
                _ncs[len(s1)] = 1
                s1.append(smi)
            else:
                _ncs[s1.index(smi)] += 1

        if fcanr is not None:
            with open(fcanr) as fid:
                _amons = [si.strip() for si in fid]
            # The count was previously read before being assigned, and the
            # comparison was inverted with respect to the message below.
            if len(_amons) != len(s1):
                print(
                    "#ERROR: #amons from sdf's inconsistent with that in %s" %
                    fcanr)
                print(
                    "        This means that some amons (graph) may have been")
                print("        hiscarded due to dissociation")
                raise RuntimeError(
                    "#amons from sdf's inconsistent with that in %s" % fcanr)
        else:
            _amons = s1
        self.amons = _amons

        nm = len(s1)

        ncs = _ncs[:nm]
        cidxs = np.arange(nf).astype(int)
        # NOTE(review): both maps below are built from the per-amon counts
        # alone, i.e. they assume that conformers of one amon are contiguous
        # in `fs` -- confirm against the file naming scheme.
        c2amap = []
        ics2 = np.array(np.cumsum(ncs), dtype=int)
        ics1 = np.array(np.concatenate(([0], ics2[:-1])), dtype=int)
        for im in range(nm):
            c2amap += [im] * ncs[im]
        a2cmap = [list(cidxs[ics1[iamon]:ics2[iamon]]) for iamon in range(nm)]

        cmaps = [F]
        # if provided h5 file, read conformer IDs from it
        if h5f is not None:
            dt = dd.io.load(h5f)
            _maps = dt['maps']
            if np.max(_maps) != nf - 1:
                print(' * Error: #sdf files .ne. max(maps)+1!!')
                raise RuntimeError('#sdf files .ne. max(maps)+1')
            cmaps = [T, _maps]

        # add new conformers from another folder
        ic2 = nf
        if fd2 is None:
            c2amap = np.array(c2amap, dtype=int)
        else:
            fs2 = io2.cmdout('ls %s/frag*sdf' % fd2)
            # NOTE(review): every file of `fd2` is appended to `fs`, while
            # only conformers with a known SMILES get a conformer index, so
            # `fs` and the conformer arrays may end up with different lengths.
            fs += fs2
            for f2 in fs2:
                obj2 = crk.RDMol(f2)
                smi = obj2.prop['smiles_%s' % self.ctk]
                if smi in _amons:
                    im2 = _amons.index(smi)
                    c2amap.append(im2)
                    a2cmap[im2] += [ic2]
                    ic2 += 1
                    objs.append(obj2)
            c2amap = np.array(c2amap, dtype=int)

            if cmaps[0]:
                # now update maps file: every row gets, in addition to its
                # valid entries, the new conformers (index >= nf) of all
                # amons that the row already refers to
                _maps = cmaps[1]
                nt, ncmax = _maps.shape
                _maps2 = []
                ncs_added = []
                for j in range(nt):
                    fil = (_maps[j] > -1)
                    ims_j = np.unique(c2amap[_maps[j][fil]])
                    ics_j = []
                    for k in ims_j:
                        t2 = np.array(a2cmap[k], dtype=int)
                        ics_j += list(t2[t2 >= nf])
                    ncs_added.append(len(ics_j))
                    _maps2.append(list(_maps[j][fil]) + ics_j)
                # guard against a map without rows
                nadd = max(ncs_added) if ncs_added else 0
                print(
                    ' ## for some query molecule, %d conformers at most are added as new amons'
                    % nadd)
                # rows are padded with -1
                maps2 = -1 * np.ones((nt, ncmax + nadd), dtype=int)
                for j in range(nt):
                    ncj = len(_maps2[j])
                    maps2[j, :ncj] = _maps2[j]
                cmaps = [T, maps2.astype(int)]
        self.c2amap = c2amap  # (amon/conformer) to (molecule graph) map
        self.a2cmap = a2cmap
        self.cmaps = cmaps

        nct = ic2  # total number of conformers kept (fd + accepted fd2)

        self.nmt = nm
        self.nct = nct
        self.fs = fs

        objsc = []
        ys = []
        zs = []
        nsheav = []
        nas = []
        coords = []
        # `objs` holds exactly `nct` entries at this point
        for objc in objs:
            zs += list(objc.zs)
            nas.append(objc.na)
            coords += list(objc.coords)
            nsheav.append(objc.nheav)
            if imb:  # calc many-body terms
                objc.get_angles(wH=F, key='ia')
                objc.get_dihedral_angles(wH=F, key='ia')
            objsc.append(objc)
            ys.append([objc.prop[key] for key in props])
        # `np.int` was removed in NumPy 1.24; the builtin `int` is equivalent.
        self.nas = np.array(nas, int)
        self.zs = np.array(zs, int)
        self.coords = np.array(coords)
        self.nsheav = np.array(nsheav, int)
        self.objsc = objsc

        # start/end conformer indices per amon; computed from `fd` only and
        # not extended for conformers added from `fd2`
        self.ics1 = ics1
        self.ics2 = ics2
        self.props = props
        self.ys = np.array(ys)