コード例 #1
0
def readfromtxt(mol, txt):
    """Fill ``mol`` with atoms parsed from xyz-style text lines.

    Only lines that split into exactly four whitespace-separated fields
    (label, x, y, z) are used; all other lines are ignored.  A label that
    ends in digits (e.g. ``C12``) is treated as a unique atom ID: every digit
    run is stripped to obtain the symbol and the full label is passed to
    ``atom3D`` as ``name``.  Any other label must be a key of the dictionary
    returned by ``globalvars().endict()``.

    Parameters
    ----------
        mol : object
            Molecule to populate.  Its ``graph`` attribute is reset to an
            empty list and atoms are appended through ``mol.addAtom``.
        txt : iterable of str
            Lines to parse.

    Returns
    -------
        mol : object
            The same ``mol`` object that was passed in.

    Notes
    -----
        If a four-field line carries a label that is neither ID-like nor a
        known key, 'cannot find atom type' is printed and ``sys.exit()`` is
        called.  Non-numeric coordinate fields raise ``ValueError`` from
        ``float``.
    """
    en_dict = globalvars().endict()
    mol.graph = []
    for line in txt:
        fields = line.split()
        if len(fields) != 4:
            continue
        label = fields[0]
        # A trailing digit run marks a unique atom ID such as "C12".
        if re.search(r'\d+$', label) is not None:
            # Raw string avoids the invalid "\d" escape of a plain literal.
            symb = re.sub(r'\d+', '', label)
            atom = atom3D(symb, [float(v) for v in fields[1:]], name=label)
        elif label in en_dict:
            # Direct membership test; no need to build a key list per line.
            atom = atom3D(label, [float(v) for v in fields[1:]])
        else:
            print('cannot find atom type')
            sys.exit()
        mol.addAtom(atom)
    return mol
コード例 #2
0
ファイル: tsgen.py プロジェクト: edgarin1st/molSimplify
def substplaceff_mode3(core, substr, substreact, compreact, cpoint, args, connected, frozenats):
    """Place a substrate next to ``core`` and return the combined structure.

    The substrate is aligned so that atom ``substreact`` sits on ``cpoint``,
    rotated by the alignment helpers (depending on its atom count), has the
    bond between ``substreact`` and its first bonded atom reset via ``BCM``,
    and is then merged into a copy of ``core``.

    Parameters
    ----------
        core : mol3D
            Structure the substrate is attached to; copied, not modified here.
        substr : mol3D
            Substrate molecule; modified in place by the alignment calls.
        substreact : int
            Index of the reacting atom in ``substr``.
        compreact : int
            Index of the atom in ``core`` whose coordinates anchor the
            rotations.
        cpoint : list
            Coordinates used for the hydrogen placeholder the substrate is
            aligned to.
        args : Namespace
            Reads ``ffoption``, ``substplaceff`` and ``ff``.
        connected : object
            Forwarded to ``ffopt``.
        frozenats : object
            Forwarded to ``ffopt``.

    Returns
    -------
        ts3D : mol3D
            Combined core + substrate structure.
        enc : number
            Value returned by ``ffopt``, or 0 when no optimization was run.
    """
    # Put the reacting atom of the substrate onto a hydrogen placed at cpoint.
    target_atom = atom3D('H', cpoint)
    substr.alignmol(substr.getAtom(substreact), target_atom)

    anchor_coords = core.getAtomCoords(compreact)
    # Early neighbor lookup retained from the original; its value is
    # recomputed after the rotations below before it is actually used.
    neighbor_idx = substr.getBondedAtoms(substreact)[0]

    if substr.natoms > 1:
        # align ligand center of symmetry
        substr = align_lig_centersym(
            anchor_coords, substr, substreact, core, False)
    if substr.natoms > 2:
        # linear-molecule check, symmetric-molecule check, then rotation
        # about the M-L axis to reduce steric repulsion
        substr = check_rotate_linear_lig(anchor_coords, substr, substreact)
        substr = check_rotate_symm_lig(anchor_coords, substr, substreact, core)
        substr = rotate_MLaxis_minimize_steric(
            anchor_coords, substr, substreact, core)

    # Distort the substrate: set the reacting bond to a scaled radius sum.
    neighbor_idx = substr.getBondedAtoms(substreact)[0]
    XYBL = XYcoeff*(substr.getAtom(substreact).rad +
                    substr.getAtom(neighbor_idx).rad)
    substr.BCM(neighbor_idx, substreact, XYBL)

    # Merge a copy of the core with the placed substrate.
    ts3D = mol3D()
    ts3D.copymol3D(core)
    ts3D = ts3D.combine(substr)
    ts3D.charge += substr.charge

    enc = 0
    if 'a' in args.ffoption or args.substplaceff:
        ts3D, enc = ffopt(args.ff, ts3D, connected, 1,
                          frozenats, False, [], 'Adaptive')
    return ts3D, enc
コード例 #3
0
def GetConf(mol, args, catoms=None):
    """Build a conformer of ``mol`` with the distance-geometry helpers.

    Parameters
    ----------
        mol : mol3D
            Molecule to generate a conformer for.
        args : Namespace
            Forwarded to ``findshape``.
        catoms : list, optional
            Indices of connection atoms; each is bonded to a dummy Fe atom
            and the list is forwarded to ``GetBoundsMatrices`` and
            ``SaveConf``.  Default is None, which is treated as an empty list.

    Returns
    -------
        Conf3D : mol3D
            The value returned by ``SaveConf`` for the optimized coordinates.
    """
    # None sentinel instead of a mutable default list shared across calls.
    if catoms is None:
        catoms = []

    # Create a mol3D copy with a dummy metal
    Conf3D = mol3D()
    Conf3D.copymol3D(mol)
    Conf3D.addAtom(atom3D('Fe', [0, 0, 0]))  # Add dummy metal to the mol3D
    dummy_metal = openbabel.OBAtom()  # And add the dummy metal to the OBmol
    dummy_metal.SetAtomicNum(26)
    # NOTE(review): if copymol3D shares the OBMol reference with ``mol``, this
    # also adds the dummy atom to mol's OBMol - confirm.
    Conf3D.OBMol.AddAtom(dummy_metal)
    for i in catoms:
        # The dummy metal is the last OBMol atom; ``i + 1`` presumably maps a
        # 0-based catom index onto OBMol's atom numbering - TODO confirm.
        Conf3D.OBMol.AddBond(i + 1, Conf3D.OBMol.NumAtoms(), 1)
    natoms = Conf3D.natoms
    Conf3D.createMolecularGraph()

    shape = findshape(args, mol)
    LB, UB = GetBoundsMatrices(Conf3D, natoms, catoms, shape)
    # Re-draw a distance matrix until GetCMDists reports success.
    status = False
    while not status:
        D = Metrize(LB, UB, natoms)
        D0, status = GetCMDists(D, natoms)
    G = GetMetricMatrix(D, D0, natoms)
    L, V = Get3Eigs(G, natoms)
    X = np.dot(V, L)  # get projection
    # Flatten to a 1-D vector for the conjugate-gradient minimizer.
    x = np.reshape(X, 3 * natoms)
    res1 = optimize.fmin_cg(DistErr,
                            x,
                            fprime=DistErrGrad,
                            gtol=0.1,
                            args=(LB, UB, natoms),
                            disp=0)
    X = np.reshape(res1, (natoms, 3))
    Conf3D = SaveConf(X, Conf3D, True, catoms)

    return Conf3D
コード例 #4
0
ファイル: mol3D.py プロジェクト: hitliaomq/molSimplify
 def copymol3D(self, mol0):
     """Copy the atoms and selected attributes of ``mol0`` into this molecule.

     A new ``atom3D`` is built from each source atom's symbol and
     coordinates; the ``frozen`` flag is carried over.  The attributes
     ``cat``, ``charge``, ``denticity``, ``ident``, ``ffopt`` and ``OBMol``
     are assigned by reference (no copy is made here).

     Parameters
     ----------
         mol0 : mol3D
             Molecule to copy from.
     """
     for idx, src_atom in enumerate(mol0.atoms):
         duplicate = atom3D(src_atom.sym, src_atom.coords())
         self.addAtom(duplicate)
         if src_atom.frozen:
             # idx is the position in mol0; this presumably assumes self held
             # no atoms before the copy started - verify against callers.
             self.getAtom(idx).frozen = True
     # Carry over the remaining attributes in the original order.
     for attr_name in ('cat', 'charge', 'denticity', 'ident', 'ffopt',
                       'OBMol'):
         setattr(self, attr_name, getattr(mol0, attr_name))
コード例 #5
0
ファイル: mol3D.py プロジェクト: hitliaomq/molSimplify
 def convert2mol3D(self):
     """Rebuild this molecule's atom list from ``self.OBMol``.

     Calls ``self.initialize()`` first, then adds one ``atom3D`` per atom of
     the OBMol, using the atom's coordinates and the symbol looked up in
     ``globalvars().elementsbynum()`` at position ``atomic number - 1``.
     """
     self.initialize()
     symbols_by_number = globalvars().elementsbynum()
     for ob_atom in openbabel.OBMolAtomIter(self.OBMol):
         xyz = [ob_atom.GetX(), ob_atom.GetY(), ob_atom.GetZ()]
         # The lookup is offset by one relative to the atomic number.
         symbol = symbols_by_number[ob_atom.GetAtomicNum() - 1]
         self.addAtom(atom3D(symbol, xyz))
コード例 #6
0
ファイル: distgeom.py プロジェクト: adityanandy/molSimplify
def GetConf(mol, args, catoms=None):
    """Uses distance geometry to get a random conformer.

    Parameters
    ----------
        mol : mol3D
            mol3D class instance for molecule of interest.
        args : Namespace
            Namespace argument from inparse.
        catoms : list, optional
            List of connection atoms used to generate additional constraints
            if specified (see GetBoundsMatrices()). Default is None, which is
            treated as an empty list.

    Returns
    -------
        Conf3D : mol3D
            mol3D class instance of new conformer.

    """
    # None sentinel instead of a mutable default list shared across calls.
    if catoms is None:
        catoms = []

    # Create a mol3D copy with a dummy metal
    Conf3D = mol3D()
    Conf3D.copymol3D(mol)
    Conf3D.addAtom(atom3D('Fe', [0, 0, 0]))  # Add dummy metal to the mol3D
    dummy_metal = openbabel.OBAtom()  # And add the dummy metal to the OBmol
    dummy_metal.SetAtomicNum(26)
    Conf3D.OBMol.AddAtom(dummy_metal)
    for i in catoms:
        # The dummy metal is the last OBMol atom; ``i + 1`` presumably maps a
        # 0-based catom index onto OBMol's atom numbering - TODO confirm.
        Conf3D.OBMol.AddBond(i + 1, Conf3D.OBMol.NumAtoms(), 1)
    natoms = Conf3D.natoms
    Conf3D.createMolecularGraph()

    shape = findshape(args, mol)
    LB, UB = GetBoundsMatrices(Conf3D, natoms, catoms, shape)
    # Re-draw a distance matrix until GetCMDists reports success.
    status = False
    while not status:
        D = Metrize(LB, UB, natoms)
        D0, status = GetCMDists(D, natoms)
    G = GetMetricMatrix(D, D0, natoms)
    L, V = Get3Eigs(G, natoms)
    X = np.dot(V, L)  # get projection
    # Flatten to a 1-D vector for the conjugate-gradient minimizer.
    x = np.reshape(X, 3 * natoms)
    res1 = optimize.fmin_cg(DistErr,
                            x,
                            fprime=DistErrGrad,
                            gtol=0.1,
                            args=(LB, UB, natoms),
                            disp=0)
    X = np.reshape(res1, (natoms, 3))
    Conf3D = SaveConf(X, Conf3D, True, catoms)

    return Conf3D
コード例 #7
0
ファイル: tsgen.py プロジェクト: edgarin1st/molSimplify
def getconnections(core, catom, Midx, BL, ABXang):
    """Collect candidate points at distance ``BL`` matching a target angle.

    Points are generated around atom ``catom`` with ``PointTranslateSph``
    over a grid of ``iphi`` (1 to 351 in steps of 10) and ``itheta``
    (1 to 178 in steps of 1).  A point ``P`` is kept when
    ``180 - vecangle(catom - Midx, P - catom)`` differs from ``ABXang`` by
    less than 1.

    Parameters
    ----------
        core : mol3D
            Structure providing the two reference atoms.
        catom : int
            Index of the atom the points are generated around.
        Midx : int
            Index of the second reference atom defining the axis.
        BL : float
            Distance passed to ``alignPtoaxis`` and ``PointTranslateSph``.
        ABXang : float
            Target angle the candidates are compared against.

    Returns
    -------
        connPts : list
            Accepted points, in grid-scan order.
    """
    Ocoords = core.getAtom(catom).coords()
    Mcoords = core.getAtom(Midx).coords()
    # Axis vector is loop-invariant; compute it once instead of per grid point.
    axis = vecdiff(Ocoords, Mcoords)
    backbcoords = alignPtoaxis(Ocoords, Ocoords, axis, BL)
    connPts = []
    for iphi in range(1, 359, 10):
        for itheta in range(1, 179, 1):
            P = PointTranslateSph(Ocoords, backbcoords, [BL, iphi, itheta])
            ang = 180 - vecangle(axis, vecdiff(P, Ocoords))
            if abs(ang - ABXang) < 1:
                connPts.append(P)
    return connPts
コード例 #8
0
def pad_mol(mol, target_atoms):
    """Pad ``mol`` with placeholder atoms until it holds ``target_atoms``.

    Placeholder atoms are created with ``atom3D(Sym='X')`` and marked as not
    frozen.  According to the original notes they are meant as zero-charge
    atoms at the origin so that Coulomb matrices get a consistent size.

    Each padding slot gets its own ``atom3D`` instance, so later in-place
    changes to one placeholder cannot affect the others.

    Parameters
    ----------
        mol : mol3D
            Molecule to pad; modified in place.
        target_atoms : int
            Desired value of ``mol.natoms``.

    Returns
    -------
        mol : mol3D
            The same molecule that was passed in.
    """
    attempts = 0
    while mol.natoms < target_atoms:
        placeholder = atom3D(Sym='X')  # placeholder type
        placeholder.frozen = False
        mol.addAtom(placeholder)
        attempts += 1
        # Safety stop in case adding atoms does not raise mol.natoms.
        if attempts > target_atoms:
            print('error padding mol')
            break
    return mol
コード例 #9
0
ファイル: mol3D.py プロジェクト: hitliaomq/molSimplify
 def readfromxyz(self, filename):
     """Read atoms from an xyz file into this molecule.

     ``self.graph`` is reset to an empty list.  Every line that splits into
     exactly four whitespace-separated fields and whose first field is a key
     of ``globalvars().endict()`` becomes an ``atom3D`` added via
     ``self.addAtom``; all other lines (e.g. the count and comment lines)
     are skipped.

     Parameters
     ----------
         filename : str
             Path to the file.  Everything from the first '.xyz' onward is
             dropped and '.xyz' is appended again, so the extension is
             optional.

     Raises
     ------
         ValueError
             If a coordinate field of an accepted line is not a valid float.
     """
     en_dict = globalvars().endict()
     self.graph = []
     fname = filename.split('.xyz')[0]
     # Context manager closes the file even if reading fails.
     with open(fname + '.xyz', 'r') as f:
         lines = f.read().splitlines()
     for line in lines:
         fields = line.split()
         # split() never yields empty fields, so the four fields are used
         # directly (calling len() on a filter object fails on Python 3).
         if len(fields) == 4 and fields[0] in en_dict:
             atom = atom3D(
                 fields[0],
                 [float(fields[1]), float(fields[2]),
                  float(fields[3])])
             self.addAtom(atom)
コード例 #10
0
ファイル: tf_nn_prep.py プロジェクト: adityanandy/molSimplify
def tf_ANN_preproc(args, ligs, occs, dents, batslist, tcats, licores):
    # prepares and runs ANN calculation

    current_time = time.time()
    start_time = current_time
    last_time = current_time

    ######################
    ANN_reason = {}
    ANN_attributes = {}
    ######################

    r = 0
    emsg = list()
    valid = True
    catalysis = False
    metal = args.core
    this_metal = metal.lower()
    if len(this_metal) > 2:
        this_metal = this_metal[0:2]
    newligs = []
    newcats = []
    newdents = []
    newoccs = []
    newdecs = [False] * 6
    newdec_inds = [[]] * 6
    ANN_trust = False
    count = -1
    for i, lig in enumerate(ligs):
        this_occ = occs[i]
        if args.debug:
            print(('working on lig: ' + str(lig)))
            print(('occ is  ' + str(this_occ)))
        for j in range(0, int(this_occ)):
            count += 1
            newligs.append(lig)
            newdents.append(dents[i])
            newcats.append(tcats[i])
            newoccs.append(1)
            if args.decoration:
                newdecs[count] = (args.decoration[i])
                newdec_inds[count] = (args.decoration_index[i])

    ligs = newligs
    dents = newdents
    tcats = newcats
    occs = newoccs
    if args.debug:
        print('tf_nn has finisihed prepping ligands')

    if not args.geometry == "oct":
        emsg.append(
            "[ANN] Geometry is not supported at this time, MUST give -geometry = oct"
        )
        valid = False
        ANN_reason = 'geometry not oct'
    if not args.oxstate:
        emsg.append("\n oxidation state must be given")
        valid = False
        ANN_reason = 'oxstate not given'
    if valid:
        oxidation_state = args.oxstate
        valid, oxidation_state = check_metal(this_metal, oxidation_state)
        if int(oxidation_state) in [3, 4, 5]:
            catalytic_moieties = ['oxo', 'x', 'hydroxyl', '[O--]', '[OH-]']
            if args.debug:
                print(('the ligands are', ligs))
                print((set(ligs).intersection(set(catalytic_moieties))))
            if len(set(ligs).intersection(set(catalytic_moieties))) > 0:
                catalysis = True
        # generate key in descriptor space
        ox = int(oxidation_state)
        spin = args.spin
        if args.debug:
            print(('metal is ' + str(this_metal)))
            print(('metal validity', valid))
    if not valid and not catalysis:
        emsg.append("\n Oxidation state not available for this metal")
        ANN_reason = 'ox state not available for metal'
    if valid:
        high_spin, spin_ops = spin_classify(this_metal, spin, ox)
    if not valid and not catalysis:
        emsg.append("\n this spin state not available for this metal")
        ANN_reason = 'spin state not available for metal'
    if emsg:
        print((str(" ".join(["ANN messages:"] + [str(i) for i in emsg]))))

    current_time = time.time()
    metal_check_time = current_time - last_time
    last_time = current_time
    if args.debug:
        print(('checking metal/ox took  ' +
               "{0:.2f}".format(metal_check_time) + ' seconds'))

    if valid or catalysis:
        (valid, axial_ligs, equitorial_ligs, ax_dent, eq_dent, ax_tcat,
         eq_tcat, axial_ind_list, equitorial_ind_list, ax_occs, eq_occs,
         pentadentate) = tf_check_ligands(ligs, batslist, dents, tcats, occs,
                                          args.debug)

        if args.debug:
            print(("ligand validity is  " + str(valid)))
            print(('Occs', occs))
            print(('Ligands', ligs))
            print(('Dents', dents))
            print(('Bats (backbone atoms)', batslist))
            print(('lig validity', valid))
            print(('ax ligs', axial_ligs))
            print(('eq ligs', equitorial_ligs))
            print(('spin is', spin))

        if catalysis:
            valid = False
    if (not valid) and (not catalysis):
        ANN_reason = 'found incorrect ligand symmetry'
    elif not valid and catalysis:
        if args.debug:
            print('tf_nn detects catalytic')
        ANN_reason = 'catalytic structure presented'

    # placeholder for metal
    metal_mol = mol3D()
    metal_mol.addAtom(atom3D(metal))

    net_lig_charge = 0
    if valid or catalysis:
        if args.debug:
            print('loading axial ligands')
        ax_ligands_list = list()
        eq_ligands_list = list()
        for ii, axl in enumerate(axial_ligs):
            ax_lig3D, r_emsg = lig_load(axl, licores)  # load ligand
            net_lig_charge += ax_lig3D.charge
            if r_emsg:
                emsg += r_emsg
            if ax_tcat:
                ax_lig3D.cat = ax_tcat
                if args.debug:
                    print(('custom ax connect atom given (0-ind) ' +
                           str(ax_tcat)))
            if pentadentate and len(ax_lig3D.cat) > 1:
                ax_lig3D.cat = [ax_lig3D.cat[-1]]
            this_lig = ligand(mol3D(), [], ax_dent)
            this_lig.mol = ax_lig3D

            # check decoration index
            if newdecs:
                if newdecs[axial_ind_list[ii]]:
                    print(('decorating ' + str(axl) + ' with ' +
                           str(newdecs[axial_ind_list[ii]]) + ' at sites ' +
                           str(newdec_inds[axial_ind_list[ii]])))
                    ax_lig3D = decorate_ligand(args, axl,
                                               newdecs[axial_ind_list[ii]],
                                               newdec_inds[axial_ind_list[ii]])
            ax_lig3D.convert2mol3D()  # mol3D representation of ligand
            for jj in range(0, ax_occs[ii]):
                ax_ligands_list.append(this_lig)
        print(('Obtained the net ligand charge, which is... ', net_lig_charge))
        if args.debug:
            print('ax_ligands_list:')
            print(ax_ligands_list)
            print([h.mol.cat for h in ax_ligands_list])

        if args.debug:
            print(('loading equitorial ligands ' + str(equitorial_ligs)))
        for ii, eql in enumerate(equitorial_ligs):
            eq_lig3D, r_emsg = lig_load(eql, licores)  # load ligand
            net_lig_charge += eq_lig3D.charge
            if r_emsg:
                emsg += r_emsg
            if eq_tcat:
                eq_lig3D.cat = eq_tcat
                if args.debug:
                    print(('custom eq connect atom given (0-ind) ' +
                           str(eq_tcat)))
            if pentadentate and len(eq_lig3D.cat) > 1:
                eq_lig3D.cat = eq_lig3D.cat[0:4]

            if newdecs:
                if args.debug:
                    print(('newdecs' + str(newdecs)))
                    print(
                        ('equitorial_ind_list is ' + str(equitorial_ind_list)))
                c = 0
                if newdecs[equitorial_ind_list[ii]]:
                    if args.debug:
                        print(('decorating ' + str(eql) + ' with ' +
                               str(newdecs[equitorial_ind_list[ii]]) +
                               ' at sites ' +
                               str(newdec_inds[equitorial_ind_list[ii]])))
                    eq_lig3D = decorate_ligand(
                        args, eql, newdecs[equitorial_ind_list[ii]],
                        newdec_inds[equitorial_ind_list[ii]])
                    c += 1

            eq_lig3D.convert2mol3D()  # mol3D representation of ligand
            this_lig = ligand(mol3D(), [], eq_dent)
            this_lig.mol = eq_lig3D

            for jj in range(0, eq_occs[ii]):
                eq_ligands_list.append(this_lig)
        if args.debug:
            print('eq_ligands_list:')
            print(eq_ligands_list)

            current_time = time.time()
            ligand_check_time = current_time - last_time
            last_time = current_time
            print(('checking ligs took ' +
                   "{0:.2f}".format(ligand_check_time) + ' seconds'))
            print(
                ('writing copies of ligands as used  in ANN to currrent dir : '
                 + os.getcwd()))
            for kk, l in enumerate(ax_ligands_list):
                l.mol.writexyz('axlig-' + str(kk) + '.xyz')
            for kk, l in enumerate(eq_ligands_list):
                l.mol.writexyz('eqlig-' + str(kk) + '.xyz')
        # make description of complex
        custom_ligand_dict = {
            "eq_ligand_list": eq_ligands_list,
            "ax_ligand_list": ax_ligands_list,
            "eq_con_int_list": [h.mol.cat for h in eq_ligands_list],
            "ax_con_int_list": [h.mol.cat for h in ax_ligands_list]
        }

        ox_modifier = {metal: ox}

        this_complex = assemble_connectivity_from_parts(
            metal_mol, custom_ligand_dict)

        if args.debug:
            print('custom_ligand_dict is : ')
            print(custom_ligand_dict)

    if args.debug:
        print(('finished checking ligands, valid is ' + str(valid)))
        print('assembling RAC custom ligand configuration dictionary')

    if valid:
        # =====Classifiers:=====
        _descriptor_names = ["oxstate", "spinmult", "charge_lig"]
        _descriptors = [ox, spin, net_lig_charge]
        descriptor_names, descriptors = get_descriptor_vector(
            this_complex, custom_ligand_dict, ox_modifier)
        descriptor_names = _descriptor_names + descriptor_names
        descriptors = _descriptors + descriptors
        flag_oct, geo_lse = ANN_supervisor("geo_static_clf",
                                           descriptors,
                                           descriptor_names,
                                           debug=args.debug)
        # Test for scikit-learn models
        # flag_oct, geo_lse = sklearn_supervisor("geo_static_clf", descriptors, descriptor_names, debug=False)
        sc_pred, sc_lse = ANN_supervisor("sc_static_clf",
                                         descriptors,
                                         descriptor_names,
                                         debug=args.debug)
        ANN_attributes.update({
            "geo_label": 0 if flag_oct[0, 0] <= 0.5 else 1,
            "geo_prob": flag_oct[0, 0],
            "geo_LSE": geo_lse[0],
            "geo_label_trust": lse_trust(geo_lse),
            "sc_label": 0 if sc_pred[0, 0] <= 0.5 else 1,
            "sc_prob": sc_pred[0, 0],
            "sc_LSE": sc_lse[0],
            "sc_label_trust": lse_trust(sc_lse)
        })

        # build RACs without geo
        con_mat = this_complex.graph
        descriptor_names, descriptors = get_descriptor_vector(
            this_complex, custom_ligand_dict, ox_modifier)

        # get one-hot-encoding (OHE)
        descriptor_names, descriptors = create_OHE(descriptor_names,
                                                   descriptors, metal,
                                                   oxidation_state)

        # get alpha
        alpha = 0.2  # default for B3LYP
        if args.exchange:
            try:
                if float(args.exchange) > 1:
                    alpha = float(args.exchange) / 100  # if given as %
                elif float(args.exchange) <= 1:
                    alpha = float(args.exchange)
            except:
                print('cannot cast exchange argument as a float, using 20%')
        descriptor_names += ['alpha']
        descriptors += [alpha]
        descriptor_names += ['ox']
        descriptors += [ox]
        descriptor_names += ['spin']
        descriptors += [spin]
        if args.debug:
            current_time = time.time()
            rac_check_time = current_time - last_time
            last_time = current_time
            print(('getting RACs took ' + "{0:.2f}".format(rac_check_time) +
                   ' seconds'))

        # get spin splitting:
        split, latent_split = ANN_supervisor('split', descriptors,
                                             descriptor_names, args.debug)
        if args.debug:
            current_time = time.time()
            split_ANN_time = current_time - last_time
            last_time = current_time
            print(('split ANN took ' + "{0:.2f}".format(split_ANN_time) +
                   ' seconds'))

        # get bond lengths:
        if oxidation_state == '2':
            r_ls, latent_r_ls = ANN_supervisor('ls_ii', descriptors,
                                               descriptor_names, args.debug)
            r_hs, latent_r_hs = ANN_supervisor('hs_ii', descriptors,
                                               descriptor_names, args.debug)
        elif oxidation_state == '3':
            r_ls, latent_r_ls = ANN_supervisor('ls_iii', descriptors,
                                               descriptor_names, args.debug)
            r_hs, latent_r_hs = ANN_supervisor('hs_iii', descriptors,
                                               descriptor_names, args.debug)
        if not high_spin:
            r = r_ls[0]
        else:
            r = r_hs[0]

        if args.debug:
            current_time = time.time()
            GEO_ANN_time = current_time - last_time
            last_time = current_time
            print(('GEO ANN took ' + "{0:.2f}".format(GEO_ANN_time) +
                   ' seconds'))

        h**o, latent_homo = ANN_supervisor('h**o', descriptors,
                                           descriptor_names, args.debug)
        if args.debug:
            current_time = time.time()
            homo_ANN_time = current_time - last_time
            last_time = current_time
            print(('h**o ANN took ' + "{0:.2f}".format(homo_ANN_time) +
                   ' seconds'))

        gap, latent_gap = ANN_supervisor('gap', descriptors, descriptor_names,
                                         args.debug)
        if args.debug:
            current_time = time.time()
            gap_ANN_time = current_time - last_time
            last_time = current_time
            print(('gap ANN took ' + "{0:.2f}".format(gap_ANN_time) +
                   ' seconds'))

        # get minimum distance to train (for splitting)

        split_dist = find_true_min_eu_dist("split", descriptors,
                                           descriptor_names)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))

        homo_dist = find_true_min_eu_dist("h**o", descriptors,
                                          descriptor_names)
        homo_dist = find_ANN_latent_dist("h**o", latent_homo, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min H**O dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))

        gap_dist = find_true_min_eu_dist("gap", descriptors, descriptor_names)
        gap_dist = find_ANN_latent_dist("gap", latent_gap, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min GAP dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))

        # save attributes for return
        ANN_attributes.update({'split': split[0][0]})
        ANN_attributes.update({'split_dist': split_dist})
        ANN_attributes.update({'This spin': spin})
        if split[0][0] < 0 and (abs(split[0]) > 5):
            ANN_attributes.update({'ANN_ground_state': spin_ops[1]})
        elif split[0][0] > 0 and (abs(split[0]) > 5):
            ANN_attributes.update({'ANN_ground_state': spin_ops[0]})
        else:
            ANN_attributes.update(
                {'ANN_ground_state': 'dgen ' + str(spin_ops)})

        ANN_attributes.update({'h**o': h**o[0][0]})
        ANN_attributes.update({'gap': gap[0][0]})
        ANN_attributes.update({'homo_dist': homo_dist})
        ANN_attributes.update({'gap_dist': gap_dist})

        # now that we have bond predictions, we need to map these
        # back to a length of equal size as the original ligand request
        # in order for molSimplify to understand if
        ANN_bondl = len(ligs) * [False]
        added = 0
        for ii, eql in enumerate(equitorial_ind_list):
            for jj in range(0, eq_occs[ii]):
                ANN_bondl[added] = r[2]
                added += 1

        for ii, axl in enumerate(axial_ind_list):
            if args.debug:
                print((ii, axl, added, ax_occs))
            for jj in range(0, ax_occs[ii]):
                if args.debug:
                    print((jj, axl, added, r[ii]))
                ANN_bondl[added] = r[ii]
                added += 1

        ANN_attributes.update({'ANN_bondl': 4 * [r[2]] + [r[0], r[1]]})

        HOMO_ANN_trust = 'not set'
        HOMO_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(homo_dist) < 3:
            HOMO_ANN_trust_message = 'ANN results should be trustworthy for this complex '
            HOMO_ANN_trust = 'high'
        elif float(homo_dist) < 5:
            HOMO_ANN_trust_message = 'ANN results are probably useful for this complex '
            HOMO_ANN_trust = 'medium'
        elif float(homo_dist) <= 10:
            HOMO_ANN_trust_message = 'ANN results are fairly far from training data, be cautious '
            HOMO_ANN_trust = 'low'
        elif float(homo_dist) > 10:
            HOMO_ANN_trust_message = 'ANN results are too far from training data, be cautious '
            HOMO_ANN_trust = 'very low'
        ANN_attributes.update({'homo_trust': HOMO_ANN_trust})
        ANN_attributes.update({'gap_trust': HOMO_ANN_trust})

        ANN_trust = 'not set'
        ANN_trust_message = ""
        if float(split_dist / 3) < 0.25:
            ANN_trust_message = 'ANN results should be trustworthy for this complex '
            ANN_trust = 'high'
        elif float(split_dist / 3) < 0.75:
            ANN_trust_message = 'ANN results are probably useful for this complex '
            ANN_trust = 'medium'
        elif float(split_dist / 3) < 1.0:
            ANN_trust_message = 'ANN results are fairly far from training data, be cautious '
            ANN_trust = 'low'
        elif float(split_dist / 3) > 1.0:
            ANN_trust_message = 'ANN results are too far from training data, be cautious '
            ANN_trust = 'very low'
        ANN_attributes.update({'split_trust': ANN_trust})

        # print text to std out
        print(
            "******************************************************************"
        )
        print(
            "************** ANN is engaged and advising on spin ***************"
        )
        print(
            "************** and metal-ligand bond distances    ****************"
        )
        print(
            "******************************************************************"
        )
        if high_spin:
            print(('You have selected a high-spin state, s = ' + str(spin)))
        else:
            print(('You have selected a low-spin state, s = ' + str(spin)))
        # report to stdout
        if split[0] < 0 and not high_spin:
            if abs(split[0]) > 5:
                print(
                    'warning, ANN predicts a high spin ground state for this complex'
                )
            else:
                print(
                    'warning, ANN predicts a near degenerate ground state for this complex'
                )
        elif split[0] >= 0 and high_spin:
            if abs(split[0]) > 5:
                print(
                    'warning, ANN predicts a low spin ground state for this complex'
                )
            else:
                print(
                    'warning, ANN predicts a near degenerate ground state for this complex'
                )
        print(('delta is', split[0], ' spin is ', high_spin))
        print(("ANN predicts a spin splitting (HS - LS) of " +
               "{0:.2f}".format(float(split[0])) + ' kcal/mol at ' +
               "{0:.0f}".format(100 * alpha) + '% HFX'))
        print(('ANN low spin bond length (ax1/ax2/eq) is predicted to be: ' +
               " /".join(["{0:.2f}".format(float(i))
                          for i in r_ls[0]]) + ' angstrom'))
        print(('ANN high spin bond length (ax1/ax2/eq) is predicted to be: ' +
               " /".join(["{0:.2f}".format(float(i))
                          for i in r_hs[0]]) + ' angstrom'))
        print(('distance to splitting energy training data is ' +
               "{0:.2f}".format(split_dist)))
        print(ANN_trust_message)
        print(("ANN predicts a H**O value of " +
               "{0:.2f}".format(float(h**o[0])) + ' eV at ' +
               "{0:.0f}".format(100 * alpha) + '% HFX'))
        print(("ANN predicts a LUMO-H**O energetic gap value of " +
               "{0:.2f}".format(float(gap[0])) + ' eV at ' +
               "{0:.0f}".format(100 * alpha) + '% HFX'))
        print(HOMO_ANN_trust_message)
        print(('distance to H**O training data is ' +
               "{0:.2f}".format(homo_dist)))
        print(
            ('distance to GAP training data is ' + "{0:.2f}".format(gap_dist)))
        print(
            "*******************************************************************"
        )
        print(
            "************** ANN complete, saved in record file *****************"
        )
        print(
            "*******************************************************************"
        )
        from keras import backend as K
        # This is done to get rid of the attribute error that is a bug in tensorflow.
        K.clear_session()
        current_time = time.time()
        total_ANN_time = current_time - start_time
        last_time = current_time
        print(('Total ML functions took ' + "{0:.2f}".format(total_ANN_time) +
               ' seconds'))

    if catalysis:
        print('-----In Catalysis Mode-----')
        # build RACs without geo
        con_mat = this_complex.graph
        descriptor_names, descriptors = get_descriptor_vector(
            this_complex, custom_ligand_dict, ox_modifier)
        # get alpha
        alpha = 20  # default for B3LYP
        if args.exchange:
            try:
                if float(args.exchange) < 1:
                    alpha = float(args.exchange) * 100  # if given as %
                elif float(args.exchange) >= 1:
                    alpha = float(args.exchange)
            except:
                print('cannot case exchange argument as a float, using 20%')
        descriptor_names += ['alpha', 'ox', 'spin', 'charge_lig']
        descriptors += [alpha, ox, spin, net_lig_charge]
        if args.debug:
            current_time = time.time()
            rac_check_time = current_time - last_time
            last_time = current_time
            print(('getting RACs took ' + "{0:.2f}".format(rac_check_time) +
                   ' seconds'))
        oxo, latent_oxo = ANN_supervisor('oxo', descriptors, descriptor_names,
                                         args.debug)
        if args.debug:
            current_time = time.time()
            split_ANN_time = current_time - last_time
            last_time = current_time
        oxo_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "oxo", latent_oxo, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min oxo dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))
        ANN_attributes.update({'oxo': oxo[0][0]})
        ANN_attributes.update({'oxo_dist': oxo_dist})

        hat, latent_hat = ANN_supervisor('hat', descriptors, descriptor_names,
                                         args.debug)
        if args.debug:
            current_time = time.time()
            split_ANN_time = current_time - last_time
            last_time = current_time
            print(('HAT ANN took ' + "{0:.2f}".format(split_ANN_time) +
                   ' seconds'))

        hat_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "hat", latent_hat, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min hat dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))
        ANN_attributes.update({'hat': hat[0][0]})
        ANN_attributes.update({'hat_dist': hat_dist})

        ########## for Oxo and HOMO optimization ##########
        oxo20, latent_oxo20 = ANN_supervisor('oxo20', descriptors,
                                             descriptor_names, args.debug)
        if args.debug:
            current_time = time.time()
            oxo20_ANN_time = current_time - last_time
            last_time = current_time
            print(('oxo20 ANN took ' + "{0:.2f}".format(oxo20_ANN_time) +
                   ' seconds'))
        # oxo20_dist = find_ANN_latent_dist("oxo20", latent_oxo20, args.debug)
        oxo20_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "oxo20", latent_oxo20, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min oxo20 dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))
        ANN_attributes.update({'oxo20': oxo20[0][0]})
        ANN_attributes.update({'oxo20_dist': oxo20_dist})
        # _ = find_ANN_latent_dist("oxo20", latent_oxo20, args.debug)
        # _ = find_true_min_eu_dist("oxo20", descriptors, descriptor_names, latent_space_vector=latent_oxo20)

        homo_empty, latent_homo_empty = ANN_supervisor('homo_empty',
                                                       descriptors,
                                                       descriptor_names,
                                                       args.debug)
        if args.debug:
            current_time = time.time()
            homo_empty_ANN_time = current_time - last_time
            last_time = current_time
            print(('homo_empty ANN took ' +
                   "{0:.2f}".format(homo_empty_ANN_time) + ' seconds'))
        # homo_empty_dist = find_ANN_latent_dist("homo_empty", latent_homo_empty, args.debug)
        homo_empty_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "homo_empty", latent_homo_empty, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min homo_empty dist took ' +
                   "{0:.2f}".format(min_dist_time) + ' seconds'))
        ANN_attributes.update({'homo_empty': homo_empty[0][0]})
        ANN_attributes.update({'homo_empty_dist': homo_empty_dist})
        # _ = find_ANN_latent_dist("homo_empty", latent_homo_empty, args.debug)
        # _ = find_true_min_eu_dist("homo_empty", descriptors, descriptor_names, latent_space_vector=latent_homo_empty)

        Oxo20_ANN_trust = 'not set'
        Oxo20_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(oxo20_dist) < 0.75:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results should be trustworthy for this complex '
            Oxo20_ANN_trust = 'high'
        elif float(oxo20_dist) < 1:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results are probably useful for this complex '
            Oxo20_ANN_trust = 'medium'
        elif float(oxo20_dist) <= 1.25:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results are fairly far from training data, be cautious '
            Oxo20_ANN_trust = 'low'
        elif float(oxo20_dist) > 1.25:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results are too far from training data, be cautious '
            Oxo20_ANN_trust = 'very low'
        ANN_attributes.update({'oxo20_trust': Oxo20_ANN_trust})

        homo_empty_ANN_trust = 'not set'
        homo_empty_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(homo_empty_dist) < 0.75:
            homo_empty_ANN_trust_message = 'homo_empty ANN results should be trustworthy for this complex '
            homo_empty_ANN_trust = 'high'
        elif float(homo_empty_dist) < 1:
            homo_empty_ANN_trust_message = 'homo_empty ANN results are probably useful for this complex '
            homo_empty_ANN_trust = 'medium'
        elif float(homo_empty_dist) <= 1.25:
            homo_empty_ANN_trust_message = 'homo_empty ANN results are fairly far from training data, be cautious '
            homo_empty_ANN_trust = 'low'
        elif float(homo_empty_dist) > 1.25:
            homo_empty_ANN_trust_message = 'homo_empty ANN results are too far from training data, be cautious '
            homo_empty_ANN_trust = 'very low'
        ANN_attributes.update({'homo_empty_trust': homo_empty_ANN_trust})

        ####################################################

        Oxo_ANN_trust = 'not set'
        Oxo_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(oxo_dist) < 3:
            Oxo_ANN_trust_message = 'Oxo ANN results should be trustworthy for this complex '
            Oxo_ANN_trust = 'high'
        elif float(oxo_dist) < 5:
            Oxo_ANN_trust_message = 'Oxo ANN results are probably useful for this complex '
            Oxo_ANN_trust = 'medium'
        elif float(oxo_dist) <= 10:
            Oxo_ANN_trust_message = 'Oxo ANN results are fairly far from training data, be cautious '
            Oxo_ANN_trust = 'low'
        elif float(oxo_dist) > 10:
            Oxo_ANN_trust_message = 'Oxo ANN results are too far from training data, be cautious '
            Oxo_ANN_trust = 'very low'
        ANN_attributes.update({'oxo_trust': Oxo_ANN_trust})

        HAT_ANN_trust = 'not set'
        HAT_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(hat_dist) < 3:
            HAT_ANN_trust_message = 'HAT ANN results should be trustworthy for this complex '
            HAT_ANN_trust = 'high'
        elif float(hat_dist) < 5:
            HAT_ANN_trust_message = 'HAT ANN results are probably useful for this complex '
            HAT_ANN_trust = 'medium'
        elif float(hat_dist) <= 10:
            HAT_ANN_trust_message = 'HAT ANN results are fairly far from training data, be cautious '
            HAT_ANN_trust = 'low'
        elif float(hat_dist) > 10:
            HAT_ANN_trust_message = 'HAT ANN results are too far from training data, be cautious '
            HAT_ANN_trust = 'very low'
        ANN_attributes.update({'hat_trust': HAT_ANN_trust})
        print(
            "*******************************************************************"
        )
        print(
            "**************       CATALYTIC ANN ACTIVATED!      ****************"
        )
        print(
            "*********** Currently advising on Oxo and HAT energies ************"
        )
        print(
            "*******************************************************************"
        )
        print(("ANN predicts a Oxo20 energy of " +
               "{0:.2f}".format(float(oxo20[0])) + ' kcal/mol at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(Oxo20_ANN_trust_message)
        print(('Distance to Oxo20 training data in the latent space is ' +
               "{0:.2f}".format(oxo20_dist)))
        print(("ANN predicts a empty site beta H**O level of " +
               "{0:.2f}".format(float(homo_empty[0])) + ' eV at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(homo_empty_ANN_trust_message)
        print((
            'Distance to empty site beta H**O level training data in the latent space is '
            + "{0:.2f}".format(homo_empty_dist)))
        print(
            '-------------------------------------------------------------------'
        )
        print(("ANN predicts a oxo formation energy of " +
               "{0:.2f}".format(float(oxo[0])) + ' kcal/mol at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(Oxo_ANN_trust_message)
        print(('Distance to oxo training data in the latent space is ' +
               "{0:.2f}".format(oxo_dist)))
        print(("ANN predicts a HAT energy of " +
               "{0:.2f}".format(float(hat[0])) + ' kcal/mol at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(HAT_ANN_trust_message)
        print(('Distance to HAT training data in the latent space is ' +
               "{0:.2f}".format(hat_dist)))
        print(
            "*******************************************************************"
        )
        print(
            "************** ANN complete, saved in record file *****************"
        )
        print(
            "*******************************************************************"
        )
        from keras import backend as K
        # This is done to get rid of the attribute error that is a bug in tensorflow.
        K.clear_session()

    if catalysis:
        current_time = time.time()
        total_ANN_time = current_time - start_time
        last_time = current_time
        print(('Total Catalysis ML functions took ' +
               "{0:.2f}".format(total_ANN_time) + ' seconds'))

    if not valid and not ANN_reason and not catalysis:
        ANN_reason = ' uncaught rejection (see sdout/stderr)'

    return valid, ANN_reason, ANN_attributes, catalysis

    if False:
        # test Euclidean norm to training data distance
        train_dist, best_row = find_eu_dist(nn_excitation)
        ANN_trust = max(0.01, 1.0 - train_dist)

        ANN_attributes.update({'ANN_closest_train': best_row})

        print((' with closest training row ' + best_row[:-2] + ' at  ' +
               str(best_row[-2:]) + '% HFX'))

        # use ANN to predict functional sensitivity
        HFX_slope = 0
        HFX_slope = get_slope(slope_excitation)
        print(('Predicted HFX exchange sensitivity is : ' +
               "{0:.2f}".format(float(HFX_slope)) + ' kcal/HFX'))
        ANN_attributes.update({'ANN_slope': HFX_slope})
コード例 #11
0
ファイル: tsgen.py プロジェクト: edgarin1st/molSimplify
def tsgen(mode, args, rootdir, core, substr, compreact, substreact, globs):
    """Place a substrate on a core and write the resulting structure.

    Parameters
    ----------
    mode : int
        1 -- oxidative addition of a single group.
        2 -- not supported; an error message is returned.
        3 -- abstraction.
        Any other value is rejected with an error message.
    args : namespace
        Options object. This function reads ``substplaceff``, ``ffoption``,
        ``core``, ``bind``, ``nambsmi`` and ``debug`` and forwards the whole
        object to the placement helpers and ``getinputargs``.
    rootdir : str
        Output directory, forwarded to ``name_TS``; everything after its
        first ``/`` is echoed in the final status line.
    core, substr
        Core and substrate molecules (copied with ``mol3D.copymol3D``).
        NOTE: both may be modified in place (``BCM`` / ``alignmol``).
    compreact, substreact : int
        Index of the reacting atom in ``core`` and ``substr`` respectively.
    globs
        Unused in this function; kept for interface compatibility.

    Returns
    -------
    tuple
        ``(strfiles, emsg, this_diag)``: the list of written structure files,
        ``False`` or an error message string, and the ``run_diag`` object.
    """
    emsg = False
    this_diag = run_diag()
    strfiles = []
    adjsidx = substr.getBondedAtoms(substreact)[0]
    adjcidx = core.getBondedAtoms(compreact)[0]
    # initialize connecting and frozen atoms for FF opt: every core atom ...
    frozenats = list(range(core.natoms))
    # ... plus the abstracted atom and the heavy atom bonded to it (substrate
    # indices are offset by the number of core atoms)
    frozenats.append(core.natoms+substreact)
    frozenats.append(core.natoms+adjsidx)
    connected = [core.natoms+substreact]
    # START FUNCTIONALIZING
    # Stays None when no placement is accepted, so the failure can be
    # reported through emsg instead of crashing on an unbound local.
    ts3D = None
    if mode == 2:
        emsg = 'Sorry, this mode is not supported yet. Exiting...'
        return strfiles, emsg, this_diag
    elif mode == 1:  # oxidative addition of a single group
        # get first connecting point
        MXBL = MXdistcoeff*(core.getAtom(compreact).rad +
                            substr.getAtom(substreact).rad)
        cpoint = getconnection(core, compreact, MXBL)
        # distort substrate molecule
        XYBL = XYcoeff*(substr.getAtom(substreact).rad +
                        substr.getAtom(adjsidx).rad)
        substr.BCM(adjsidx, substreact, XYBL)
        # align substrate molecule
        substr.alignmol(substr.getAtom(substreact), atom3D('H', cpoint))
        # temporary copy of the core with a placeholder atom at the
        # connection point, used only to generate the alignment points
        tmp3D = mol3D()
        tmp3D.copymol3D(core)
        tmp3D.addAtom(atom3D('Cl', cpoint))
        ligalignpts = getconnections(
            tmp3D, tmp3D.natoms-1, compreact, XYBL, MXYang)
        if args.substplaceff:
            # full FF substrate placement
            print('Full FF-based substrate placement specified.')
            en_min = 1e6
            for n, P in enumerate(ligalignpts):
                print(('Evaluating FF energy of point ' +
                       str(n+1)+' of '+str(len(ligalignpts))))
                # work on copies so every candidate starts from the same
                # core and substrate
                coretmp = mol3D()
                coretmp.copymol3D(core)
                substrtmp = mol3D()
                substrtmp.copymol3D(substr)
                ts3Dtmp, enc = substplaceff_mode1(
                    coretmp, substrtmp, substreact, compreact, cpoint, P, args, connected, frozenats)
                if enc < en_min:
                    en_min = enc
                    ts3D = mol3D()
                    ts3D.copymol3D(ts3Dtmp)
        else:
            # cheap substrate placement
            print('Cheap substrate placement')
            ligalignpt = substplacecheap(core, ligalignpts, compreact)
            ts3D, enc = substplaceff_mode1(
                core, substr, substreact, compreact, cpoint, ligalignpt, args, connected, frozenats)
    elif mode == 3:  # abstraction
        # distort A-B bond
        ABBL = distance(core.getAtomCoords(compreact), core.getAtomCoords(
            adjcidx)) + 0.05*(core.getAtom(compreact).rad + core.getAtom(adjcidx).rad)
        core.BCM(compreact, adjcidx, ABBL)
        # set B-X distance
        BXBL = 1.1*(substr.getAtom(substreact).rad +
                    core.getAtom(compreact).rad)
        # get possible connecting points
        connPts = getconnections(core, compreact, adjcidx, BXBL, ABXang)
        if args.substplaceff:
            # full FF substrate placement
            print('Full FF-based substrate placement specified.')
            en_min = 1e6
            for n, P in enumerate(connPts):
                print(('Evaluating FF energy of point ' +
                       str(n+1)+' of '+str(len(connPts))))
                # work on copies so every candidate starts from the same
                # core and substrate
                coretmp = mol3D()
                coretmp.copymol3D(core)
                substrtmp = mol3D()
                substrtmp.copymol3D(substr)
                ts3Dtmp, enc = substplaceff_mode3(
                    coretmp, substrtmp, substreact, compreact, P, args, connected, frozenats)
                if enc < en_min:
                    en_min = enc
                    ts3D = mol3D()
                    ts3D.copymol3D(ts3Dtmp)
        else:
            # cheap substrate placement
            print('Cheap substrate placement')
            cpoint = substplacecheap(core, connPts, compreact)
            ts3D, enc = substplaceff_mode3(
                core, substr, substreact, compreact, cpoint, args, connected, frozenats)
            if 'a' in args.ffoption:
                print('FF optimized remainder of substrate')
    else:
        # Previously an unknown mode crashed on the unbound ts3D below.
        emsg = 'Sorry, mode ' + str(mode) + ' is not recognized. Exiting...'
        return strfiles, emsg, this_diag
    if ts3D is None:
        # Full-FF search had no candidate points, or none with an energy
        # below the 1e6 acceptance sentinel.
        emsg = 'No acceptable substrate placement was found. Exiting...'
        return strfiles, emsg, this_diag
    ts3D.charge += substr.charge
    # END FUNCTIONALIZING
    fname = name_TS(rootdir, args.core, substr, args,
                    bind=args.bind, bsmi=args.nambsmi)
    ts3D.writexyz(fname)
    strfiles.append(fname)
    getinputargs(args, fname)
    pfold = rootdir.split('/', 1)[-1]
    # check for molecule sanity
    # NOTE(review): the warning below fires when the first returned value is
    # truthy, so it presumably flags a problem (e.g. overlapping atoms)
    # rather than a pass -- confirm against mol3D.sanitycheck.
    sanity, d0 = ts3D.sanitycheck(True)
    if args.debug:
        print(('setting sanity diag, min dist at ' +
               str(d0) + ' (higher is better)'))
    this_diag.set_sanity(sanity, d0)
    this_diag.set_mol(ts3D)
    this_diag.write_report(fname+'.report')
    if sanity:
        print(('WARNING: Generated complex is not good! Minimum distance between atoms:' +
              "{0:.2f}".format(d0)+'A\n'))
    print(('\nIn folder '+pfold+' generated 1 structure(s)!'))
    return strfiles, emsg, this_diag