예제 #1
0
    def __init__(self,
                 line_notation,
                 input_type,
                 convtype="OB",
                 isotopes=False):
        """Build the molecule from a line notation and perceive E/Z bonds.

        The input is converted to canonical SMILES, re-read from that
        canonical form and given explicit hydrogens.  When at least one
        non-aromatic double bond is present, a MOL representation is
        generated (through Marvin or Open Babel), the bond adjacency map
        ``self.all_bonds`` is filled and ``self.calc_e_z()`` is called.
        Otherwise ``self.e_z`` is a list of zeros, one per bond.

        Args:
            line_notation: Molecule description in the ``input_type`` format.
            input_type: Open Babel input format identifier.
            convtype: ``"MARVIN"`` produces the MOL file with
                ``convert_molconv``; any other value uses ``convert``.
            isotopes: Stored unchanged on ``self.isotopes``.
        """
        self.obmol = None
        self.n_bonds = 0
        self.n_atoms = 0
        self.all_bonds = {}
        self.double_b = []
        self.e_z = []
        self.isotopes = isotopes
        self.number_explicit_db = 0

        # Canonicalise the input so every later step works from one form.
        to_canonical = openbabel.OBConversion()
        parsed = openbabel.OBMol()
        to_canonical.SetInAndOutFormats(input_type, "can")
        to_canonical.ReadString(parsed, line_notation)
        self.can_smi = to_canonical.WriteString(parsed)

        # Re-read the canonical SMILES; the working molecule MUST be an OBMol.
        from_canonical = openbabel.OBConversion()
        from_canonical.SetInFormat("can")
        mol = openbabel.OBMol()
        from_canonical.ReadString(mol, self.can_smi)
        mol.AddHydrogens()
        self.obmol = mol
        self.n_bonds = mol.NumBonds()
        # Recorded for every molecule, not only for those that go through the
        # double-bond branch below, so n_atoms and n_bonds stay consistent.
        self.n_atoms = mol.NumAtoms()

        # Count the non-aromatic double bonds present in the molecule.
        double_bond_count = sum(
            1
            for bond in (mol.GetBond(bond_id)
                         for bond_id in range(self.n_bonds))
            if bond.IsDouble() and not bond.IsAromatic())

        if double_bond_count > 0:
            if convtype == "MARVIN":
                # Use Marvin - molconvert (ChemAxon) to produce the MOLFILE.
                # WARNING: indexes may be different.
                molfile = convert_molconv("can", self.can_smi)
                # Load the molecule described by that MOL file.
                mol_reader = openbabel.OBConversion()
                marvin_mol = openbabel.OBMol()
                mol_reader.SetInFormat("mol")
                mol_reader.ReadString(marvin_mol, molfile)
                self.obmol = marvin_mol
            else:
                # Use Open Babel to produce the 2D MOL.
                self.obmol = convert(mol)

            self.number_explicit_db = explicit_cistrans(self.can_smi)
            # The converted molecule replaces the first one, so refresh counts.
            self.n_bonds = self.obmol.NumBonds()
            self.n_atoms = self.obmol.NumAtoms()

            # Symmetric adjacency map: all_bonds[a][b] == all_bonds[b][a] == 1.
            for atom_id in range(self.n_atoms):
                for b in self.get_bonds(self.obmol, atom_id):
                    self.all_bonds.setdefault(b[0], {})[b[1]] = 1
                    self.all_bonds.setdefault(b[1], {})[b[0]] = 1
            self.calc_e_z()
        else:
            self.e_z = [0] * self.n_bonds

        # Discard the perceived configuration when the number of non-zero
        # entries disagrees with the count returned by explicit_cistrans().
        implicit_db = sum(1 for flag in self.e_z if flag != 0)
        if self.number_explicit_db != implicit_db:
            self.e_z = [0] * self.n_bonds
예제 #2
0
#!/usr/bin/python

import openbabel as ob
import sys, glob, os
import numpy as np


# Input files are everything on the command line after the script name.
lis = sys.argv[1:]

mol = ob.OBMol()
xx = ob.OBConversion()

# Echo the argument list followed by a space and no newline, so the first
# read status printed below continues on the same line.
sys.stdout.write(str(lis) + " ")

xx.SetInFormat("g09")

for x in sorted(lis):
    # Remove only a trailing ".log".  str.strip(".log") would instead strip
    # any of the characters ".", "l", "o", "g" from both ends of the name
    # (turning "geo.log" into "e").
    seed = x[:-len(".log")] if x.endswith(".log") else x
    ok = xx.ReadFile(mol, x)
    print(ok)
    # The context manager closes the file even if the write fails.
    with open("NICS/" + seed + ".NICS.com", "w") as f:
        f.write("""%Mem=9600Mb
%NProcShared=8
#P  B3LYP/6-31+G(d,p)
    NMR Int=(Grid=UltraFine)
    pop=hirshfeld
    gfinput iop(6/7=3) pop=full NoSym

""")
예제 #3
0
 def testSimple(self):
     # Read CC(=O)Cl from SMILES and check that its molecular weight is
     # 78.5 to one decimal place.
     converter = ob.OBConversion()
     converter.SetInFormat("smi")
     molecule = ob.OBMol()
     converter.ReadString(molecule, "CC(=O)Cl")
     weight = molecule.GetMolWt()
     self.assertAlmostEqual(weight, 78.5, places=1)
    def Algorithm2(self, path, n):
        """Run the fragment-based conformation build ``n`` times for ``path``.

        For every run the input file is re-read, the molecule is split into
        fragments at its rotor bonds, ``minimization`` and ``fix_matrix`` are
        called on each fragment, and every further fragment is attached to
        the first one with ``connect_struct``.  Intermediate and final
        structures are written below ``{molODir}/<file stem>/<run number>/``.
        Any existing output directory for this input is removed first.

        Args:
            path: Input molecule file in the module-level ``inFormat``.
            n: Number of runs; run directories are numbered ``1..n``.
        """
        # Output directory is named after the input file minus its extension.
        fname = ntpath.basename(path)
        index = fname.find(f'.{inFormat}')
        # find() returns -1 when the extension is absent; slicing with that
        # value would silently cut the last character off the name.
        if index != -1:
            fname = fname[:index]
        odir = f'{molODir}/{fname}/'
        shutil.rmtree(odir, ignore_errors=True)
        os.mkdir(odir)

        for N in range(1, n + 1):
            # Read the input file afresh for every run.
            obConversion = ob.OBConversion()
            obConversion.SetInAndOutFormats(inFormat, ouFormat)
            orgMol = ob.OBMol()
            obConversion.ReadFile(orgMol, path)

            # Find the rotating "pieces" of the molecule: two bonded atoms
            # share a graph edge unless the bond between them is a rotor.
            rotMols = []
            rotBonds = []
            g = nx.Graph()
            for bond in ob.OBMolBondIter(orgMol):
                i = bond.GetBeginAtom().GetId()
                j = bond.GetEndAtom().GetId()
                g.add_nodes_from([i, j])
                if bond.IsRotor():
                    rotBonds.append(bond)
                else:
                    g.add_edge(i, j)

            # nx.connected_component_subgraphs() was removed in networkx 2.4;
            # a subgraph view per connected component yields the same nodes
            # in the same order.
            for component in nx.connected_components(g):
                nodes = list(g.subgraph(component).nodes)
                newMol = ob.OBMol()

                # Copy atoms
                for i in nodes:
                    copyAtom(orgMol.GetAtomById(i), newMol)

                # Copy bonds whose two ends both lie in this fragment.
                # `component` is a set, so each membership test is O(1).
                for bond in ob.OBMolBondIter(orgMol):
                    begin = bond.GetBeginAtom().GetId()
                    end = bond.GetEndAtom().GetId()
                    if begin in component and end in component:
                        copyBond(bond, newMol)

                rotMols.append(newMol)

            # Generate conformations of the local structures
            nodir = f'{odir}{N}/'
            lodir = f'{nodir}local-conf/'
            sodir = f'{nodir}connects/'
            os.mkdir(nodir)
            os.mkdir(lodir)
            os.mkdir(sodir)

            structs = []
            for i, mol in enumerate(rotMols):
                print('-' * 108,
                      '-' * 50 + f' Mol #{i} ' + '-' * 50,
                      '-' * 108,
                      sep='\n')
                struct = NewStruct(mol)
                struct.minimization(['-ff', 'MMFF94s'])
                # No refinement error is computed here; the title written
                # below always reports 0.
                err = 0
                struct.fix_matrix()
                structs.append(struct)

                # Write struct to file
                lfname = lodir + f'local-conf-{i}.{ouFormat}'
                title = f'Coordinate refinement error: {err}'
                mol.SetTitle(title)
                obConversion.WriteFile(mol, lfname)

            # Connect structs: every later fragment is attached to the first.
            struct0 = structs[0]
            for i in range(1, len(structs)):
                print('-' * 108,
                      '-' * 50 + f' Mol #0 + Mol #{i} ' + '-' * 41,
                      '-' * 108,
                      sep='\n')
                structi = structs[i]
                (err, tol, itr) = struct0.connect_struct(structi,
                                                         rotBonds,
                                                         tol=0.01,
                                                         tolStep=0.02,
                                                         tolMax=100,
                                                         iterMax=10000)
                struct0.fix_matrix()

                # Write struct to file
                sfname = sodir + f'conncet-{i}.{ouFormat}'
                title = f'Error: {err}, Tol: {tol}, Iter: {itr}'
                struct0.molecule.SetTitle(title)
                obConversion.WriteFile(struct0.molecule, sfname)

            # Write the final structure of this run with an empty title.
            struct0.molecule.SetTitle('')
            sfname = nodir + f'{fname}.{ouFormat}'
            obConversion.WriteFile(struct0.molecule, sfname)
예제 #5
0
def minimize_ob(selection='enabled', state=-1, ff='UFF', nsteps=500,
        conv=0.0001, cutoff=0, cut_vdw=6.0, cut_elec=8.0,
        name='', quiet=1, _self=cmd):
    '''
DESCRIPTION

    Energy minimization with openbabel

    Supports fixed atoms (flag fix)

ARGUMENTS

    selection = str: atom selection

    state = int: object state {default: -1}

    ff = GAFF|MMFF94s|MMFF94|UFF|Ghemical: force field {default: UFF}

    nsteps = int: number of steps, half run as steepest descent and half
    as conjugate gradients {default: 500}

    conv = float: convergence criterion passed to both minimizers
    {default: 0.0001}

    cutoff = 0/1: enable the cut-offs below {default: 0}

    cut_vdw = float: van der Waals cut-off, used if cutoff=1 {default: 6.0}

    cut_elec = float: electrostatic cut-off, used if cutoff=1 {default: 8.0}

    name = str: passed to load_or_update along with the result {default: ''}

    quiet = 0/1: print the final energy if 0 {default: 1}
    '''
    import openbabel as ob

    state = int(state)

    # Work on a temporary named selection; it is removed in the finally
    # clause whether or not the minimization succeeds.
    sele = _self.get_unused_name('_sele')
    _self.select(sele, selection, 0)

    try:
        ioformat = 'mol'
        molstr = _self.get_str(ioformat, sele, state)

        obconversion = ob.OBConversion()
        obconversion.SetInAndOutFormats(ioformat, ioformat)

        mol = ob.OBMol()
        obconversion.ReadString(mol, molstr)

        # Add hydrogens, remembering which ids are new so that exactly those
        # atoms can be removed again after the minimization.
        orig_ids = [a.GetId() for a in ob.OBMolAtomIter(mol)]
        mol.AddHydrogens()
        added_ids = set(a.GetId() for a in ob.OBMolAtomIter(mol)).difference(orig_ids)

        constraints = ob.OBFFConstraints()
        constraints.Setup(mol)

        # atoms with "flag fix"
        # NOTE(review): idx + 1 presumably converts a 0-based index into the
        # 1-based index AddAtomConstraint expects -- confirm against
        # get_fixed_indices.
        for idx in get_fixed_indices(sele, state, _self):
            constraints.AddAtomConstraint(idx + 1)

        # setup forcefield (one of: GAFF, MMFF94s, MMFF94, UFF, Ghemical)
        forcefield = ob.OBForceField.FindForceField(ff)
        if forcefield is None:
            # Fail with a clear message instead of an AttributeError on None
            # in the Setup() call below.
            raise ValueError('unknown or unavailable force field: %r' % (ff,))
        forcefield.Setup(mol, constraints)

        if int(cutoff):
            forcefield.EnableCutOff(True)
            forcefield.SetVDWCutOff(float(cut_vdw))
            forcefield.SetElectrostaticCutOff(float(cut_elec))

        # run minimization: half the steps with each algorithm
        half_steps = int(nsteps) // 2
        tolerance = float(conv)
        forcefield.SteepestDescent(half_steps, tolerance)
        forcefield.ConjugateGradients(half_steps, tolerance)
        forcefield.GetCoordinates(mol)

        # remove previously added hydrogens
        for hydro_id in added_ids:
            mol.DeleteAtom(mol.GetAtomById(hydro_id))

        molstr = obconversion.WriteString(mol)
        load_or_update(molstr, name, sele, state, _self)

        if not int(quiet):
            print(' Energy: %8.2f %s' % (forcefield.Energy(), forcefield.GetUnit()))
    finally:
        _self.delete(sele)
예제 #6
0
def process_arpeggio(mol2_filename):
    """Load a molecule, type its atoms and return the BioPython atom list.

    The input is loaded with ``SmallMol`` and written to ``tmp.pdb`` in the
    current working directory.  That PDB file is parsed with both BioPython
    and Open Babel, the two atom sets are matched by PDB serial number, and
    every BioPython atom is tagged with the atom types whose SMARTS patterns
    (from ``ATOM_TYPES``) match it.  Atoms of residues listed in ``STD_RES``
    get the types from ``PROT_ATOM_TYPES`` instead.

    Args:
        mol2_filename: Passed unchanged to ``SmallMol``.

    Returns:
        list: The BioPython atoms of the structure, each extended with the
        attributes ``atom_types`` (set), ``h_coords`` (list), ``is_metal``
        and ``is_halogen`` (bool).

    Raises:
        AtomSerialError: If two atoms share a serial number.
        OBBioMatchError: If an Open Babel atom's serial number has no
            BioPython counterpart.
    """
    pdb_filename = 'tmp.pdb'

    slig = SmallMol(mol2_filename)
    slig.write(pdb_filename)

    # LOAD STRUCTURE (BIOPYTHON)
    pdb_parser = PDBParser()
    s = pdb_parser.get_structure('structure', pdb_filename)
    s_atoms = list(s.get_atoms())

    logging.info('Loaded PDB structure (BioPython)')

    # CHECK FOR HYDROGENS IN THE INPUT STRUCTURE
    input_has_hydrogens = any(x.element == 'H' for x in s_atoms)

    if input_has_hydrogens:
        logging.info(
            'Detected that the input structure contains hydrogens. Hydrogen addition will be skipped.'
        )

    # LOAD STRUCTURE (OPENBABEL)
    ob_conv = ob.OBConversion()
    ob_conv.SetInFormat('pdb')
    mol = ob.OBMol()
    ob_conv.ReadFile(mol, pdb_filename)

    # CHECK THAT EACH ATOM HAS A UNIQUE SERIAL NUMBER
    all_serials = [x.serial_number for x in s_atoms]

    if len(all_serials) > len(set(all_serials)):
        raise AtomSerialError

    # MAPPING OB ATOMS TO BIOPYTHON ATOMS AND VICE VERSA

    # FIRST MAP PDB SERIAL NUMBERS TO BIOPYTHON ATOMS FOR SPEED LATER
    # THIS AVOIDS LOOPING THROUGH `s_atoms` MANY TIMES
    serial_to_bio = {x.serial_number: x for x in s_atoms}

    # DICTIONARIES FOR CONVERSIONS
    ob_to_bio = {}
    bio_to_ob = {}

    for ob_atom in ob.OBMolAtomIter(mol):

        serial = ob_atom.GetResidue().GetSerialNum(ob_atom)

        # MATCH TO THE BIOPYTHON ATOM BY SERIAL NUMBER
        try:
            biopython_atom = serial_to_bio[serial]

        except KeyError:
            # ERRORWORTHY IF WE CAN'T MATCH AN OB ATOM TO A BIOPYTHON ONE
            raise OBBioMatchError(serial)

        # `Id` IS A UNIQUE AND STABLE ID IN OPENBABEL
        # CAN RECOVER THE ATOM WITH `mol.GetAtomById(id)`
        ob_to_bio[ob_atom.GetId()] = biopython_atom
        bio_to_ob[biopython_atom] = ob_atom.GetId()

    logging.info('Mapped OB to BioPython atoms and vice-versa.')

    # ADD EMPTY DATA STRUCTURES FOR TAGGED ATOM DATA
    # IN A SINGLE ITERATION
    for atom in s_atoms:

        # FOR ATOM TYPING VIA OPENBABEL
        atom.atom_types = set()

        # LIST FOR EACH ATOM TO STORE EXPLICIT HYDROGEN COORDINATES
        atom.h_coords = []

        # DETECT METALS AND HALOGENS
        element = atom.element.upper()
        atom.is_metal = element in METALS
        atom.is_halogen = element in HALOGENS

    # ADD EXPLICIT HYDROGEN COORDS FOR H-BONDING INTERACTIONS
    # ADDING HYDROGENS DOESN'T SEEM TO INTERFERE WITH ATOM SERIALS (THEY GET ADDED AS 0)
    # SO WE CAN STILL GET BACK TO THE PERSISTENT BIOPYTHON ATOMS THIS WAY.
    if not input_has_hydrogens:
        # NOTE(review): `ph` is not defined inside this function; it has to
        # exist as a module-level name -- confirm, or pass it in explicitly.
        mol.AddHydrogens(False, True, ph)  # polaronly, correctForPH, pH

        logging.info('Added hydrogens.')

    # ATOM TYPING VIA OPENBABEL
    # ITERATE OVER ATOM TYPE SMARTS DEFINITIONS
    for atom_type, smartsdict in ATOM_TYPES.items():

        # FOR EACH ATOM TYPE SMARTS STRING
        for smarts in smartsdict.values():

            # GET OPENBABEL ATOM MATCHES TO THE SMARTS PATTERN
            ob_smart = ob.OBSmartsPattern()
            ob_smart.Init(str(smarts))
            ob_smart.Match(mol)

            # FLATTEN ALL MATCHES INTO ONE SET OF ATOM INDICES
            # (a single pass; avoids concatenating the match tuples pairwise)
            matched_indices = {
                idx for match in ob_smart.GetMapList() for idx in match
            }

            for idx in matched_indices:
                atom = mol.GetAtom(idx)
                ob_to_bio[atom.GetId()].atom_types.add(atom_type)

    # ALL WATER MOLECULES ARE HYDROGEN BOND DONORS AND ACCEPTORS
    for atom in (x for x in s_atoms if x.get_full_id()[3][0] == 'W'):
        atom.atom_types.add('hbond acceptor')
        atom.atom_types.add('hbond donor')

    # OVERRIDE PROTEIN ATOM TYPING FROM DICTIONARY
    for residue in s.get_residues():

        if residue.resname not in STD_RES:
            continue

        for atom in residue.child_list:

            # REMOVE TYPES IF ALREADY ASSIGNED FROM SMARTS
            for atom_type in PROT_ATOM_TYPES.keys():
                atom.atom_types.discard(atom_type)

            # ADD ATOM TYPES FROM DICTIONARY
            # The lookup key does not depend on the atom type, so it is
            # built once per atom.
            atom_id = residue.resname.strip() + atom.name.strip()

            for atom_type, atom_ids in PROT_ATOM_TYPES.items():
                if atom_id in atom_ids:
                    atom.atom_types.add(atom_type)

    # Only referenced by the disabled output code kept in the string literal
    # further down.
    def make_pymol_string(entity):
        '''
        Feed me a BioPython atom or BioPython residue.
        See `http://pymol.sourceforge.net/newman/user/S0220commands.html`.
        chain-identifier/resi-identifier/name-identifier
        chain-identifier/resi-identifier/
        '''

        if isinstance(entity, Atom):

            chain = entity.get_parent().get_parent()
            residue = entity.get_parent()
            atom_name = entity.name

        elif isinstance(entity, Residue):
            chain = entity.get_parent()
            residue = entity
            atom_name = ''

        else:
            raise TypeError(
                'Cannot make a PyMOL string from a non-Atom or Residue object.'
            )

        res_num = residue.id[1]

        # ADD INSERTION CODE IF NEED BE
        if residue.id[2] != ' ':
            res_num = str(res_num) + residue.id[2]

        macro = '{}/{}/{}'.format(chain.id, res_num, atom_name)

        return macro

    # The string literal below is disabled code (writing an `.atomtypes`
    # file); as a bare string it has no effect at runtime.
    '''

    with open(pdb_filename.replace('.pdb', '.atomtypes'), 'w') as fo:

        if headers:
            fo.write('{}\n'.format('\t'.join(
                ['atom', 'atom_types']
            )))

        for atom in s_atoms:
            fo.write('{}\n'.format('\t'.join([str(x) for x in [make_pymol_string(atom), sorted(tuple(atom.atom_types))]])))

    logging.info('Typed atoms.')


    '''

    return s_atoms
예제 #7
0
def parse_mol_info(fname, fcharges, axis, buffa, buffo, pbcbonds, printdih,
                   ignorebonds, ignoreimproper):
    iaxis = {"x": 0, "y": 1, "z": 2}
    if axis in iaxis:
        repaxis = iaxis[axis]
    else:
        print("Error: invalid axis")
        sys.exit(0)

    if fcharges:
        chargesLabel = {}
        with open(fcharges, "r") as f:
            for line in f:
                chargesLabel[line.split()[0]] = float(line.split()[1])

    # set openbabel file format
    base, ext = os.path.splitext(fname)
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats(ext[1:], "xyz")
    # trick to disable ring perception and make the ReadFile waaaay faster
    # Source: https://sourceforge.net/p/openbabel/mailman/openbabel-discuss/thread/56e1812d-396a-db7c-096d-d378a077853f%40ipcms.unistra.fr/#msg36225392
    obConversion.AddOption("b", openbabel.OBConversion.INOPTIONS)

    # read molecule to OBMol object
    mol = openbabel.OBMol()
    obConversion.ReadFile(mol, fname)
    mol.ConnectTheDots()  # necessary because of the 'b' INOPTION

    # split the molecules
    molecules = mol.Separate()

    # detect the molecules types
    mTypes = {}
    mapmTypes = {}
    atomIdToMol = {}
    nty = 0
    for i, submol in enumerate(molecules, start=1):
        atomiter = openbabel.OBMolAtomIter(submol)
        atlist = []
        for at in atomiter:
            atlist.append(at.GetAtomicNum())
            atomIdToMol[at.GetId()] = i
        foundType = None

        for ty in mTypes:
            # check if there's already a molecule of this type
            if atlist == mTypes[ty]:
                foundType = ty

        # if not, create a new type
        if not foundType:
            nty += 1
            foundType = nty
            mTypes[nty] = atlist

        mapmTypes[i] = foundType

    # get atomic labels from pdb
    idToAtomicLabel = {}
    if ext[1:] == "pdb":
        for res in openbabel.OBResidueIter(mol):
            for atom in openbabel.OBResidueAtomIter(res):
                if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                    idToAtomicLabel[
                        atom.GetId()] = res.GetAtomID(atom).strip() + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
                else:
                    idToAtomicLabel[atom.GetId()] = res.GetAtomID(atom).strip()
    else:
        if not ob3:
            etab = openbabel.OBElementTable()
        for atom in openbabel.OBMolAtomIter(mol):
            if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                if ob3:
                    idToAtomicLabel[atom.GetId()] = openbabel.GetSymbol(
                        atom.GetAtomicNum()) + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
                else:
                    idToAtomicLabel[atom.GetId()] = etab.GetSymbol(
                        atom.GetAtomicNum()) + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
            else:
                if ob3:
                    idToAtomicLabel[atom.GetId()] = openbabel.GetSymbol(
                        atom.GetAtomicNum())
                else:
                    idToAtomicLabel[atom.GetId()] = etab.GetSymbol(
                        atom.GetAtomicNum())

    # print(idToAtomicLabel)

    # identify atom types and get masses
    outMasses = "Masses\n\n"

    massTypes = {}
    mapTypes = {}
    nmassTypes = 0
    atomIterator = openbabel.OBMolAtomIter(mol)
    for atom in atomIterator:
        i = atom.GetId()
        if idToAtomicLabel[i] not in massTypes:
            nmassTypes += 1
            mapTypes[nmassTypes] = idToAtomicLabel[i]
            massTypes[idToAtomicLabel[i]] = nmassTypes
            outMasses += "\t%d\t%.3f\t# %s\n" % (
                nmassTypes, atom.GetAtomicMass(), idToAtomicLabel[i])

    # create atoms list
    outAtoms = "Atoms # full\n\n"

    xmin = float("inf")
    xmax = float("-inf")
    ymin = float("inf")
    ymax = float("-inf")
    zmin = float("inf")
    zmax = float("-inf")
    natoms = 0
    acoords = []
    for mnum, imol in enumerate(molecules, start=1):
        atomIterator = openbabel.OBMolAtomIter(imol)
        for atom in sorted(atomIterator, key=lambda x: x.GetId()):
            natoms += 1
            i = atom.GetId()
            apos = (atom.GetX(), atom.GetY(), atom.GetZ())
            acoords.append(Atom(atom.GetAtomicNum(), apos))

            # look for the maximum and minimum x for the box (improve later with numpy and all coordinates)
            if apos[0] > xmax:
                xmax = apos[0]
            if apos[0] < xmin:
                xmin = apos[0]
            if apos[1] > ymax:
                ymax = apos[1]
            if apos[1] < ymin:
                ymin = apos[1]
            if apos[2] > zmax:
                zmax = apos[2]
            if apos[2] < zmin:
                zmin = apos[2]

            if fcharges:
                outAtoms += "\t%d\t%d\t%d\t%.6f\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum, massTypes[idToAtomicLabel[i]],
                    chargesLabel[idToAtomicLabel[i]], atom.GetX(), atom.GetY(),
                    atom.GetZ(), idToAtomicLabel[i])
            else:
                outAtoms += "\t%d\t%d\t%d\tX.XXXXXX\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum, massTypes[idToAtomicLabel[i]], atom.GetX(),
                    atom.GetY(), atom.GetZ(), idToAtomicLabel[i])

    # define box shape and size
    try:
        fromBounds = False
        rcell = mol.GetData(12)
        cell = openbabel.toUnitCell(rcell)
        v1 = [
            cell.GetCellVectors()[0].GetX(),
            cell.GetCellVectors()[0].GetY(),
            cell.GetCellVectors()[0].GetZ()
        ]
        v2 = [
            cell.GetCellVectors()[1].GetX(),
            cell.GetCellVectors()[1].GetY(),
            cell.GetCellVectors()[1].GetZ()
        ]
        v3 = [
            cell.GetCellVectors()[2].GetX(),
            cell.GetCellVectors()[2].GetY(),
            cell.GetCellVectors()[2].GetZ()
        ]
        boxinfo = [v1, v2, v3]
        orthogonal = True
        for i, array in enumerate(boxinfo):
            for j in range(3):
                if i == j:
                    continue
                if not math.isclose(0., array[j], abs_tol=1e-6):
                    orthogonal = False
    except:
        fromBounds = True
        v1 = [xmax - xmin, 0., 0.]
        v2 = [0., ymax - ymin, 0.]
        v3 = [0., 0., zmax - zmin]
        orthogonal = True

    # add buffer
    if orthogonal:
        buf = []
        boxinfo = [v1, v2, v3]
        for i, val in enumerate(boxinfo[repaxis]):
            if i == repaxis:
                buf.append(val + buffa)
            else:
                buf.append(val)
        boxinfo[repaxis] = buf
        for i in range(3):
            if i == repaxis:
                continue
            buf = []
            for j, val in enumerate(boxinfo[i]):
                if j == i:
                    buf.append(val + buffo)
                else:
                    buf.append(val)
            boxinfo[i] = buf

    # print(boxinfo)

    # Duplicate to get the bonds in the PBC. Taken from (method _crd2bond):
    # https://github.com/tongzhugroup/mddatasetbuilder/blob/66eb0f15e972be0f5534dcda27af253cd8891ff2/mddatasetbuilder/detect.py#L213
    if pbcbonds:
        acoords = Atoms(acoords, cell=boxinfo, pbc=True)
        repatoms = acoords.repeat(
            2
        )[natoms:]  # repeat the unit cell in each direction (len(repatoms) = 7*natoms)
        tree = cKDTree(acoords.get_positions())
        d = tree.query(repatoms.get_positions(), k=1)[0]
        nearest = d < 8.
        ghost_atoms = repatoms[nearest]
        realnumber = np.where(nearest)[0] % natoms
        acoords += ghost_atoms

        write("replicated.xyz",
              acoords)  # write the structure with the replicated atoms

        # write new mol with new bonds
        nmol = openbabel.OBMol()
        nmol.BeginModify()
        for idx, (num, position) in enumerate(
                zip(acoords.get_atomic_numbers(), acoords.positions)):
            a = nmol.NewAtom(idx)
            a.SetAtomicNum(int(num))
            a.SetVector(*position)
        nmol.ConnectTheDots()
        # nmol.PerceiveBondOrders() # super slow becauses it looks for rings
        nmol.EndModify()
    else:
        acoords = Atoms(acoords, cell=boxinfo, pbc=False)
        nmol = openbabel.OBMol()
        nmol.BeginModify()
        for idx, (num, position) in enumerate(
                zip(acoords.get_atomic_numbers(), acoords.positions)):
            a = nmol.NewAtom(idx)
            a.SetAtomicNum(int(num))
            a.SetVector(*position)
        nmol.ConnectTheDots()
        # nmol.PerceiveBondOrders() # super slow becauses it looks for rings
        nmol.EndModify()

    # identify bond types and create bond list
    outBonds = "Bonds # harmonic\n\n"

    bondTypes = {}
    mapbTypes = {}
    nbondTypes = 0
    nbonds = 0
    bondsToDelete = []
    bondIterators = []
    if ignorebonds:
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            bondIterators.append(openbabel.OBMolBondIter(smol))
    else:
        bondIterators.append(openbabel.OBMolBondIter(nmol))

    lastidx = 1
    for iterator in bondIterators:
        for i, bond in enumerate(iterator, lastidx):
            b1 = bond.GetBeginAtom().GetId()
            b2 = bond.GetEndAtom().GetId()

            # check if its a bond of the replica only
            if (b1 >= natoms) and (b2 >= natoms):
                bondsToDelete.append(bond)
                continue
            # remap to a real atom if needed
            if b1 >= natoms:
                b1 = realnumber[b1 - natoms]
            if b2 >= natoms:
                b2 = realnumber[b2 - natoms]

            # identify bond type
            btype1 = "%s - %s" % (idToAtomicLabel[b1], idToAtomicLabel[b2])
            btype2 = "%s - %s" % (idToAtomicLabel[b2], idToAtomicLabel[b1])

            if btype1 in bondTypes:
                bondid = bondTypes[btype1]
                bstring = btype1
            elif btype2 in bondTypes:
                bondid = bondTypes[btype2]
                bstring = btype2
            else:
                nbondTypes += 1
                mapbTypes[nbondTypes] = btype1
                bondid = nbondTypes
                bondTypes[btype1] = nbondTypes
                bstring = btype1

            nbonds += 1
            outBonds += "\t%d\t%d\t%d\t%d\t# %s\n" % (nbonds, bondid, b1 + 1,
                                                      b2 + 1, bstring)

        lastidx = i

    # delete the bonds of atoms from other replicas
    for bond in bondsToDelete:
        nmol.DeleteBond(bond)

    # identify angle types and create angle list
    angleTypes = {}
    mapaTypes = {}
    nangleTypes = 0
    nangles = 0
    angleIterators = []

    if ignorebonds:
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            smol.FindAngles()
            angleIterators.append(openbabel.OBMolAngleIter(smol))
        prevnumatoms = sepmols[0].NumAtoms()
    else:
        nmol.FindAngles()
        angleIterators.append(openbabel.OBMolAngleIter(nmol))

    outAngles = "Angles # harmonic\n\n"

    lastidx = 1
    for j, iterator in enumerate(angleIterators, 1):
        for i, angle in enumerate(iterator, lastidx):
            if ignorebonds:
                a1 = angle[1] + prevnumatoms
                a2 = angle[0] + prevnumatoms
                a3 = angle[2] + prevnumatoms
            else:
                a1 = angle[1]
                a2 = angle[0]
                a3 = angle[2]

            # remap to a real atom if needed
            if a1 >= natoms:
                a1 = realnumber[a1 - natoms]
            if a2 >= natoms:
                a2 = realnumber[a2 - natoms]
            if a3 >= natoms:
                a3 = realnumber[a3 - natoms]

            atype1 = "%s - %s - %s" % (
                idToAtomicLabel[a1], idToAtomicLabel[a2], idToAtomicLabel[a3])
            atype2 = "%s - %s - %s" % (
                idToAtomicLabel[a3], idToAtomicLabel[a2], idToAtomicLabel[a1])

            if atype1 in angleTypes:
                angleid = angleTypes[atype1]
                astring = atype1
            elif atype2 in angleTypes:
                angleid = angleTypes[atype2]
                astring = atype2
            else:
                nangleTypes += 1
                mapaTypes[nangleTypes] = atype1
                angleid = nangleTypes
                angleTypes[atype1] = nangleTypes
                astring = atype1

            nangles += 1
            outAngles += "\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                nangles, angleid, a1 + 1, a2 + 1, a3 + 1, astring)

        lastidx = i
        if ignorebonds:
            prevnumatoms += sepmols[j].NumAtoms()

    # identify dihedral types and create dihedral list
    if printdih:
        dihedralTypes = {}
        mapdTypes = {}
        ndihedralTypes = 0
        ndihedrals = 0
        dihedralIterators = []

        if ignorebonds:
            sepmols = nmol.Separate()
            for smol in sepmols[1:]:
                smol.FindTorsions()
                dihedralIterators.append(openbabel.OBMolTorsionIter(smol))
        else:
            nmol.FindTorsions()
            dihedralIterators.append(openbabel.OBMolTorsionIter(nmol))

        outDihedrals = "Dihedrals # charmmfsw\n\n"

        lastidx = 1
        for iterator in dihedralIterators:
            for i, dihedral in enumerate(iterator, lastidx):
                a1 = dihedral[0]
                a2 = dihedral[1]
                a3 = dihedral[2]
                a4 = dihedral[3]

                # remap to a real atom if needed
                if a1 >= natoms:
                    a1 = realnumber[a1 - natoms]
                if a2 >= natoms:
                    a2 = realnumber[a2 - natoms]
                if a3 >= natoms:
                    a3 = realnumber[a3 - natoms]
                if a4 >= natoms:
                    a4 = realnumber[a4 - natoms]

                dtype1 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a1], idToAtomicLabel[a2],
                    idToAtomicLabel[a3], idToAtomicLabel[a4])
                dtype2 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a4], idToAtomicLabel[a3],
                    idToAtomicLabel[a2], idToAtomicLabel[a1])

                if dtype1 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype1]
                    dstring = dtype1
                elif dtype2 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype2]
                    dstring = dtype2
                else:
                    ndihedralTypes += 1
                    mapdTypes[ndihedralTypes] = dtype1
                    dihedralid = ndihedralTypes
                    dihedralTypes[dtype1] = ndihedralTypes
                    dstring = dtype1

                ndihedrals += 1
                outDihedrals += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                    ndihedrals, dihedralid, a1 + 1, a2 + 1, a3 + 1, a4 + 1,
                    dstring)

            lastidx = i

        if not ignoreimproper:
            # look for the improper dihedrals
            improperDihedralTypes = {}
            mapiDTypes = {}
            niDihedralTypes = 0
            niDihedrals = 0
            mollist = []

            if ignorebonds:
                sepmols = nmol.Separate()
                for smol in sepmols[1:]:
                    smol.PerceiveBondOrders()
                    mollist.append(smol)
            else:
                nmol.PerceiveBondOrders()
                mollist.append(nmol)

            outImpropers = "Impropers # harmonic\n\n"

            for imol in mollist:
                atomIterator = openbabel.OBMolAtomIter(imol)
                for atom in atomIterator:
                    try:
                        # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetValence())
                        expDegree = atom.GetValence()
                    except:
                        # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetExplicitDegree())
                        expDegree = atom.GetExplicitDegree()

                    # returns impropers for atoms with connected to other 3 atoms and SP2 hybridization
                    if atom.GetHyb() == 2 and expDegree == 3:
                        connectedAtoms = []
                        for atom2, depth in openbabel.OBMolAtomBFSIter(
                                imol,
                                atom.GetId() + 1):
                            if depth == 2:
                                connectedAtoms.append(atom2)

                        torsional = [
                            atom.GetId() + 1, connectedAtoms[0].GetId() + 1,
                            connectedAtoms[1].GetId() + 1,
                            connectedAtoms[2].GetId() + 1
                        ]

                        a1 = torsional[0] - 1
                        a2 = torsional[1] - 1
                        a3 = torsional[2] - 1
                        a4 = torsional[3] - 1

                        # remap to a real atom if needed
                        if a1 >= natoms:
                            a1 = realnumber[a1 - natoms]
                        if a2 >= natoms:
                            a2 = realnumber[a2 - natoms]
                        if a3 >= natoms:
                            a3 = realnumber[a3 - natoms]
                        if a4 >= natoms:
                            a4 = realnumber[a4 - natoms]

                        dtype1 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a1], idToAtomicLabel[a2],
                            idToAtomicLabel[a3], idToAtomicLabel[a4])
                        dtype2 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a4], idToAtomicLabel[a3],
                            idToAtomicLabel[a2], idToAtomicLabel[a1])

                        if dtype1 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype1]
                            dstring = dtype1
                        elif dtype2 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype2]
                            dstring = dtype2
                        else:
                            niDihedralTypes += 1
                            mapiDTypes[niDihedralTypes] = dtype1
                            idihedralid = niDihedralTypes
                            improperDihedralTypes[dtype1] = niDihedralTypes
                            dstring = dtype1

                        niDihedrals += 1
                        outImpropers += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                            niDihedrals, idihedralid, a1 + 1, a2 + 1, a3 + 1,
                            a4 + 1, dstring)

    # print header
    if printdih and (ndihedrals > 0):
        if ignoreimproper or (niDihedrals == 0):
            header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\n" % (
                fname, natoms, nbonds, nangles, ndihedrals, nmassTypes,
                nbondTypes, nangleTypes, ndihedralTypes)
        else:
            header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\t%d impropers\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\t%d improper types\n\n" % (
                fname, natoms, nbonds, nangles, ndihedrals, niDihedrals,
                nmassTypes, nbondTypes, nangleTypes, ndihedralTypes,
                niDihedralTypes)
    else:
        header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\n" % (
            fname, natoms, nbonds, nangles, nmassTypes, nbondTypes,
            nangleTypes)

    # add box info
    if fromBounds:
        boxsize = [(xmin, xmax), (ymin, ymax), (zmin, zmax)]
        boxsize[repaxis] = (boxsize[repaxis][0] - buffa / 2.,
                            boxsize[repaxis][1] + buffa / 2.)
        for i in range(3):
            if i == repaxis:
                continue
            boxsize[i] = (boxsize[i][0] - buffo / 2.,
                          boxsize[i][1] + buffo / 2.)
        header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
            boxsize[0][0], boxsize[0][1], boxsize[1][0], boxsize[1][1],
            boxsize[2][0], boxsize[2][1])
    else:
        if orthogonal:
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2])
        else:
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n\t%.8f\t%.8f\t%.8f\t xy xz yz\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2],
                boxinfo[1][0], boxinfo[2][0], boxinfo[2][1])

    # print Coeffs
    outCoeffs = "Pair Coeffs\n\n"

    for i in range(1, nmassTypes + 1):
        outCoeffs += "\t%d\teps\tsig\t# %s\n" % (i, mapTypes[i])

    outCoeffs += "\nBond Coeffs\n\n"

    for i in range(1, nbondTypes + 1):
        outCoeffs += "\t%d\tK\tr_0\t# %s\n" % (i, mapbTypes[i])

    outCoeffs += "\nAngle Coeffs\n\n"

    for i in range(1, nangleTypes + 1):
        outCoeffs += "\t%d\tK\ttetha_0 (deg)\t# %s\n" % (i, mapaTypes[i])

    if printdih and (ndihedrals > 0):
        outCoeffs += "\nDihedral Coeffs\n\n"

        for i in range(1, ndihedralTypes + 1):
            outCoeffs += "\t%d\tK\tn\tphi_0 (deg)\tw\t# %s\n" % (i,
                                                                 mapdTypes[i])

        if not ignoreimproper and (niDihedralTypes > 0):
            outCoeffs += "\nImproper Coeffs\n\n"

            for i in range(1, niDihedralTypes + 1):
                outCoeffs += "\t%d\tK\txi_0 (deg)\t# %s\n" % (i, mapiDTypes[i])

    if printdih and (ndihedrals > 0):
        if ignoreimproper or (niDihedralTypes == 0):
            return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles + "\n" + outDihedrals
        else:
            return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles + "\n" + outDihedrals + "\n" + outImpropers
    else:
        return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles
예제 #8
0
def main(argv=""):
  """Generate minor-groove/helix dummy atoms and align a ligand to them.

  Parses ``argv`` with ``getopt`` (see the ``-h`` text below for the option
  list), reads the DNA and ligand files given as the two positional
  arguments, writes ``dummies.mol2/.pdb`` and ``helix.mol2/.pdb`` under the
  output prefix and, unless no alignment was requested, writes one
  ``ligand_aligned_to_bp<center>_<pose>.pdb`` plus a ``.log`` file holding
  the RMSD for every alignment centre.

  Side effects: (re)binds the module globals ``finegrain_step``,
  ``remove_dummies``, ``dna_file``, the ``dna_*`` strand settings and, with
  ``-d``, ``log``.

  Args:
    argv: sequence of command-line arguments without the program name.

  Raises:
    SystemExit: after printing the help text, and with status 2 on invalid
      or inconsistent arguments.
  """
  global finegrain_step
  global remove_dummies
  global dna_leading_strand_basepairs_in_advance_of_lagging_strand_when_looking_at_minor_groove
  global dna_bp_counting_antiparallel
  global dna_leading_strand
  global dna_leading_strand_starts_at_position
  global dna_lagging_strand
  global dna_lagging_strand_position_at_leading_strand_start
  global log
  global dna_file

  # Set default variables
  iterate = False
  iterate_from = None
  iterate_to = None
  iter_step = 1

  align_to = None

  up_pose = True
  both_poses = False

  generate_all_dummies = True
  generate_dummies_from = None
  generate_dummies_to = None

  finegrain_step = 1
  remove_dummies = True
  dna_leading_strand_basepairs_in_advance_of_lagging_strand_when_looking_at_minor_groove = 2
  dna_bp_counting_antiparallel = 1
  dna_leading_strand = 'I'
  dna_leading_strand_starts_at_position = -72
  dna_lagging_strand = 'J'
  dna_lagging_strand_position_at_leading_strand_start = +72

  output_prefix = "output/"

  # check given command-line arguments
  try:
    opts, remaining_args = getopt.getopt(argv,"dhia:gf:o:k",["help","iterate","iterate-from=","iterate-to=","iter-step=","align-to=","down-pose","both-poses","generate-all-dummies","generate-dummies-from=","generate-dummies-to=","finegrain-step=","keep-dummies","dna-leading-strand=","dna-lagging-strand=","dna-leading-start=","dna-lagging-start=","bp-counting-parallel","minor-groove-offset=","output-prefix="])
  except getopt.GetoptError:
    print('You provided unusual arguments. Call me with -h to learn more.')
    sys.exit(2)
  for opt, arg in opts:
    # Single options are compared with ==, not `in ("--name")`: the latter is
    # a substring test on a plain string, which made "-f" match
    # "--iterate-from" and "-o" match "--minor-groove-offset".
    if opt in ('-h', '--help'):
      print('The following options are available:')
      print("""
      -h, --help:

      -i, --iterate [False]
         --iterate-from=
         --iterate-to=
         --iter-step= [1]

      -a, --align-to= [False]
         --down-pose [False]
         --both-poses [False]

      -g, --generate-all-dummies [True]
         --generate-dummies-from=
         --generate-dummies-to=
      -f, --finegrain-step= [1]
      -k, --keep-dummies [False]

         --dna-leading-strand= [I]
         --dna-lagging-strand= [J]
         --dna-leading-start= [-72]
         --dna-lagging-start= [+72]

         --bp-counting-parallel [False]
         --minor-groove-offset= [2]

      -o, --output-prefix= [output/]

      [1]   => dna-file
      [2]   => ligand-file""")
      sys.exit()
    elif opt == "-d":
      logging.basicConfig(level=logging.DEBUG)
      log = logging
    elif opt in ("-i", "--iterate"):
      iterate = True
    elif opt == "--iterate-from":
      iterate_from = float(arg)
    elif opt == "--iterate-to":
      iterate_to = float(arg)
    elif opt == "--iter-step":
      iter_step = float(arg)
    elif opt in ("-a", "--align-to"):
      center_ligand_at = float(arg)
      align_to = True
    elif opt == "--down-pose":
      up_pose = False
    elif opt == "--both-poses":
      both_poses = True
    elif opt in ('-g', '--generate-all-dummies'):
      generate_all_dummies = True
    elif opt == "--generate-dummies-from":
      generate_dummies_from = int(arg)
      generate_all_dummies = False
    elif opt == "--generate-dummies-to":
      generate_dummies_to = int(arg)
      generate_all_dummies = False
    elif opt in ("-f", "--finegrain-step"):
      requested_step = float(arg)
      if requested_step > 1:
        log.warning("You provided the finegrain-step argument with a value greater than 1. This is non-sense, and will be ignored => finegrain-step = 1.")
      elif requested_step <= 0:
        # zero would divide by zero below, and a non-positive step can never
        # advance the interpolation
        log.warning("You provided the finegrain-step argument with a value that is not positive. This is non-sense, and will be ignored => finegrain-step = 1.")
      elif int(1/requested_step) != 1/requested_step: #checks if finegrain-step is sensible, i.e. smooth divisor of 1
        log.warning("You provided the finegrain-step argument with a value which will not add up to 1. This will result in non-uniform distribution of interpolated dummies. I will continue anyway, interpolating with the given steps until reaching 1.")
        finegrain_step = requested_step
      else:
        log.info("Using finegraining, i.e. interpolation between dummy atoms...")
        finegrain_step = requested_step
    elif opt == "--dna-leading-strand":
      dna_leading_strand = str(arg)
    elif opt == "--dna-lagging-strand":
      dna_lagging_strand = str(arg)
    elif opt == "--dna-leading-start":
      dna_leading_strand_starts_at_position = int(arg)
    elif opt == "--dna-lagging-start":
      dna_lagging_strand_position_at_leading_strand_start = int(arg)
    elif opt == "--bp-counting-parallel":
      dna_bp_counting_antiparallel = False
    elif opt == "--minor-groove-offset":
      dna_leading_strand_basepairs_in_advance_of_lagging_strand_when_looking_at_minor_groove = int(arg)
    elif opt in ("-o", "--output-prefix"):
      output_prefix = str(arg)+"/"
    elif opt in ("-k", "--keep-dummies"):
      remove_dummies = False

  # check mandatory file fields
  if len(remaining_args) != 2:
    log.critical("You did not provide input files. Call me with -h to learn more.")
    # a usage error, so exit non-zero like the other argument failures
    sys.exit(2)
  dna_file = remaining_args[0]
  ligand_file = remaining_args[1]

  # check argument combinations; set advanced (=clever) default values
  # (compare against None: 0 is a valid position and must count as "given")
  has_from = iterate_from is not None
  has_to = iterate_to is not None
  if has_from != has_to:
    log.critical("You provided only iterate-from or iterate-to, the other is missing. This doesn't make sense; quitting.")
    sys.exit(2)
  elif iterate and not has_from:
    log.critical("You wanted to iterate, but did neither provide iterate-from nor iterate-to. Please do this; I am quitting.")
    sys.exit(2)
  elif has_from:
    if iterate_from > iterate_to:
      log.info('You gave iterate-from and iterate-to in reverse order. I changed that for you...')
      iterate_from, iterate_to = iterate_to, iterate_from
    if not iterate:
      log.info('You forgot to set iterate. Setting iterate to True, as you provided iterate-from and iterate-to.')
      iterate = True
    if iter_step <= 0:
      # a zero or negative step would never reach iterate-to
      log.info('Setting iter_step to 1 [default].')
      iter_step = 1
  if not iterate and not align_to:
    log.info("You chose not to perform any ligand alignments. Alright, then...")
    no_alignment = True
  elif align_to and iterate:
    log.info("You wanted to do an iteration and a single alignment. Don't know what to do; quitting.")
    sys.exit(2)
  else:
    no_alignment = None

  if not generate_all_dummies and (generate_dummies_from is None or generate_dummies_to is None):
    # with only one bound the dummy generation cannot determine its range
    log.critical("You provided only generate-dummies-from or generate-dummies-to, the other is missing. This doesn't make sense; quitting.")
    sys.exit(2)

  # ... done with configurations; start the real work!

  ## create ncp object
  ncp = openbabel.OBMol()
  ## create ligand object
  ligand = openbabel.OBMol()
  ## read files
  ## WATCH OUT: works only for single molecule files!
  obconversion = openbabel.OBConversion()
  obconversion.SetInFormat(ncp_format)
  if obconversion.ReadFile(ncp, dna_file):
    log.info('Successfully read DNA containing file ' + str(dna_file))
  else:
    log.warning('Failed to read DNA containing file ' + str(dna_file))
  obconversion.SetInFormat(ligand_format)
  if obconversion.ReadFile(ligand, ligand_file):
    log.info('Successfully read ligand file ' + str(ligand_file))
  else:
    log.warning('Failed to read ligand file ' + str(ligand_file))

  leading_strand_phosphates, lagging_strand_phosphates = get_all_phosphates(ncp)

  if generate_all_dummies:
    # overriding even otherwisely set values...
    leading_positions = [int(i) for i in leading_strand_phosphates]
    generate_dummies_from = min(leading_positions)+dna_leading_strand_basepairs_in_advance_of_lagging_strand_when_looking_at_minor_groove #-1 this does not work for antiparallel counting
    generate_dummies_to = max(leading_positions)-1
    log.debug("Generating dummies from "+ str(generate_dummies_from) + " to "+ str(generate_dummies_to))
  ncp_dummy_atoms, helix_dummy_atoms = generate_dummies(leading_strand_phosphates, lagging_strand_phosphates, generate_dummies_from, generate_dummies_to, finegrain_step)

  ## write out the dummy clouds, each as mol2 and pdb
  obconversion.SetOutFormat("mol2")
  obconversion.WriteFile(ncp_dummy_atoms, output_prefix+"dummies.mol2")
  obconversion.SetOutFormat("pdb")
  obconversion.WriteFile(ncp_dummy_atoms, output_prefix+"dummies.pdb")
  obconversion.SetOutFormat("mol2")
  obconversion.WriteFile(helix_dummy_atoms, output_prefix+"helix.mol2")
  obconversion.SetOutFormat("pdb")
  obconversion.WriteFile(helix_dummy_atoms, output_prefix+"helix.pdb")

  # break here, if no alignment is wished
  if no_alignment:
    return

  if up_pose:
    posename = "up"
    antipose = "down"
  else:
    posename = "down"
    antipose = "up"

  if iterate:
    center = iterate_from
    iter_to = iterate_to
    iteration_step = iter_step
  else:
    # a single alignment is a one-pass "iteration" starting at the centre
    center = center_ligand_at
    iteration_step = 1
    iter_to = center_ligand_at+1

  # the output format is still "pdb" here, so aligned ligands are written as pdb
  while center < iter_to:
    align_to_these_atoms = select_dummy_atoms(ncp_dummy_atoms, helix_dummy_atoms, center, up_pose)
    aligned_ligand, rmsd = align_ligand(align_to_these_atoms, ligand)

    if center > 0:
      sign = "+"
    else:
      sign = ""
    file_stem = output_prefix+"ligand_aligned_to_bp" + sign + str(center) + "_"

    if aligned_ligand is not None:
      log.debug("RMSD: "+ str(rmsd))
      obconversion.WriteFile(aligned_ligand, file_stem+posename+".pdb")
      with open(file_stem+posename+".log", "w") as f:
        f.write(str(rmsd)+"\n")

    if both_poses:
      align_to_these_atoms = select_dummy_atoms(ncp_dummy_atoms, helix_dummy_atoms, center, (not up_pose))
      aligned_ligand, rmsd = align_ligand(align_to_these_atoms, ligand)

      if aligned_ligand is not None:
        obconversion.WriteFile(aligned_ligand, file_stem+antipose+".pdb")
        with open(file_stem+antipose+".log", "w") as f:
          f.write(str(rmsd)+"\n")

    center += iteration_step

  return
예제 #9
0
def parse_smiles(smiles):
    """Parse a SMILES string into a new ``ob.OBMol`` and return it.

    The molecule is filled in by the module-level ``_smiles_parser`` through
    its ``ReadString`` method; the value ``ReadString`` returns is not
    checked here.
    """
    molecule = ob.OBMol()
    _smiles_parser.ReadString(molecule, smiles)
    return molecule
예제 #10
0
def _phosphate_pair_sums(first, second):
  # Per-axis sums (x, y, z) of two objects exposing GetX/GetY/GetZ.
  return (first.GetX() + second.GetX(),
          first.GetY() + second.GetY(),
          first.GetZ() + second.GetZ())


def _dummy_vector(x, y, z):
  # Build an openbabel.vector3 from three coordinates and log it.
  coord = openbabel.vector3()
  coord.SetX(x)
  coord.SetY(y)
  coord.SetZ(z)
  log.debug("... Coordinates of dummy: " + str(coord.GetX()) + " " + str(coord.GetY()) + " " + str(coord.GetZ()))
  return coord


def _add_dummy_residue(mol, residue_name, residue_number, coord):
  # Append one "Du" atom at coord to mol, wrapped in its own residue on chain "W".
  new_atom = mol.NewAtom()
  new_atom.SetType("Du")
  new_atom.SetAtomicNum(0)
  new_atom.SetVector(coord)

  new_res = mol.NewResidue()
  new_res.SetName(residue_name)
  new_res.SetNum(residue_number)
  log.debug("Created atom #"+new_res.GetNumString())

  new_res.SetChain("W")
  new_res.AddAtom(new_atom)
  new_res.SetAtomID(new_atom, "dummy")
  new_res.SetHetAtom(new_atom, 1)


def generate_dummies(dna_leading_strand_phosphates, dna_lagging_strand_phosphates, start_at_position, stop_at_position, finegraining_step=1, *args):
  """Generate dummy atoms halfway between paired phosphates.

  For every leading-strand position from ``start_at_position`` to
  ``stop_at_position`` (inclusive, counting up or down as required) two
  dummy atoms are created, each the midpoint of the leading-strand phosphate
  and a lagging-strand phosphate:

  * "minorgroove" (residue name ``DUM``): the lagging index is shifted by the
    module-level minor-groove offset;
  * "helix" (residue name ``HDU``): the lagging index is not shifted.

  The lagging index is derived from the module globals
  ``dna_leading_strand_starts_at_position``,
  ``dna_lagging_strand_position_at_leading_strand_start`` and
  ``dna_bp_counting_antiparallel``. The residue number of each dummy is the
  leading position as a float string.

  Args:
    dna_leading_strand_phosphates: mapping from position (as ``str``) to an
      object with ``GetX``/``GetY``/``GetZ``.
    dna_lagging_strand_phosphates: same, for the lagging strand.
    start_at_position: first leading-strand position (int).
    stop_at_position: last leading-strand position (int).
    finegraining_step: when strictly between 0 and 1, additional dummies are
      linearly interpolated between consecutive positions in steps of this
      size.
    *args: accepted and ignored.

  Returns:
    Tuple ``(dummies, helix)`` of ``openbabel.OBMol`` objects.
  """
  log.info('generating dummy atom coordinates from phosphates...')

  ## 0.9 create dummy atom objects
  dummies = openbabel.OBMol()
  dummies.SetTitle("minor groove dummy atom cloud for "+dna_file)

  helix = openbabel.OBMol()
  helix.SetTitle("minor groove dummy atom cloud for "+dna_file)

  # 1.0 check operation mode; a single position (start == stop) is handled as
  # an upward count of length one instead of producing a zero range step
  if start_at_position <= stop_at_position:
    log.info("Counting upwards for dummy generation...")
    step = +1
  else:
    log.info("Counting downwards for dummy generation...")
    step = -1
  start = start_at_position
  stop = stop_at_position + step

  minor_groove_offset = dna_leading_strand_basepairs_in_advance_of_lagging_strand_when_looking_at_minor_groove
  # only a step strictly inside (0, 1) can advance the interpolation and end it
  interpolate = 0 < finegraining_step < 1
  targets = (("minorgroove", dummies, "DUM"), ("helix", helix, "HDU"))

  # 1.1 get coordinates of relevant phosphates
  log.info('Getting coordinates of phosphates ...')
  for i in range(start, stop, step):
    lead_index = str(i)
    log.debug(lead_index)
    progress = i - dna_leading_strand_starts_at_position
    log.debug(progress)
    for mode, target_mol, residue_name in targets:
      if dna_bp_counting_antiparallel:
        lag_position = dna_lagging_strand_position_at_leading_strand_start - progress
        if mode == "minorgroove":
          lag_position += minor_groove_offset
      else:
        lag_position = dna_lagging_strand_position_at_leading_strand_start + progress
        if mode == "minorgroove":
          lag_position -= minor_groove_offset
      lag_index = str(lag_position)

      log.debug(lag_index)
      log.debug('... Visiting position ' + lead_index + ' in leading strand')

      _vector_leading = dna_leading_strand_phosphates[lead_index]
      _vector_lagging = dna_lagging_strand_phosphates[lag_index]

      ## 1.1/1.2/1.3 midpoint of the two phosphates -> dummy atom + residue
      current_sums = _phosphate_pair_sums(_vector_leading, _vector_lagging)
      _coord_dummy_atom = _dummy_vector(*[axis_sum/2 for axis_sum in current_sums])
      _add_dummy_residue(target_mol, residue_name, str(float(i)), _coord_dummy_atom)

      ## 1.4 interpolate between dummies, if wanted (= finegraining);
      ## never past the last requested position, in either counting direction
      if interpolate and (i != stop_at_position):
        lead_index_next = str(i + step)
        log.debug(lead_index_next)
        if dna_bp_counting_antiparallel:
          lag_index_next = str(lag_position - step)
        else:
          lag_index_next = str(lag_position + step)
        log.debug(lag_index_next)
        next_sums = _phosphate_pair_sums(
          dna_leading_strand_phosphates[lead_index_next],
          dna_lagging_strand_phosphates[lag_index_next])

        mantissa = finegraining_step
        # NOTE(review): the "1 - step" bound is kept from the original; it can
        # drop the last interpolation point (e.g. a step of 0.5 yields none).
        # Confirm the intent before changing it.
        while mantissa < 1 - finegraining_step:
          ## 1.4.1 linear combination of this midpoint and the next one
          _coord_dummy_atom = _dummy_vector(*[
            (1-mantissa)*here/2 + (mantissa)*there/2
            for here, there in zip(current_sums, next_sums)])

          ## 1.4.2/1.4.3 atom + residue, numbered by fractional position
          _add_dummy_residue(target_mol, residue_name, str(i + step*mantissa), _coord_dummy_atom)

          ## 1.4.4 try if there is a next step to take...
          mantissa += finegraining_step

  return dummies, helix
예제 #11
0
def align_ligand(dummies, ligand):
  """Move a copy of the ligand so that its dummy atoms fit the given dummies.

  The ligand's dummy atoms (obtained from ``get_dummies``) are aligned to
  ``dummies`` with ``openbabel.OBAlign``; the resulting rotation and the
  centroid of the aligned dummy atoms are then applied to a copy of the whole
  ligand. When the module-level ``remove_dummies`` flag is set, atoms with
  atomic number 0 are deleted from that copy.

  Returns:
    ``(aligned_ligand, rmsd)``, or ``(None, None)`` when the alignment fails.
  """
  log.info('Aligning ligand dummy atoms to desired dummy atoms...')

  # work on a copy; the ligand passed in must stay as it was read
  aligned_ligand = openbabel.OBMol(ligand)

  # dummy atoms of the ligand
  log.debug('... get dummy atoms of ligand')
  ligand_dummies = get_dummies(ligand)

  # translation vector from read-in position to origin
  log.info('... determing translation vector from read-in to origin')
  translation = ligand_dummies.Center(1)

  # align the ligand's dummy atoms to their destination
  log.info('... doing the alignment for dummy atoms')
  aligner = openbabel.OBAlign(dummies, ligand_dummies)
  if not aligner.Align():
    return None, None

  rmsd = aligner.GetRMSD()
  log.debug('RMSD of alignment: ' + str(rmsd))

  # rotation matrix found by the alignment
  log.info('... determining the rotation matrix')
  rotation_matrix = aligner.GetRotMatrix()
  rot = openbabel.double_array([1,2,3,4,5,6,7,8,9])
  rotation_matrix.GetArray(rot)

  # dump the matrix entries only when debug logging is enabled
  if log.getLogger().isEnabledFor(logging.DEBUG):
    log.debug('--- rotation matrix ---')
    for entry in range(9):
      log.debug(str(entry)+ " : " + str(rot[entry]))

  # positioning vector = centroid of the dummy atoms after the alignment
  # (plain centering would not work because of the rotation)
  aligner.UpdateCoords(ligand_dummies)
  log.info('... generating positioning vector')
  positioning = openbabel.vector3()
  dummy_count = 0
  for dummy_atom in openbabel.OBMolAtomIter(ligand_dummies):
    dummy_count += 1
    positioning += dummy_atom.GetVector()
  positioning /= dummy_count

  # inverted translation: moves the ligand to the origin
  translation *= -1

  # transposed rotation matrix as a flat array for OBMol.Rotate
  rot_inv = openbabel.double_array([1,2,3,4,5,6,7,8,9])
  rotation_matrix.transpose().GetArray(rot_inv)

  # to origin -> rotate -> to final destination
  aligned_ligand.Translate(translation)
  aligned_ligand.Rotate(rot_inv)
  aligned_ligand.Translate(positioning)

  # strip the dummy atoms from the result, if desired
  if remove_dummies:
    log.info('Cleaning the ligand of unwanted dummies...')
    # collect first, delete afterwards: never delete while iterating
    unwanted = [candidate
                for candidate in openbabel.OBMolAtomIter(aligned_ligand)
                if candidate.GetAtomicNum() == 0]
    for candidate in unwanted:
      aligned_ligand.DeleteAtom(candidate)

  log.info('... returning the aligned ligand.')
  return aligned_ligand, rmsd
예제 #12
0
def atoms_to_identifiers(atoms, bonds):
    """Derive the smiles for all the organic ligands.

    Builds an OpenBabel molecule from every entry of ``atoms`` that is not
    ``None`` and not a metal, connects them with those ``bonds`` whose two
    ends were both kept, and splits the resulting canonical SMILES into its
    dot-separated fragments.

    Args:
        atoms: sequence of atom objects (or ``None``) exposing ``is_metal``
            and ``atomic_number``.
        bonds: mapping from an index pair ``(i, j)`` into ``atoms`` to a bond
            record whose item ``[1]`` is a key of ``OB_BOND_ORDERS``.

    Returns:
        A list with one ``Ligand(smiles, inchi, inchikey, sa_score)`` per
        unique fragment other than lone ``O``, ``H`` or ``N``. ``None`` when
        openbabel/pybel cannot be imported or the conversion yields an empty
        SMILES.
    """
    try:
        import openbabel as ob
        import pybel
    except ImportError:
        # Don't bother if no openbabel
        return

    obmol = ob.OBMol()
    obmol.BeginModify()

    # Translation table: index in `atoms` -> 1-based atom index in obmol.
    # Skipping None/metal atoms is what splits the ligands into fragments.
    seen_atoms = {}
    for idx, atom in enumerate(atoms):
        if atom is None or atom.is_metal:
            continue
        new_atom = obmol.NewAtom()
        new_atom.SetAtomicNum(atom.atomic_number)
        seen_atoms[idx] = len(seen_atoms) + 1

    for bond, bond_info in bonds.items():
        if bond[0] in seen_atoms and bond[1] in seen_atoms:
            obmol.AddBond(seen_atoms[bond[0]], seen_atoms[bond[1]],
                          OB_BOND_ORDERS[bond_info[1]])

    obmol.EndModify()

    pybelmol = pybel.Molecule(obmol)

    # Strip out stereochemistry
    full_molecule = pybelmol.write('can', opt={'i': None}).strip()

    if full_molecule == '':
        debug("OpenBabel conversion failed; try newer version")
        return

    # Fix for delocalised carboxylate detached from metals: rewrite C(O)O
    # when followed by ")", "." or the end of the string. The end-of-string
    # case needs a real anchor; "$" inside a character class is only a
    # literal dollar sign and never matched there.
    full_molecule = re.sub(r'C\(O\)O(?=[).]|$)', 'C(=O)O', full_molecule)

    # remove any lone atoms
    unique_smiles = set(full_molecule.split(".")) - {'O', 'H', 'N'}

    identifiers = []
    for smile in unique_smiles:
        fragment = pybel.readstring('smi', smile)
        can_smiles = fragment.write('can', opt={'i': None}).strip()
        inchi = fragment.write('inchi', opt={'w': None}).strip()
        inchikey = fragment.write('inchikey', opt={'w': None}).strip()
        identifiers.append(
            Ligand(can_smiles, inchi, inchikey, sa_score(can_smiles)))

    return identifiers
예제 #13
0
def return_cross_matrix_for_binder_connections(matrix,
                                               binder_smiles,
                                               fieldname_prefix=""):
    """Build a pairwise table of binder-atom distances and overlap flags.

    For every entry of ``matrix`` the ligand file named by
    ``<prefix>filename`` is read with the module-level ``obconversion``
    object, its binder atom is looked up with ``get_binder_atom`` and stored
    back into the entry under ``<prefix>binder_atom`` (``matrix`` is
    therefore modified in place).  Afterwards every unordered pair of
    entries that both have a binder atom gets one record in the result.

    Args:
        matrix: mapping whose keys are strings of the form
            ``"<number>_<orientation>"`` and whose values are dicts holding
            at least ``<prefix>filename`` and ``<prefix>E_total``.
        binder_smiles: pattern forwarded unchanged to ``get_binder_atom``.
        fieldname_prefix: prefix prepended to every field name used.

    Returns:
        dict keyed by ``(current, compare)`` tuples.  Each value holds
        ``"distance"`` (binder atom to binder atom), ``"sum_Etotal"`` and
        ``"forbidden"``: 1 or 2 for pairs that most likely overlap, 0.5 for
        likely neighbours, 0 otherwise.
    """
    filename_key = fieldname_prefix + "filename"
    binder_key = fieldname_prefix + "binder_atom"
    energy_key = fieldname_prefix + "E_total"

    # Position differences below these thresholds are treated as clashes.
    length_binder_same_orientation = 4
    length_binder_opposite_orientation = 5

    # Every molecule stays referenced from this list while the binder atoms
    # are in use -- presumably so the atoms' parent OBMol objects are not
    # garbage collected underneath them (TODO confirm).
    ligands = []
    for pos in matrix:
        filename = matrix[pos][filename_key]
        ligand = openbabel.OBMol()
        ligands.append(ligand)
        # The input format is taken from the file extension.
        obconversion.SetInFormat(filename[filename.rfind(".") + 1:])
        if obconversion.ReadFile(ligand, filename):
            log.info('Successfully read ligand ' + str(filename))
        else:
            log.warning('Failed to read ligand ' + str(filename))
        matrix[pos][binder_key] = get_binder_atom(ligand, binder_smiles)

    cross_matrix = {}
    # Entries that already served as ``current``; a set keeps the membership
    # test in the inner loop O(1).
    visited = set()
    for current in matrix:
        visited.add(current)
        log.debug("current " + current)

        # Identity test instead of ``!= None`` so that no overloaded
        # comparison operator of the wrapped atom object is invoked.
        current_atom = matrix[current][binder_key]
        if current_atom is None:
            log.info(str(current) + " did not contain a binder atom...")
            continue

        for compare in matrix:
            log.debug("compare " + compare)
            compare_atom = matrix[compare][binder_key]
            if compare_atom is None or compare in visited:
                continue
            log.debug("new value added...")

            entry = {
                "distance": current_atom.GetDistance(compare_atom),
                "sum_Etotal": float(matrix[current][energy_key]) +
                float(matrix[compare][energy_key]),
            }
            cross_matrix[(current, compare)] = entry

            orientation = [current.split("_")[1], compare.split("_")[1]]
            number = [
                float(current.split("_")[0]),
                float(compare.split("_")[0])
            ]
            same_orientation = orientation[0] == orientation[1]
            gap = abs(number[0] - number[1])

            if same_orientation and gap < length_binder_same_orientation:
                entry["forbidden"] = 1  # most likely overlapping
            elif (not same_orientation
                  and gap < length_binder_opposite_orientation):
                entry["forbidden"] = 2  # most likely overlapping
            elif (not same_orientation
                  and gap == length_binder_opposite_orientation):
                entry["forbidden"] = 0.5  # most likely neighboring
            else:
                entry["forbidden"] = 0  # most likely okay
    return cross_matrix
예제 #14
0
    def Draw(self,
             spinsystem=None,
             firstshell=[],
             secondshell=[],
             labels={},
             molSize=(450, 150)):
        """Render the compound as SVG text with colour-highlighted atoms.

        Args:
            spinsystem: atom identifiers to highlight as the spin system.
                When None they are collected from ``self.spinsystems()`` and
                the two shells are derived from ``self.D()``; any
                ``firstshell``/``secondshell`` arguments are then ignored.
            firstshell: atom identifiers highlighted in the first-shell
                colour (only used when ``spinsystem`` is given).
            secondshell: atom identifiers highlighted in the second-shell
                colour (only used when ``spinsystem`` is given).
            labels: mapping of node -> label text.  Keys are translated
                through the alias table to 0-based atom indices.  When empty
                and ``spinsystem`` is None, labels ``'C<n>'`` are generated
                for the spin-system atoms.
            molSize: ``(width, height)`` handed to ``MolDraw2DSVG``.

        Returns:
            The SVG document as a single string, with its height enlarged by
            30 and the drawing wrapped in ``<g transform='translate(0,15)'>``.

        NOTE(review): the list/dict defaults are mutable objects, but this
        body only ever rebinds those names and never mutates the defaults.
        """
        # first need to check for explicit hydrogens because RDKit ignores them
        if 'H' in self.smiles:
            # need to remove it: work on a hydrogen-free copy of the graph
            S = Compound(self.G.copy())
            S.remove_hydrogens()
            # aliases maps each remaining node to its 1-based position in
            # the iteration order of S.G.nodes
            aliases = {}
            for node in S.G.nodes:
                aliases[node] = len(aliases) + 1
            MOL = S.MOL()
        else:
            MOL = self.MOL()
            # no renumbering needed: every node aliases to itself
            aliases = {node: node for node in self.G}
        if len(labels) != 0:
            # re-key the caller's labels to 0-based indices
            labels = {aliases[n] - 1: labels[n] for n in labels}
        if spinsystem is None:  # generate atom details if needed
            spinsystem = []
            for ss in self.spinsystems():
                # each spin system is a whitespace-separated string of atoms
                spinsystem.extend(ss.split())

            firstshell, secondshell = [], []
            # first shell: entries of self.D() for a spin-system atom that
            # are not themselves in the spin system
            for atom in spinsystem:
                for node in self.D()[int(atom)]:
                    if str(node) not in spinsystem:
                        firstshell.extend([str(node)])
            # second shell: entries of self.D() for a first-shell atom that
            # are in neither the spin system nor the first shell
            for atom in firstshell:
                for node in self.D()[int(atom)]:
                    if str(node) not in spinsystem and str(
                            node) not in firstshell:
                        secondshell.extend([str(node)])
            # translate to aliases and drop duplicates
            spinsystem = list(set([aliases[int(a)] for a in spinsystem]))
            firstshell = list(set([aliases[int(a)] for a in firstshell]))
            secondshell = list(set([aliases[int(a)] for a in secondshell]))
            if len(labels) == 0:
                # default labels: 'C' followed by the aliased atom number
                labels = {}
                for atom in spinsystem:
                    labels[atom - 1] = 'C' + str(atom)
        else:
            # caller supplied the atoms: only translate them to aliases
            spinsystem = [aliases[int(n)] for n in spinsystem]
            firstshell = [aliases[int(n)] for n in firstshell]
            secondshell = [aliases[int(n)] for n in secondshell]
        # colors are RGB but normalized (/255)
        color_scheme = {
            'spinsystem': (0.91, 0.4, 0.4),
            'firstshell': (0.4, 0.55, 0.91),
            'secondshell': (0.4, 0.86, 0.66)
        }
        # colors is keyed by 0-based atom index; later shells overwrite the
        # colour of an index that was already assigned
        colors, highlight = {}, []
        for atom in spinsystem:
            colors[atom - 1] = color_scheme['spinsystem']
            highlight.extend([atom - 1])
        for atom in firstshell:
            colors[atom - 1] = color_scheme['firstshell']
            highlight.extend([atom - 1])
        for atom in secondshell:
            colors[atom - 1] = color_scheme['secondshell']
            highlight.extend([atom - 1])

        # http://rdkit.blogspot.com/2015/02/new-drawing-code.html

        mol = Chem.MolFromMolBlock(MOL)
        # draw on a copy made through the binary representation
        mc = Chem.Mol(mol.ToBinary())
        Chem.rdDepictor.Compute2DCoords(mc)
        # The branch below is disabled by this hard-coded flag.
        remove_aromatics = False  # remove aromatic atoms -- doesnt work yet
        if remove_aromatics:
            # load coordinates from RDKit into OpenBabel to remove aromaticity
            obConversion = openbabel.OBConversion(
            )  # input the smiles into OBMol object, check indices maybe
            obConversion.SetInAndOutFormats('mol', 'mol')
            ob_mol = openbabel.OBMol()
            obConversion.ReadString(ob_mol, Chem.MolToMolBlock(mc))
            # remove aromatic properties
            ob_mol.UnsetAromaticPerceived()
            for atom in openbabel.OBMolAtomIter(ob_mol):
                if atom.IsAromatic():
                    atom.UnsetAromatic()
            for bond in openbabel.OBMolBondIter(ob_mol):
                if bond.IsAromatic():
                    bond.UnsetAromatic()
            # use pybel to generate MOL block
            mymol = pybel.Molecule(ob_mol)
            unset_aromaticmol = mymol.write('mol')
            print(mymol.write('smi'))
            # load new mol block back into rdkit
            mol = Chem.MolFromMolBlock(unset_aromaticmol)
            mc = Chem.Mol(mol.ToBinary())

        drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0], molSize[1])
        opts = drawer.drawOptions()
        for atom in labels:
            opts.atomLabels[atom] = labels[atom]
        drawer.DrawMolecule(mc,
                            highlightAtoms=highlight,
                            highlightAtomColors=colors,
                            highlightBonds=[])
        drawer.FinishDrawing()
        svg = drawer.GetDrawingText()
        svg = svg.replace('svg:', '')
        # need to correct the y-axis cutoff in the svg with <g transform="translate(0,15)">
        # NOTE(review): everything below relies on fixed line positions in
        # the drawer output (line index 6 = width/height, 7 = background
        # rect) -- confirm against the RDKit version in use.
        svg = svg.split('\n')  # break into list for editing purposes
        # correct the height
        whline = svg[6].split()  # call width, height line
        hchange = 30
        # whline[1] is sliced from offset 8 up to the first 'p' to obtain
        # the numeric height
        newheight = int(whline[1][8:whline[1].find('p')]
                        ) + hchange  # add 30 points to current height
        whline[1] = whline[1][:8] + str(newheight) + whline[1][whline[1].find(
            'p'):]  # change height portion to new height
        svg[6] = ' '.join(whline)  # change width-height line to use new height
        # correct rectangle height, location, and opacity
        opacity = 1  # 0 makes background transparent, 1 makes background white/whatever you set rectangle to
        rect = svg[7].split(
        )  # <rect style='opacity:1.0;fill:#FFFFFF;stroke:none' width='450' height='180' x='0' y='-15'> </rect>
        # replace the value that follows the first ':' of the style token
        opacline = rect[1].split(':')
        opacline[1] = str(opacity) + opacline[1][opacline[1].find(';'):]
        rect[1] = ':'.join(opacline)
        # token 3 is the height attribute, token 5 the y attribute
        rect[3] = rect[3][:7] + "'" + str(newheight) + "'"
        rect[5] = rect[5][:2] + "'-" + str(int(hchange / 2.0)) + "'>"
        svg[7] = ' '.join(rect)
        # now we need to add in an object delimiter so we can shift this whole thing down
        svg.insert(7, "<g transform='translate(0,15)'>")
        # now we need to add indents to every line after 7
        for l in range(8, len(svg) - 2):
            svg[l] = '  ' + svg[l]
        # now we add in the line to end the object
        svg.insert(-2, "</g>")
        return '\n'.join(svg)  # rejoin svg
예제 #15
0
def read_structures(filename=None, format=None, id_tag=None, errors="strict"):
    """Open a structure source with OpenBabel and return a record reader.

    The first record is read here so that problems with the file or its
    format surface immediately; the remaining work is delegated to
    ``_file_reader``, whose result is returned (an iterator of
    ``(id, OBMol)`` pairs according to the original documentation).

    filename -- path of the structure file; None (or any false value) means
        stdin, read through ``io.DEV_STDIN``
    format -- format hint, resolved together with the filename by
        ``io.normalize_format`` (falls back to uncompressed "smi")
    id_tag -- forwarded unchanged to ``_file_reader``
    errors -- name used to look up the parse error handler

    Raises TypeError for a non-string filename, ValueError for an
    unsupported compression or unknown format, NotImplementedError when
    stdin cannot be read on this platform, and IOError when nothing could
    be read from a readable file and OpenBabel logged an error message.
    """
    if filename is not None and not isinstance(filename, basestring):
        raise TypeError("'filename' must be None or a string")
    error_handler = error_handlers.get_parse_error_handler(errors)

    obconversion = ob.OBConversion()
    format_name, compression = io.normalize_format(
        filename, format, default=("smi", ""))
    if compression not in ("", ".gz"):
        raise ValueError("Unsupported compression type for %r" % (filename, ))

    # No explicit decompression step: OpenBabel detects gzip by itself.
    if not obconversion.SetInFormat(format_name):
        raise ValueError("Unknown structure format %r" % (format_name, ))

    obmol = ob.OBMol()

    if filename:
        filename_repr = repr(filename)

        # Silence OpenBabel's stderr output for the duration of the read,
        # remembering the previous level so it can be put back afterwards.
        if HAS_ERROR_LOG:
            ob.obErrorLog.ClearLog()
            saved_level = ob.obErrorLog.GetOutputLevel()
            ob.obErrorLog.SetOutputLevel(-1)

        success = obconversion.ReadFile(obmol, filename)

        ob_error = None
        if HAS_ERROR_LOG:
            ob.obErrorLog.SetOutputLevel(saved_level)
            if ob.obErrorLog.GetErrorMessageCount():
                ob_error = _get_ob_error(ob.obErrorLog)

        if not success:
            # A failed read means either an error or an empty file.  Opening
            # the file ourselves lets the usual exception surface when it is
            # missing or unreadable.
            open(filename).close()

            # The file is readable, so the only remaining explanation is
            # whatever OpenBabel logged (if anything).
            if ob_error is not None:
                raise IOError(5, ob_error, filename)
    else:
        stdin_path = io.DEV_STDIN
        if stdin_path is None:
            raise NotImplementedError(
                "Unable to read from stdin on this operating system")
        success = obconversion.ReadFile(obmol, stdin_path)
        filename_repr = "<stdin>"

    # The source is open; the iterator takes over from here.
    return _file_reader(obconversion, obmol, success, id_tag, filename_repr,
                        error_handler)
예제 #16
0
def peptide_smile_FF(peptide, numMol):
    """Generate conformers for a cyclic peptide and export the best one.

    The peptide is converted to a cyclic SMILES, hydrogens are added, a
    fixed number of conformers is generated and scored with ``calc_energy``,
    the conformers are clustered, and every conformer whose absolute energy
    equals the minimum found is written to ``<peptide>.sdf``.  That file is
    then converted to ``<peptide>.mol2`` with OpenBabel.

    Args:
        peptide: peptide description understood by ``interprete``; its
            ``str()`` is also used as the output file stem.
        numMol: not used by this function (kept for interface
            compatibility).

    Returns:
        None.  The results are the two files written to the current
        directory and the progress lines printed to stdout.
    """
    # parameters
    numConfs = 5
    # cluster method: (RMSD|TFD) = RMSD
    clusterMethod = "RMSD"
    clusterThreshold = 2.0
    minimizeIterations = 20

    seq = interprete(peptide)
    smi = smiles_from_seq_cyclic(seq)
    mol = Chem.MolFromSmiles(smi)
    m = Chem.AddHs(mol)

    # generate the conformers
    conformerIds = gen_conformers(m, numConfs)
    conformerPropsDict = {}
    for conformerId in conformerIds:
        # energy minimise (optional) and energy calculation
        conformerPropsDict[conformerId] = calc_energy(m, conformerId,
                                                      minimizeIterations)

    # cluster the conformers
    rmsClusters = cluster_conformers(m, clusterMethod, clusterThreshold)
    print ("Molecule", peptide, ": generated", len(conformerIds), "conformers and", len(rmsClusters), "clusters")

    minEnergy = 9999999999999
    minID = 0
    for clusterNumber, cluster in enumerate(rmsClusters, 1):
        rmsWithinCluster = align_conformers(m, cluster)
        for idx, conformerId in enumerate(cluster):
            # Look up this conformer's properties *before* reading its
            # energy.  The previous code read the energy first and therefore
            # compared the energy of the previously visited conformer,
            # never considering the last one at all.
            props = conformerPropsDict[conformerId]
            e = props["energy_abs"]
            if e < minEnergy:
                minEnergy = e
                # records the first member (centroid) of the cluster that
                # holds the lowest-energy conformer
                minID = cluster[0]
            props["cluster_no"] = clusterNumber
            props["cluster_centroid"] = cluster[0] + 1
            # rmsWithinCluster has no entry for the centroid itself, hence
            # the shift by one for every other member.
            if idx > 0:
                props["rms_to_centroid"] = rmsWithinCluster[idx - 1]
            else:
                props["rms_to_centroid"] = 0.0

    print ( "Minimum energy: " + str(minEnergy) + " ID: " + str(minID))

    # Save only one file.sdf, holding the minimum-energy conformer(s)
    filename = str(peptide) + ".sdf"
    w = Chem.SDWriter(filename)
    try:
        for cluster in rmsClusters:
            for confId in cluster:
                conformerProps = conformerPropsDict[confId]
                if conformerProps["energy_abs"] != minEnergy:
                    continue
                # start from a clean property set for this record
                for name in m.GetPropNames():
                    m.ClearProp(name)
                m.SetIntProp("conformer_id", confId + 1)
                for key in conformerProps.keys():
                    m.SetProp(key, str(conformerProps[key]))
                e = conformerProps["energy_abs"]
                if e:
                    m.SetDoubleProp("energy_delta", e - minEnergy)
                w.write(m, confId=confId)
        w.flush()
    finally:
        # release the file handle even if writing a record fails
        w.close()

    # convert the SD file to mol2 next to it
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats("sdf", "mol2")
    mol = openbabel.OBMol()
    obConversion.ReadFile(mol, filename)
    obConversion.WriteFile(mol, filename[:-4] + ".mol2")
예제 #17
0
def generate_fragfile(filename, outtype, ffparams=None, eqgeom=False):
  """Write the fragment description files for one molecule.

  The molecule is read from ``filename`` with OpenBabel (input format taken
  from the file extension), split with ``split_mol_fragments_daylight`` and
  described in three outputs that share the input's base name:

  * ``<base>.txt``: atom list with coordinates and either the charge,
    epsilon and sigma from ``ffparams`` or the literal placeholders
    ``q``/``epsilon``/``sigma``;
  * ``<base>.dfr``: fragments and fragment connections, plus bonds (unless
    ``outtype`` is "header"), angles, dihedrals and improper dihedrals
    ("flex"), or only the dihedrals that span fragments ("min");
  * ``<base>_fragments/``: one ``.xyz`` file per fragment.

  Args:
    filename: path of the input structure.
    outtype: "flex", "header" or "min"; selects how much goes into the .dfr.
    ffparams: optional path of a force-field parameter file.  It is read as
      a non-bonded section (label, charge, epsilon, sigma, short label per
      line) followed by sections introduced by ``$bond``, ``$angle``,
      ``$dihedral`` and ``$improper``.
    eqgeom: with ``ffparams``, write the measured bond length / angle of
      the input geometry instead of the tabulated equilibrium value.

  Raises:
    SystemExit: ``outtype`` is not one of the accepted values.
    KeyError: a bond, angle, dihedral or improper has no matching entry in
      the parameter file.
  """
  # check outtype
  if outtype not in ["flex", "header", "min"]:
    sys.exit('Invalid argument indicating verbosity of .dfr (%s). Use "flex", "header" or "min".' % outtype)

  # get basename and file extension
  base, ext = os.path.splitext(filename)

  # set openbabel file format (input from the extension, output always xyz)
  obConversion = openbabel.OBConversion()
  obConversion.SetInAndOutFormats(ext[1:],"xyz")

  # read molecule to OBMol object
  mol = openbabel.OBMol()
  obConversion.ReadFile(mol, filename)

  if ffparams:
    # get atomic labels from pdb: atom id -> stripped residue atom name
    idToAtomicLabel = {}
    for res in openbabel.OBResidueIter(mol):
      for atom in openbabel.OBResidueAtomIter(res):
        idToAtomicLabel[atom.GetId()] = res.GetAtomID(atom).strip()

    # read force field parameters and store into dictionaries
    labelToSLabel = {}
    charges = {}
    epsilons = {}
    sigmas = {}
    bonds = {}
    angles = {}
    dihedrals = {}
    impropers = {}
    with open(ffparams, 'r') as f:
      line = f.readline()
      # read nb params (everything up to the line containing "$bond")
      # NOTE(review): if the file has no "$bond" line, readline() keeps
      # returning '' at EOF and this loop does not terminate.
      while "$bond" not in line:
        if line.strip().startswith("#") or not line.strip():
          line = f.readline()
          continue

        # columns: label, charge, epsilon, sigma, short label
        lbl = line.split()[0]
        charges[lbl] = line.split()[1]
        epsilons[lbl] = line.split()[2]
        sigmas[lbl] = line.split()[3]
        labelToSLabel[lbl] = line.split()[4]

        line = f.readline()

      # read bond params
      line = f.readline()
      while "$angle" not in line:
        if line.strip().startswith("#") or "$end" in line or not line.strip():
          line = f.readline()
          continue

        # normalise en dashes to plain hyphens before splitting the key
        line = line.replace("–", "-")

        # store the constants for the order of the input and the inverse order
        consts = "\t".join(line.split()[1:])
        bonds[line.split()[0]] = consts
        bonds["-".join(line.split()[0].split("-")[::-1])] = consts

        line = f.readline()

      # read angle params
      line = f.readline()
      while "$dihedral" not in line:
        if line.strip().startswith("#") or "$end" in line or not line.strip():
          line = f.readline()
          continue

        line = line.replace("–", "-")

        # store the constants for the order of the input and the inverse order
        consts = "\t".join(line.split()[1:])
        angles[line.split()[0]] = consts
        angles["-".join(line.split()[0].split("-")[::-1])] = consts

        line = f.readline()

      # read dihedrals
      line = f.readline()
      while "$improper" not in line:
        if line.strip().startswith("#") or "$end" in line or not line.strip():
          line = f.readline()
          continue

        line = line.replace("–", "-")

        # store the constants for the order of the input and the inverse order
        consts = "\t".join(line.split()[1:])
        dihedrals[line.split()[0]] = consts
        dihedrals["-".join(line.split()[0].split("-")[::-1])] = consts

        line = f.readline()

      # read impropers (last section: runs until end of file)
      line = f.readline()
      while line:
        if line.strip().startswith("#") or "$end" in line or not line.strip():
          line = f.readline()
          continue

        line = line.replace("–", "-")

        # store the constants for the order of the input and the inverse order
        consts = "\t".join(line.split()[1:])
        impropers[line.split()[0]] = consts
        impropers["-".join(line.split()[0].split("-")[::-1])] = consts

        line = f.readline()

    # check if there are unused labels (parameter labels no atom carries)
    for lbl in charges.keys():
      fnd = False
      for i in idToAtomicLabel:
        if lbl == idToAtomicLabel[i]:
          fnd = True
          break
      if not fnd:
        print("!!! WARNING: There are unused atoms in your parameter file (%s) !!!" % lbl)


  # split the molecule
  fragments, fragConnection, dummyToAtom = split_mol_fragments_daylight(mol)

  # dummy atoms ids (keys of the dummy -> real atom mapping)
  dummyAtoms = dummyToAtom.keys()

  # write molecule to .txt file (passed as ljname to DICE)
  with open(base+".txt","w") as f:
    f.write("*\n1\n")   
    # collect every non-dummy atom of every fragment
    atomToPrint = []
    for frag in fragments:
      fragAtomIterator = openbabel.OBMolAtomIter(frag)
      for atom in fragAtomIterator:
        if atom.GetId() not in dummyAtoms:
          atomToPrint.append(atom)
    # print number of atoms
    f.write(str(len(atomToPrint))+" \t %s (generated with fragGen)\n"%os.path.basename(base))
    # dictionary associating Atomic number with rdf label
    # (labels are handed out as 1, 2, 3, ... in order of first appearance)
    rdfs = {}
    rdf_label = 1
    if ffparams:
      # sort atoms by index and print (this prints the atoms, e.g., in the same order of the xyz input)
      for atom in sorted(atomToPrint, key=lambda atom: atom.GetId()):
        if atom.GetAtomicNum() not in rdfs.keys():
          rdfs[atom.GetAtomicNum()] = str(rdf_label)
          rdf_label += 1
        f.write(rdfs[atom.GetAtomicNum()]+" "+str(atom.GetAtomicNum())+"  \t"+str(atom.GetX())+"      \t"+str(atom.GetY())+"      \t"+str(atom.GetZ())+"      \t"+charges[idToAtomicLabel[atom.GetId()]]+"\t"+epsilons[idToAtomicLabel[atom.GetId()]]+"\t"+sigmas[idToAtomicLabel[atom.GetId()]]+"\n") 
      f.write("$end\n")
    else:
      # sort atoms by index and print (this prints the atoms, e.g., in the same order of the xyz input)
      # without parameters the last three columns are literal placeholders
      for atom in sorted(atomToPrint, key=lambda atom: atom.GetId()):
        if atom.GetAtomicNum() not in rdfs.keys():
          rdfs[atom.GetAtomicNum()] = str(rdf_label)
          rdf_label += 1
        f.write(rdfs[atom.GetAtomicNum()]+" "+str(atom.GetAtomicNum())+"  \t"+str(atom.GetX())+"      \t"+str(atom.GetY())+"      \t"+str(atom.GetZ())+"      \t"+"q"+"\t"+"epsilon"+"\t"+"sigma\n") 
      f.write("$end\n")

  # write info to dfr file
  with open(base+".dfr","w") as f:
    # fragments and fragments connections are printed to every outtype
    f.write("$atoms fragments\n")
    # fragslst keeps, per fragment, the 1-based atom numbers as strings;
    # it is reused below to find torsions that span fragments
    fragslst = []
    for frag in fragments:
      f.write(frag.GetTitle()+"\t[ ")
      fragAtomIterator = openbabel.OBMolAtomIter(frag)
      # dummy atoms are reported under the id of the atom they stand for
      atomlst = [str(dummyToAtom[x.GetId()]+1) if x.GetId() in dummyAtoms else str(x.GetId()+1) for x in fragAtomIterator]
      fragslst.append(atomlst)
      for atom in atomlst:
        f.write(atom+"\t")                    
      # trailing flag: R for "min"/"header", F for "flex"
      if outtype == "min" or outtype == "header":
        f.write("] R\n")
      else:
        f.write("] F\n")
    f.write("$end atoms fragments\n") 

    f.write("\n$fragment connection\n")
    for frag1, frag2 in fragConnection:
      f.write(frag1+"\t"+frag2+"\n")
    f.write("$end fragment connection\n")

    # bonds are printed to every outtype that is not header, since we need a connection matrix
    if outtype != "header":
      f.write("\n$bond\n")
      bondIterator = openbabel.OBMolBondIter(mol)
      if ffparams:
        for bond in bondIterator:
          try:
            # parameters are looked up under "<short label>-<short label>"
            if eqgeom:
              # first tabulated field followed by the measured bond length
              f.write(str(bond.GetBeginAtom().GetId()+1)+" "+str(bond.GetEndAtom().GetId()+1)+"  \t"+bonds[labelToSLabel[idToAtomicLabel[bond.GetBeginAtom().GetId()]]+"-"+labelToSLabel[idToAtomicLabel[bond.GetEndAtom().GetId()]]].split()[0]+"\t"+str("%.6f" % bond.GetLength())+"\n")
            else:
              f.write(str(bond.GetBeginAtom().GetId()+1)+" "+str(bond.GetEndAtom().GetId()+1)+"  \t"+bonds[labelToSLabel[idToAtomicLabel[bond.GetBeginAtom().GetId()]]+"-"+labelToSLabel[idToAtomicLabel[bond.GetEndAtom().GetId()]]]+"\n")
          except KeyError as e:
            print("The parameters for atoms %d %d (%s) was not found in the bonds list\n" % (bond.GetBeginAtom().GetId()+1,bond.GetEndAtom().GetId()+1,e))
            raise                        
      else:
        # no parameters: constant 0.0 plus the measured bond length
        for bond in bondIterator:
          f.write(str(bond.GetBeginAtom().GetId()+1)+" "+str(bond.GetEndAtom().GetId()+1)+"  \t0.0\t"+str("%.6f" % bond.GetLength())+"\n")
      f.write("$end bond\n")

    # angles are only printed for outtype flex
    if outtype == "flex":
      f.write("\n$angle\n")
      angleIterator = openbabel.OBMolAngleIter(mol)
      # Each angle is written as angle[1] angle[0] angle[2], i.e. angle[0]
      # is used as the middle atom (OBMolAngleIter appears to yield the
      # vertex first -- TODO confirm).
      if ffparams:
        for angle in angleIterator:
          try:
            if eqgeom:
              atom2 = mol.GetAtomById(angle[0])
              atom1 = mol.GetAtomById(angle[1])
              atom3 = mol.GetAtomById(angle[2])
              aparams = angles[labelToSLabel[idToAtomicLabel[angle[1]]]+"-"+labelToSLabel[idToAtomicLabel[angle[0]]]+"-"+labelToSLabel[idToAtomicLabel[angle[2]]]].split()
              # first two tabulated fields followed by the measured angle
              f.write(str(angle[1]+1)+" "+str(angle[0]+1)+" "+str(angle[2]+1)+"   \t"+aparams[0]+"\t"+aparams[1]+"\t"+str("%.6f" % mol.GetAngle(atom1,atom2,atom3))+"\n")
            else:
              f.write(str(angle[1]+1)+" "+str(angle[0]+1)+" "+str(angle[2]+1)+"   \t"+angles[labelToSLabel[idToAtomicLabel[angle[1]]]+"-"+labelToSLabel[idToAtomicLabel[angle[0]]]+"-"+labelToSLabel[idToAtomicLabel[angle[2]]]]+"\n")
          except KeyError as e:
            print("The parameters for atoms %d %d %d (%s) was not found in the angles list\n" % (angle[1]+1,angle[0]+1,angle[2]+1,e))
            raise            
      else:
        for angle in angleIterator:
          # carefully select the atoms to find the angle
          atom2 = mol.GetAtomById(angle[0])
          atom1 = mol.GetAtomById(angle[1])
          atom3 = mol.GetAtomById(angle[2])
          f.write(str(angle[1]+1)+" "+str(angle[0]+1)+" "+str(angle[2]+1)+"   \tharmonic\tK\t"+str("%.6f" % mol.GetAngle(atom1,atom2,atom3))+"\n")
      f.write("$end angle\n")

    # all the dihedrals are printed to outtype flex, but only connection between fragments are printed if outtype is min
    if outtype == "flex":
      f.write("\n$dihedral\n")
      torsionIterator = openbabel.OBMolTorsionIter(mol)
      if ffparams:
        for torsional in torsionIterator:
          # Need to sum 1: http://forums.openbabel.org/Rotable-bonds-tp957795p957798.html
          torsidx = [str(x+1) for x in torsional]
          try:
            f.write(torsidx[0]+" "+torsidx[1]+" "+torsidx[2]+" "+torsidx[3]+"   \t"+dihedrals["-".join([labelToSLabel[idToAtomicLabel[x]] for x in torsional])]+"\n")
          except KeyError as e:
            print("The parameters for atoms %s %s %s %s (%s) was not found in the dihedrals list\n" % (torsidx[0],torsidx[1],torsidx[2],torsidx[3],e))
            raise
      else:
        # no parameters: write a template line with placeholder columns
        for torsional in torsionIterator:
          # Need to sum 1: http://forums.openbabel.org/Rotable-bonds-tp957795p957798.html
          torsional = [str(x+1) for x in torsional]
          f.write(torsional[0]+" "+torsional[1]+" "+torsional[2]+" "+torsional[3]+"   \tTYPE\tV1\tV2\tV3\tf1\tf2\tf3\n")
      f.write("$end dihedral\n")

      # improper dihedral = carbon with only 3 atoms connected to it (SP2 hybridization)
      # angle found following this definition --> http://cbio.bmt.tue.nl/pumma/index.php/Theory/Potentials
      f.write("\n$improper dihedral\n")
      atomIterator = openbabel.OBMolAtomIter(mol)
      for atom in atomIterator:
        # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetValence())
        # if atom.GetAtomicNum() == 6 and atom.GetValence() == 3:
        # ob3 is a flag defined outside this function; it selects which
        # accessor supplies the neighbour count (presumably depending on
        # the OpenBabel major version -- TODO confirm).
        if ob3 == False:
          condImp = atom.GetHyb() == 2 and atom.GetValence() == 3
        else:
          condImp = atom.GetHyb() == 2 and atom.GetExplicitDegree() == 3
        if condImp:
          # collect the three neighbours of the centre atom
          bondIterator = atom.BeginBonds()
          nbrAtom = atom.BeginNbrAtom(bondIterator)
          connectedAtoms = []
          connectedAtoms.append(nbrAtom)
          for i in range(2):
            nbrAtom = atom.NextNbrAtom(bondIterator)
            connectedAtoms.append(nbrAtom)
          if ffparams:
            torsional = [atom.GetId(), connectedAtoms[0].GetId(), connectedAtoms[1].GetId(), connectedAtoms[2].GetId()]
            # create all the permutations to check if one is found
            perms = list(itertools.permutations(torsional[1:]))
            nfound = 0  # counts the permutations that were NOT found
            for perm in perms:
              try:
                joined = "-".join([labelToSLabel[idToAtomicLabel[torsional[0]]]]+[labelToSLabel[idToAtomicLabel[x]] for x in perm])
                # one line is written for every permutation that has an entry
                f.write(str(torsional[0]+1)+" "+str(torsional[1]+1)+" "+str(torsional[2]+1)+" "+str(torsional[3]+1)+"    \t"+impropers[joined]+"\n")
              # NOTE(review): bare except also hides failures other than a
              # missing key (e.g. a write error); consider KeyError only.
              except:
                nfound += 1

            if nfound == len(perms):
              joined = "-".join([labelToSLabel[idToAtomicLabel[torsional[0]]]]+[labelToSLabel[idToAtomicLabel[x]] for x in perms[0]])
              raise KeyError("The key %s (or its permutations) were not found in the improper dihedrals list\n" % (joined))
          else:
            # no parameters: write the measured torsion with a V2 placeholder
            torsional = [atom.GetId()+1, connectedAtoms[0].GetId()+1, connectedAtoms[1].GetId()+1, connectedAtoms[2].GetId()+1]
            torsionAngle = mol.GetTorsion(torsional[0],torsional[1],torsional[2],torsional[3])
            f.write(str(torsional[0])+" "+str(torsional[1])+" "+str(torsional[2])+" "+str(torsional[3])+"    \tV2\t"+str("%.6f" % torsionAngle)+"\n")
      f.write("$end improper dihedral\n")
    
    elif outtype == "min":
      torsionIterator = openbabel.OBMolTorsionIter(mol)
      # tjf = torsionals that join fragments
      tjf = []
      # find the tjfs by checking if all the atoms of a torsional belong to the same fragment
      for tors in torsionIterator:
        tors = [str(x+1) for x in tors]
        istjf = True
        for atomlst in fragslst:
          # a torsion fully contained in one fragment does not join fragments
          if (tors[0] in atomlst) and (tors[1] in atomlst) and (tors[2] in atomlst) and (tors[3] in atomlst):
            istjf = False
            break
        if istjf:
          tjf.append(tors)

      f.write("\n$dihedral\n")
      if ffparams:
        for torsidx in tjf:
          # back to 0-based ids for the label lookup
          torsional = [int(x)-1 for x in torsidx]
          try:
            f.write(torsidx[0]+" "+torsidx[1]+" "+torsidx[2]+" "+torsidx[3]+"   \t"+dihedrals["-".join([labelToSLabel[idToAtomicLabel[x]] for x in torsional])]+"\n")
          except KeyError as e:
            print("The parameters for atoms %s %s %s %s (%s) was not found in the dihedrals list\n" % (torsidx[0],torsidx[1],torsidx[2],torsidx[3],e))
            raise
      else:
        for torsional in tjf:
          f.write(torsional[0]+" "+torsional[1]+" "+torsional[2]+" "+torsional[3]+"   \tTYPE\tV1\tV2\tV3\tf1\tf2\tf3\n")
      f.write("$end dihedral\n")

  # create directory to store the fragments
  if not os.path.exists(base+"_fragments"):
    os.makedirs(base+"_fragments")

  # write fragments to .xyz files (the output format was set to xyz above)
  for frag in fragments:
    obConversion.WriteFile(frag, os.path.join(base+"_fragments",os.path.basename(filename).split(".")[0]+"_fragment"+frag.GetTitle()+".xyz"))
예제 #18
0
def OBMoleculeFromFilenameAndFormat(filename, file_format):
	"""Read ``filename`` as ``file_format`` and return the resulting OBMol.

	The outcome of the format selection and of the read itself is not
	inspected, so the molecule is returned in whatever state the read left it.
	"""
	molecule = openbabel.OBMol()
	converter = openbabel.OBConversion()
	converter.SetInFormat(file_format)
	converter.ReadFile(molecule, filename)
	return molecule
예제 #19
0
def conf_search(selection='all',
                forcefield='MMFF94s',
                method='Weighted',
                nsteps1=500,
                conformers=25,
                lowest_conf=5):
    """Run an OpenBabel rotor search on a PyMOL selection.

    The selection is exported as PDB text, hydrogens are added, and a
    conformer search is run with the chosen force field.  For the
    'Weighted' and 'Random' methods the lowest-energy conformers are loaded
    back into PyMOL as ``<name>00``, ``<name>01``, ... and a table with
    their energy and RMSD to ``<name>00`` is printed.  Any other method
    runs a systematic search and replaces the object ``<name>`` with the
    resulting structure.

    Args:
        selection: PyMOL selection to search.
        forcefield: OpenBabel force field name (GAFF, MMFF94s, MMFF94, UFF,
            Ghemical).
        method: 'Weighted', 'Random', or anything else for systematic.
        nsteps1: step count handed to the rotor search.
        conformers: number of conformers requested (weighted/random only).
        lowest_conf: how many of the lowest-energy conformers to load.

    The three numeric arguments may be given as strings; they are converted
    with ``int()``.
    """
    # Normalise once.  The rotor-search calls were already wrapped in int()
    # (the values presumably arrive as strings from the PyMOL command line),
    # but range() and the lowest_conf comparison further down used the raw
    # values and would fail on a string.
    nsteps1 = int(nsteps1)
    conformers = int(conformers)
    lowest_conf = int(lowest_conf)

    pdb_string = cmd.get_pdbstr(selection)
    name = cmd.get_legal_name(selection)
    obconversion = ob.OBConversion()
    obconversion.SetInAndOutFormats('pdb', 'pdb')
    mol = ob.OBMol()
    obconversion.ReadString(mol, pdb_string)
    mol.AddHydrogens()
    ff = ob.OBForceField.FindForceField(
        forcefield)  ## GAFF, MMFF94s, MMFF94, UFF, Ghemical
    ff.Setup(mol)
    if method == 'Weighted':
        ff.WeightedRotorSearch(conformers, nsteps1)
    elif method == 'Random':
        ff.RandomRotorSearch(conformers, nsteps1)
    else:
        ff.SystematicRotorSearch(nsteps1)
    # avoid creating numbered objects whose prefix is the keyword 'all'
    if name == 'all':
        name = 'all_'
    if method in ['Weighted', 'Random']:
        ff.GetConformers(mol)
        print('##############################################')
        print('   Conformer    |         Energy      |  RMSD')
        nrg_unit = ff.GetUnit()
        rmsd = 0
        ff.GetCoordinates(mol)
        nrg = ff.Energy()
        # score every conformer, then rank them by energy
        conf_list = []
        for i in range(conformers):
            mol.SetConformer(i)
            ff.Setup(mol)
            nrg = ff.Energy()
            conf_list.append((nrg, i))
        conf_list.sort()
        # never ask for more conformers than were scored
        lowest_conf = min(lowest_conf, len(conf_list))
        for i in range(lowest_conf):
            nrg, orden = conf_list[i]
            name_n = '%s%02d' % (name, i)
            cmd.delete(name_n)
            mol.SetConformer(orden)
            pdb_string = obconversion.WriteString(mol)
            cmd.read_pdbstr(pdb_string, name_n)
            # every conformer after the first is fitted onto the best one
            if i != 0:
                rmsd = cmd.fit(name_n, '%s00' % name, quiet=1)
            print('%15s | %10.2f%9s |%6.1f' % (name_n, nrg, nrg_unit, rmsd))
        print('##############################################')
    else:
        ff.GetCoordinates(mol)
        nrg = ff.Energy()
        pdb_string = obconversion.WriteString(mol)
        cmd.delete(name)
        cmd.read_pdbstr(pdb_string, name)
        print('#########################################')
        print('The Energy of %s is %8.2f %s       ' %
              (name, nrg, ff.GetUnit()))
        print('#########################################')
예제 #20
0
def samplingWithConstantReactionCoordinate(mol, conf, ff, angle):

    assert mol != None and ff != None

    whichAtomsForDih = conf["whichAtomsForDih"]
    if "whichAtomsForAux" in conf:
        whichAtomsForAux = conf["whichAtomsForAux"]
    prefix = conf["prefix"]
    nOfSamples = conf["nOfSamples"]
    nOfStepsInBetween = conf["nOfStepsInBetween"]
    twistPeriod = conf["twistPeriod"]
    nuTimesTimeStep = conf["nuTimesTimeStep"]
    timeStep = conf["timeStep"]
    timeStep2 = conf["timeStep2"]
    binWidth = conf["binWidth"]
    idleSteps = conf["idleSteps"]

    random.seed(13)
    relevantCoordinates = []
    for atomNumber in whichAtomsForDih:
        for i in xrange(3):
            relevantCoordinates.append(3 * (atomNumber - 1) + i)
    # for testing purposes
    # relevantCoordinates = [i for i in xrange(33)]

    # constants
    h = 0.0001  # 10^-4 seemed best

    def reactionCoordinate(x):
        return calcDih(x, whichAtomsForDih)

    mol.SetTorsion(whichAtomsForDih[0], whichAtomsForDih[1],
                   whichAtomsForDih[2], whichAtomsForDih[3],
                   angle * math.pi / 180.)
    nOfAtoms = mol.NumAtoms()
    tmp = openbabel.doubleArray_frompointer(mol.GetCoordinates())
    coords = 3 * nOfAtoms * [0]
    for i in xrange(3 * nOfAtoms):
        coords[i] = tmp[i]

    masses = [0] * 3 * nOfAtoms
    for i in xrange(nOfAtoms):
        masses[3 * i] = masses[3 * i + 1] = masses[
            3 * i + 2] = 1000 * mol.GetAtom(i + 1).GetAtomicMass()

    # data storage and calculation
    dataCalculator = DataAndCalculations(coords, masses)

    # data extracted from the simulation will be storred in the following data structures:
    index = 0
    lowestEnergy = 100000000
    configurationWithLowestEnergy = None
    dihedrals = MEMORY_BUFFER_SIZE * [None]
    energies = MEMORY_BUFFER_SIZE * [None]
    Zksis = MEMORY_BUFFER_SIZE * [None]
    mksi_gradU_invMasses_gradKsis = MEMORY_BUFFER_SIZE * [None]
    dAdKsis = MEMORY_BUFFER_SIZE * [None]
    #TODO: allProductsMatrices = MEMORY_BUFFER_SIZE * [ None ]
    ZksiToMinusHalf_ACCUMULATED = 0
    mksi_gradU_invMasses_gradKsi_ACCUMULATED = 0
    dAdKsi_ACCUMULATED = 0
    allProductsMatrices_ACCUMULATED = initProductsMatrices(nOfAtoms)

    # quality / speed testing:
    noShiftTimes = []
    dihedrals_aux = []
    shiftTime = 0
    nOfAccepts = 0

    # tmp for simulation purposes:
    coords = [0] * 3 * nOfAtoms

    convergenceOutput = open(
        "convOutput_angle=%.2f_timestep=%s_twistPeriod=%d_nOfSamples=%d_binWidth=%s_idleSteps=%d.dat"
        % (angle, str(timeStep), twistPeriod, nOfSamples, str(binWidth),
           IDLE_STEPS), "w")
    convergenceOutput.write(
        "ZksiToMinusHalf_ACCUMULATED\tdAdKsi-<m_ksi gradU.M^(-1).gradKsi>\tdAdKsi\t\t<m_ksi gradU.M^(-1).gradKsi>\t-<v.grad(m_ksi gradKsi).v>\t-kT<m_ksi div(M^(-1).gradKsi)>\n"
    )

    gatherStatisticsFlag = False
    start = time.clock()

    if LARGE_OUTPUT:
        generalOutputStream = gzip.open(
            PATH_TO_LARGE_OUTPUT +
            "generalOutput_prefix=%s_angle=%.2f_timestep=%s_twistPeriod=%d_nOfSamples=%d_binWidth=%s_idleSteps=%d.gz"
            % (prefix, angle, str(timeStep), twistPeriod, nOfSamples,
               str(binWidth), idleSteps), "wb")
        generalOutputStream.write(
            "dihedral\t\t\tdihedral_aux\t\t\tenergy\t\t\tdAdKsi\t\t\t<...gradU...>\t\t\tZksi\n"
        )
        fullMatricesOutputStreams = {}
        for key in allProductsMatrices_ACCUMULATED.keys():
            fullMatricesOutputStreams[key] = gzip.open(
                PATH_TO_LARGE_OUTPUT +
                "matrices_prefix=%s_angle=%.2f_timestep=%s_twistPeriod=%d_nOfSamples=%d_binWidth=%s_idleSteps=%d_interaction=%s.gz"
                % (prefix, angle, str(timeStep), twistPeriod, nOfSamples,
                   str(binWidth), idleSteps, key), "w")

    allProductsMatrices = MEMORY_BUFFER_SIZE * [None]

    for iteration in xrange(nOfSamples * nOfStepsInBetween):

        if gatherStatisticsFlag == False:
            dataCalculator.idleKSteps(idleSteps, mol, relevantCoordinates, ff,
                                      reactionCoordinate, h, timeStep,
                                      timeStep2, nuTimesTimeStep)
            gatherStatisticsFlag = True
        else:
            #gradKsi = calcGrad(mol,reactionCoordinate,h)
            #dataCalculator.calculateZksi( gradKsi )
            #energy = dataCalculator.calculateGradU(ff,mol) this is now in the AndersenIntegrator method

            # ONE STEP OF SIMULATION:
            energy = dataCalculator.AndersenIntegrator(mol,
                                                       relevantCoordinates, ff,
                                                       h, reactionCoordinate,
                                                       timeStep, timeStep2,
                                                       nuTimesTimeStep)

            if numpy.abs(reactionCoordinate(mol) - angle) > binWidth:
                print "dihedral = ", reactionCoordinate(mol)
                mol.SetTorsion(whichAtomsForDih[0], whichAtomsForDih[1],
                               whichAtomsForDih[2], whichAtomsForDih[3],
                               angle * math.pi / 180.)
                tmp = openbabel.doubleArray_frompointer(mol.GetCoordinates())
                for i in xrange(3 * nOfAtoms):
                    coords[i] = tmp[i]
                dataCalculator.reset(coords)
                print "shift!!!!"
                newAngle = reactionCoordinate(mol)
                print "now it's dihedral = ", newAngle
                if numpy.abs(newAngle - angle) > binWidth:
                    print "WARNING!!!"
                    obConversion.WriteFile(
                        mol,
                        "ERROR_ANGLE=%.3f_ENERGY=%.5f.mol2" % (angle, energy))
                    os.exit()
                noShiftTimes.append(shiftTime)
                shiftTime = 0
                gatherStatisticsFlag = False
                continue
            else:
                shiftTime += 1

            # remember lowest energy configuration (for visual verification)
            if energy < lowestEnergy:
                lowestEnergy = energy
                configurationWithLowestEnergy = openbabel.OBMol(mol)

            # calculate second derivatives of ksi
            dataCalculator.calculateHessianDiagonal(mol, relevantCoordinates,
                                                    reactionCoordinate, h)
            Zksi = dataCalculator.getZksi()
            gradKsi = dataCalculator.getGradKsi()

            if Zksi == 0: print gradKsi

            tmp_mksi_gradU_invMasses_gradKsi = dataCalculator.getMksi_gradU_invMasses_gradKsi(
            )  # is multiplied by Zksi^(-0.5) in this method
            tmp_dAdKsi = dataCalculator.getKsiDerivativeOfFreeEnergy(
            )  # is multiplied by Zksi^(-0.5) in this method
            if tmp_mksi_gradU_invMasses_gradKsi != None and tmp_dAdKsi != None:
                mksi_gradU_invMasses_gradKsi_ACCUMULATED += tmp_mksi_gradU_invMasses_gradKsi
                dAdKsi_ACCUMULATED += tmp_dAdKsi
                ZksiToMinusHalf = Zksi**(-0.5)
                ZksiToMinusHalf_ACCUMULATED += ZksiToMinusHalf
                dAdKsis[index] = tmp_dAdKsi / ZksiToMinusHalf
                if "whichAtomsForAux" in conf:
                    dihedrals_aux.append(mol.GetTorsion(*whichAtomsForAux))
                else:
                    dihedrals_aux.append(-10000)
                dihedral = reactionCoordinate(mol)
                energies[index] = energy
                dihedrals[index] = dihedral
                Zksis[index] = Zksi
                mksi_gradU_invMasses_gradKsis[
                    index] = tmp_mksi_gradU_invMasses_gradKsi / ZksiToMinusHalf
                productsMatrices = initProductsMatrices(nOfAtoms)
                variousForces = ff.GetVariousForces()
                products = parseVariousForces2(variousForces, gradKsi, Zksi,
                                               masses)
                fillOutProductsMatrices(products, productsMatrices)
                addToAllProductsMatrices(
                    allProductsMatrices_ACCUMULATED, productsMatrices,
                    nOfAtoms, ZksiToMinusHalf
                )  # it's where productsMatrices are multiplied by ZksiToMinusHalf
                allProductsMatrices[index] = productsMatrices
                if numpy.abs(dihedrals[index] - angle) > binWidth:
                    print "gathering WRONG stats for dihedral = ", dihedrals[
                        -1]
                    sys.exit(1)
                if energies[-1] != None:
                    if LARGE_OUTPUT:
                        for i in xrange(MEMORY_BUFFER_SIZE):
                            generalOutputStream.write(
                                "%f\t\t\t%f\t\t\t%f\t\t\t%f\t\t\t%f\t\t\t%f\n"
                                % (dihedrals[i], dihedrals_aux[i], energies[i],
                                   dAdKsis[i],
                                   mksi_gradU_invMasses_gradKsis[i], Zksis[i]))
                    index = 0
                    dihedrals = MEMORY_BUFFER_SIZE * [None]
                    energies = MEMORY_BUFFER_SIZE * [None]
                    Zksis = MEMORY_BUFFER_SIZE * [None]
                    mksi_gradU_invMasses_gradKsis = MEMORY_BUFFER_SIZE * [None]
                    dAdKsis = MEMORY_BUFFER_SIZE * [None]
                    for i in xrange(MEMORY_BUFFER_SIZE):
                        if LARGE_OUTPUT:
                            for key in fullMatricesOutputStreams.keys():
                                for row in allProductsMatrices[i][key]:
                                    for elementInRow in row:
                                        if abs(elementInRow) > 0.000000000000:
                                            fullMatricesOutputStreams[
                                                key].write("%f " %
                                                           elementInRow)
                                        else:
                                            fullMatricesOutputStreams[
                                                key].write("0 ")
                                    fullMatricesOutputStreams[key].write("\n")
                    allProductsMatrices = MEMORY_BUFFER_SIZE * [None]
                else:
                    index += 1

            if numpy.mod(iteration + 1,
                         int(0.001 * nOfStepsInBetween * nOfSamples)) == 0:
                gradKsi = dataCalculator.getGradKsi()
                percent = int(
                    1000.0 * iteration / nOfSamples / nOfStepsInBetween) / 10.
                elapsed = (time.clock() - start)
                start = time.clock()
                outTmp = "\t%.8f\t\t\t%.8f\t\t\t%.8f\t\t%.8f\t\t\t%.8f\t\t\t%.8f\t\t(%s percents, one took %s secs)\n" % (
                    ZksiToMinusHalf_ACCUMULATED,
                    (dAdKsi_ACCUMULATED -
                     mksi_gradU_invMasses_gradKsi_ACCUMULATED) /
                    ZksiToMinusHalf_ACCUMULATED,
                    dAdKsi_ACCUMULATED / ZksiToMinusHalf_ACCUMULATED,
                    mksi_gradU_invMasses_gradKsi_ACCUMULATED /
                    ZksiToMinusHalf_ACCUMULATED, 0, 0, str(percent),
                    str(elapsed))
                convergenceOutput.write(outTmp)
                print outTmp

            # SETTING NEW COORDINATES:
            #mol.SetCoordinates( openbabel.double_array(nextCoords) )   now in the AndersenIntegrator

            # CHECKING THE H-N-C-C DIHEDRAL ANGLE:
            if "whichAtomsForAux" in conf and numpy.mod(
                    iteration + 1, twistPeriod) == 0:
                oldAngle = mol.GetTorsion(*whichAtomsForAux)
                newAngle = random.random() * 359.999 - 179.999
                mol.SetTorsion(whichAtomsForAux[3], whichAtomsForAux[2],
                               whichAtomsForAux[1], whichAtomsForAux[0],
                               newAngle * math.pi / 180.)
                ff.Setup(mol)
                newEnergy = ff.Energy()
                if newEnergy <= energy or math.exp(
                    (energy - newEnergy) / BOLTZMANN_CONSTANT /
                        TEMPERATURE) > random.random():
                    tmp = openbabel.doubleArray_frompointer(
                        mol.GetCoordinates())
                    coords = 3 * nOfAtoms * [0]
                    for i in xrange(3 * nOfAtoms):
                        coords[i] = tmp[i]
                    dataCalculator.reset(coords)
                    nOfAccepts += 1
                    gatherStatisticsFlag = False
                else:
                    mol.SetTorsion(whichAtomsForAux[3], whichAtomsForAux[2],
                                   whichAtomsForAux[1], whichAtomsForAux[0],
                                   oldAngle * math.pi / 180.)

    if LARGE_OUTPUT:
        for i in xrange(index):
            generalOutputStream.write(
                "%f\t\t\t%f\t\t\t%f\t\t\t%f\t\t\t%f\t\t\t%f\n" %
                (dihedrals[i], dihedrals_aux[i], energies[i], dAdKsis[i],
                 mksi_gradU_invMasses_gradKsis[i], Zksis[i]))
        generalOutputStream.close()
        for key in fullMatricesOutputStreams.keys():
            for i in xrange(index):
                for row in allProductsMatrices[i][key]:
                    for elementInRow in row:
                        if abs(elementInRow) > 0.00000000000:
                            fullMatricesOutputStreams[key].write("%f " %
                                                                 elementInRow)
                        else:
                            fullMatricesOutputStreams[key].write("0 ")
                    fullMatricesOutputStreams[key].write("\n")
            fullMatricesOutputStreams[key].close()

    check_ACCUMULATED = 0
    for key in allProductsMatrices_ACCUMULATED.keys():
        for i in xrange(nOfAtoms):
            for j in xrange(nOfAtoms):
                allProductsMatrices_ACCUMULATED[key][i][
                    j] /= ZksiToMinusHalf_ACCUMULATED
                check_ACCUMULATED += allProductsMatrices_ACCUMULATED[key][i][j]

    fileNames = glob.glob("./lowestEnergyConfigurations/%s*ANGLE=%.3f*" %
                          (conf["prefix"], angle))
    if len(fileNames) > 1:
        print "what?"
    elif len(fileNames) == 1:
        lowestEnergySoFar = float(
            fileNames[0].split("ENERGY=")[1].split(".mol2")[0])
        if lowestEnergySoFar > lowestEnergy:
            obConversion.WriteFile(
                configurationWithLowestEnergy,
                "./lowestEnergyConfigurations/%s_ANGLE=%.3f_ENERGY=%.5f.mol2" %
                (conf["prefix"], angle, lowestEnergy))
            os.remove(fileNames[0])
    elif len(fileNames) == 0:
        obConversion.WriteFile(
            configurationWithLowestEnergy,
            "./lowestEnergyConfigurations/%s_ANGLE=%.3f_ENERGY=%.5f.mol2" %
            (conf["prefix"], angle, lowestEnergy))

    print
    print "The following two quantities should be approximately equal:"
    print "check_ACCUMULATED = \t\t\t\t", check_ACCUMULATED / 2
    print "mksi_gradU_invMasses_gradKsi_ACCUMULATED = \t", mksi_gradU_invMasses_gradKsi_ACCUMULATED / ZksiToMinusHalf_ACCUMULATED

    print
    print "Probability of acceptance in the Monte Carlo shifts keeping the right dihedral angle: "
    print "P(acc) = ", float(
        nOfAccepts) / nOfSamples / nOfStepsInBetween * twistPeriod
    print "With a mean shift time of ", int(numpy.mean(noShiftTimes))

    convergenceOutput.close()

    return allProductsMatrices_ACCUMULATED, noShiftTimes, ZksiToMinusHalf_ACCUMULATED
예제 #21
0
    def pic(self, filename, picformat='svg'):
        """
        Generates a graphical file with 2D-representation of the resonance structure
        """
        try:
            import openbabel as ob
        except:
            print "Cannot import openbabel"
            return

        #ValEl = {'H':1, 'B':3,'C':4,'N':5,'O':6,'F':7,'S':6}
        #ValEl = {'1':1, '5':3,'6':4,'7':5,'8':6,'9':7,'16':6}
        # Import Element Numbers
        ati = []
        Sym2Num = ob.OBElementTable()
        for a in self.symbols:
            ElNum = Sym2Num.GetAtomicNum(a)
            ati.append(ElNum)

        # Import connections
        conn = self.data

        mol = ob.OBMol()

        # Create atoms
        for a in ati:
            at = ob.OBAtom()
            at.SetAtomicNum(a)
            mol.AddAtom(at)

        # Create connections
        val = []
        total_LP = 0
        for i in range(len(conn)):
            total_LP += conn[i][i]

        for i in range(len(conn)):
            val.append(conn[i][i] * 2)
            for j in range(i):
                if conn[i][j] == 0:
                    continue
                val[i] += conn[i][j]
                val[j] += conn[i][j]
                atA = mol.GetAtomById(i)
                atB = mol.GetAtomById(j)

                b = ob.OBBond()
                b.SetBegin(atA)
                b.SetEnd(atB)
                b.SetBO(int(conn[i][j]))
                mol.AddBond(b)
        for i in range(len(conn)):
            atA = mol.GetAtomById(i)
            atAN = atA.GetAtomicNum()
            FormValEl = CountValenceEl(atAN)
            #if total_LP == 0:
            #    if atAN == 1:
            #        FullShell = 2
            #    else:
            #        FullShell = 8
            #    FormCharge = FormValEl + int(val[i]) - FullShell
            #else:
            FormCharge = int(FormValEl - val[i])
            #print "atAN, FormValEl, val[i], FullShell"
            #print atAN, FormValEl, val[i], FullShell
            #FormCharge = FormCharge % 2
            atA.SetFormalCharge(FormCharge)

        # Export file
        mol.DeleteNonPolarHydrogens()
        conv = ob.OBConversion()
        conv.SetOutFormat(picformat)
        conv.AddOption('C')
        conv.WriteFile(mol, filename)
예제 #22
0
    def _align_heavy_atoms(mol1, mol2, vmol1, vmol2, ilabel1, ilabel2,
                           eq_atoms):
        """
        Relabel the heavy atoms of the second molecule so that topologically
        equivalent atoms follow the labelling of the first molecule.

        Args:
            mol1: First molecule. OpenBabel OBMol object
            mol2: Second molecule. OpenBabel OBMol object
            vmol1: Virtual molecule of the first molecule, built from
                centroids. OpenBabel OBMol object; atoms are appended to it
            vmol2: Virtual molecule of the second molecule, built from
                centroids. OpenBabel OBMol object; atoms are appended to it
                and its coordinates are updated by the alignment
            ilabel1: inchi label map of the first molecule
            ilabel2: inchi label map of the second molecule
            eq_atoms: groups of equivalent atom labels

        Return:
            tuple with the original indices of the heavy atoms of the second
            molecule, ordered by their corrected label
        """

        def _clone_atoms(source, indices):
            # New molecule holding copies (element + position) of the
            # requested atoms of `source`, in the given order.
            clone = ob.OBMol()
            for idx in indices:
                template = source.GetAtom(idx)
                copied = clone.NewAtom()
                copied.SetAtomicNum(template.GetAtomicNum())
                copied.SetVector(template.GetVector())
            return clone

        n_centroids = vmol1.NumAtoms()
        n_heavy = len(ilabel1)

        # Append one extra atom per heavy atom to both virtual molecules.
        # They are created with atomic number 1, presumably so that
        # OBAlign(False, False) leaves them out of the fit while still
        # moving them together with the rest of vmol2.
        for orig_idx in ilabel2:
            filler = vmol1.NewAtom()
            filler.SetAtomicNum(1)
            # position is irrelevant, the atom only pairs with vmol2's
            filler.SetVector(0.0, 0.0, 0.0)
            heavy_atom = mol2.GetAtom(orig_idx)
            carried = vmol2.NewAtom()
            carried.SetAtomicNum(1)
            carried.SetVector(heavy_atom.GetVector())

        fitter = ob.OBAlign(False, False)
        fitter.SetRefMol(vmol1)
        fitter.SetTargetMol(vmol2)
        fitter.Align()
        fitter.UpdateCoords(vmol2)

        reference = _clone_atoms(mol1, ilabel1)
        # the appended atoms sit right after the centroids in vmol2
        moved = _clone_atoms(
            vmol2, range(n_centroids + 1, n_centroids + n_heavy + 1))

        # Start from the identity labelling and blank every atom that
        # belongs to an equivalence group; those are assigned below.
        corrected = list(range(1, n_heavy + 1))
        for group in eq_atoms:
            for member in group:
                corrected[member - 1] = -1

        # Inside each group, give every atom of molecule 2 the label of the
        # closest still unclaimed atom of molecule 1.
        for group in eq_atoms:
            unclaimed = list(group)
            for target_idx in list(group):
                best_dist = 99999.0
                best_idx = unclaimed[0]
                target = moved.GetAtom(target_idx)
                for ref_idx in unclaimed:
                    gap = reference.GetAtom(ref_idx).GetDistance(target)
                    if gap < best_dist:
                        best_dist = gap
                        best_idx = ref_idx
                corrected[target_idx - 1] = best_idx
                unclaimed.remove(best_idx)

        # Order the original indices by corrected label (stable sort).
        ranked = sorted(zip(corrected, ilabel2), key=lambda pair: pair[0])
        return tuple(orig for _, orig in ranked)
def create_new_atom(atom_no):
    """Build a one-atom molecule and hand it to ``drawObject``.

    Args:
        atom_no: atomic number given to the single atom.

    The molecule is wrapped in a ``pybel.Molecule`` and drawn together with
    the module-level ``picLabel``.
    """
    single_atom_mol = ob.OBMol()
    single_atom_mol.NewAtom().SetAtomicNum(atom_no)
    drawObject(pybel.Molecule(single_atom_mol), picLabel)
예제 #24
0
    def _align_hydrogen_atoms(mol1, mol2, heavy_indices1,
                              heavy_indices2):
        """
        Match the remaining (non-heavy) atoms of the second molecule to
        those of the first molecule after aligning both on the full,
        reordered atom lists.

        Args:
            mol1: First molecule. OpenBabel OBMol object
            mol2: Second molecule. OpenBabel OBMol object
            heavy_indices1: tuple, inchi label map of the first molecule
            heavy_indices2: tuple, label map of the second molecule

        Return:
            pair (label map of all atoms of the first molecule,
            corrected label map of all atoms of the second molecule)
        """

        def _reordered_copy(source, order):
            # New molecule with copies (element + position) of the atoms of
            # `source`, taken in the order given by `order`.
            clone = ob.OBMol()
            for idx in order:
                template = source.GetAtom(idx)
                copied = clone.NewAtom()
                copied.SetAtomicNum(template.GetAtomicNum())
                copied.SetVector(template.GetVector())
            return clone

        total_atoms = mol2.NumAtoms()
        n_heavy1 = len(heavy_indices1)
        n_heavy2 = len(heavy_indices2)

        # Heavy atoms first (in label order), then every other atom index.
        every_index = set(range(1, total_atoms + 1))
        order1 = heavy_indices1 + tuple(every_index - set(heavy_indices1))
        order2 = heavy_indices2 + tuple(every_index - set(heavy_indices2))

        ordered_mol1 = _reordered_copy(mol1, order1)
        ordered_mol2 = _reordered_copy(mol2, order2)

        fitter = ob.OBAlign(False, False)
        fitter.SetRefMol(ordered_mol1)
        fitter.SetTargetMol(ordered_mol2)
        fitter.Align()
        fitter.UpdateCoords(ordered_mol2)

        # Assign to each remaining atom of molecule 2 the closest, still
        # unclaimed remaining atom of molecule 1.
        matched = []
        unclaimed = list(range(n_heavy1 + 1, total_atoms + 1))
        for pos2 in range(n_heavy2 + 1, total_atoms + 1):
            best_dist = 99999.0
            best_pos = unclaimed[0]
            target = ordered_mol2.GetAtom(pos2)
            for pos1 in unclaimed:
                gap = ordered_mol1.GetAtom(pos1).GetDistance(target)
                if gap < best_dist:
                    best_dist = gap
                    best_pos = pos1
            matched.append(best_pos)
            unclaimed.remove(best_pos)

        # Order the original indices of those atoms by the position they
        # were matched to (stable sort).
        ranked = sorted(zip(matched, order2[n_heavy2:]),
                        key=lambda pair: pair[0])
        reordered_rest2 = tuple(orig for _, orig in ranked)

        return order1, heavy_indices2 + reordered_rest2
예제 #25
0
파일: preparing.py 프로젝트: HadXu/PMP
def make_graph(name, gb_structure, gb_scalar_coupling,
               structure_dir='../input/champs-scalar-coupling/structures'):
    """Build the ``Graph`` (node, edge and coupling features) of one molecule.

    Args:
        name: molecule name, used as group key and as ``<name>.xyz`` file name.
        gb_structure: pandas group-by of the structure table; the group has
            the columns molecule_name, atom_index, atom, x, y, z.
        gb_scalar_coupling: pandas group-by of the coupling table; the
            columns id, atom_index_0, atom_index_1, type,
            scalar_coupling_constant, fc, sd, pso and dso are read.
        structure_dir: directory holding the ``.xyz`` structure files read
            with OpenBabel.

    Returns:
        A ``Graph`` holding the name, the SMILES string, ``[symbols, xyz]``,
        the list of per-atom feature arrays, the per-edge arrays
        ``[bond_type, distance, angle]``, the edge index and the ``Coupling``.

    Raises:
        ValueError: if a coupling refers to an atom pair that is not an edge
            of the graph.
    """
    # ['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type','scalar_coupling_constant']
    coupling_df = gb_scalar_coupling.get_group(name)

    # [molecule_name,atom_index,atom,x,y,z]
    df = gb_structure.get_group(name)
    df = df.sort_values(['atom_index'], ascending=True)
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values

    mol = mol_from_axyz(a, xyz)
    mol_op = openbabel.OBMol()
    obConversion.ReadFile(mol_op, f'{structure_dir}/{name}.xyz')

    factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)

    # new features
    degree = np.zeros((num_atom, 1), np.uint8)
    formalCharge = np.zeros((num_atom, 1), np.float32)
    chiral_tag = np.zeros((num_atom, 1), np.uint8)
    crippen_contribs = np.zeros((num_atom, 2), np.float32)
    tpsa = np.zeros((num_atom, 1), np.float32)
    labute_asac = np.zeros((num_atom, 1), np.float32)
    gasteiger_charges = np.zeros((num_atom, 1), np.float32)
    esataindices = np.zeros((num_atom, 1), np.float32)
    atomic_radiuss = np.zeros((num_atom, 1), np.float32)
    electronegate = np.zeros((num_atom, 1), np.float32)
    electronegate_sqre = np.zeros((num_atom, 1), np.float32)
    mass = np.zeros((num_atom, 1), np.float32)
    van = np.zeros((num_atom, 1), np.float32)
    cov = np.zeros((num_atom, 1), np.float32)
    ion = np.zeros((num_atom, 1), np.float32)

    # Whole-molecule descriptor tables: computed once here instead of once
    # per atom inside the loop below.
    crippen_per_atom = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa_per_atom = rdMolDescriptors._CalcTPSAContribs(mol)
    labute_per_atom = rdMolDescriptors._CalcLabuteASAContribs(mol)[0]
    estate_per_atom = EState.EStateIndices(mol)

    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        atom_op = mol_op.GetAtomById(i)
        element = atom.GetSymbol()

        symbol[i] = one_hot_encoding(element, SYMBOL)
        aromatic[i] = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(), HYBRIDIZATION)
        num_h[i] = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()

        degree[i] = atom.GetTotalDegree()
        formalCharge[i] = atom.GetFormalCharge()
        chiral_tag[i] = int(atom.GetChiralTag())

        crippen_contribs[i] = crippen_per_atom[i]
        tpsa[i] = tpsa_per_atom[i]
        labute_asac[i] = labute_per_atom[i]
        # partial charge as reported by the OpenBabel molecule
        gasteiger_charges[i] = atom_op.GetPartialCharge()
        esataindices[i] = estate_per_atom[i]
        # per-element lookup tables
        atomic_radiuss[i] = atomic_radius[element]
        electronegate[i] = electronegativity[element]
        electronegate_sqre[i] = electronegativity_square[element]
        mass[i] = atomic_mass[element]
        van[i] = vanderwaalsradius[element]
        cov[i] = covalenzradius[element]
        ion[i] = ionization_energy[element]

    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # Fully connected directed graph without self loops.
    num_edge = num_atom * num_atom - num_atom
    edge_index = np.zeros((num_edge, 2), np.uint32)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint32)
    distance = np.zeros((num_edge, 1), np.float32)
    angle = np.zeros((num_edge, 1), np.float32)

    # rows scaled to unit L2 norm, so their dot product is a cosine
    norm_xyz = preprocessing.normalize(xyz, norm='l2')

    # (i, j) -> row of edge_index, used to locate the coupling edges below
    # without scanning the whole edge list for every coupling.
    edge_position = {}
    ij = 0
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]
            edge_position[(i, j)] = ij

            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(), BOND_TYPE)

            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            angle[ij] = (norm_xyz[i] * norm_xyz[j]).sum()

            ij += 1

    # presumably Angstrom -> Bohr for the ACSF descriptor; confirm the unit
    # expected by ACSF_GENERATOR
    xyz = xyz * 1.889726133921252

    system = System(symbols=a, positions=xyz)
    acsf = ACSF_GENERATOR.create(system)

    coupling_pairs = coupling_df[['atom_index_0', 'atom_index_1']].values
    coupling_edges = []
    for pair in coupling_pairs.tolist():
        try:
            coupling_edges.append(edge_position[tuple(pair)])
        except KeyError:
            raise ValueError(
                f'coupling pair {pair} of {name} is not an edge of the graph'
            ) from None
    coupling_edges = np.array(coupling_edges)

    coupling_edge_index = np.concatenate(
        [coupling_pairs, coupling_edges.reshape(len(coupling_edges), 1)],
        axis=1)

    coupling = Coupling(coupling_df['id'].values,
                        coupling_df[['fc', 'sd', 'pso', 'dso']].values,
                        coupling_edge_index,
                        np.array([COUPLING_TYPE.index(t) for t in coupling_df.type.values], np.int32),
                        coupling_df['scalar_coupling_constant'].values,
                        )

    graph = Graph(
        name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        [acsf, symbol, acceptor, donor, aromatic, hybridization, num_h, atomic, degree, formalCharge, chiral_tag,
         crippen_contribs, tpsa, labute_asac, gasteiger_charges, esataindices, atomic_radiuss, electronegate,
         electronegate_sqre, mass, van, cov, ion],
        [bond_type, distance, angle, ],
        edge_index,
        coupling,
    )

    return graph
예제 #26
0
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Outputs a hydrogenated pdb and a pdbqt with partial charges.

    Takes an input file in specified format. Generates two outputs:

    -) A pdb file that contains a hydrogenated (at pH 7.4) version of
       original compound.
    -) A pdbqt file that has computed Gasteiger partial charges. This pdbqt
       file is built from the hydrogenated pdb.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
      Path to input file.
    input_format: String
      Name of input format.
    hyd_output: String
      Path the hydrogenated pdb is written to. The value is passed through
      ``str()`` unchanged, so callers are expected to supply a real path
      despite the ``None`` default.
    pdbqt_output: String
      Path the pdbqt file is written to (same remark as ``hyd_output``).
    protein: bool
      If True, the pdbqt is written as a single rigid molecule and lines
      containing ROOT/ENDROOT/TORSDOF are stripped from it afterwards.
    verbose: bool
      If True, progress messages are printed.
    """

    def log(message):
        # Single gate so that every progress message honours ``verbose``.
        if verbose:
            print(message)

    # Since this function passes data to C++ obabel classes, we need to
    # constantly cast to str to convert unicode to char*
    log("Create pdb with hydrogens added")
    hyd_conversion = openbabel.OBConversion()
    hyd_conversion.SetInAndOutFormats(str(input_format), str("pdb"))
    mol = openbabel.OBMol()
    hyd_conversion.ReadFile(mol, str(input_file))
    # AddHydrogens(not-polaronly, correctForPH, pH)
    mol.AddHydrogens(False, True, 7.4)
    hyd_conversion.WriteFile(mol, str(hyd_output))

    log("Create a pdbqt file from the hydrogenated pdb above.")
    charge_conversion = openbabel.OBConversion()
    charge_conversion.SetInAndOutFormats(str("pdb"), str("pdbqt"))

    if protein:
        log("Make protein rigid.")
        charge_conversion.AddOption(str("r"), charge_conversion.OUTOPTIONS)
        charge_conversion.AddOption(str("c"), charge_conversion.OUTOPTIONS)
    log("Preserve hydrogens")
    charge_conversion.AddOption(str("h"), charge_conversion.OUTOPTIONS)
    log("Preserve atom indices")
    charge_conversion.AddOption(str("p"), charge_conversion.OUTOPTIONS)
    log("Preserve atom names")
    charge_conversion.AddOption(str("n"), charge_conversion.OUTOPTIONS)

    log("About to run obabel conversion.")
    mol = openbabel.OBMol()
    charge_conversion.ReadFile(mol, str(hyd_output))
    force_partial_charge_computation(mol)
    charge_conversion.WriteFile(mol, str(pdbqt_output))

    if protein:
        log("Removing ROOT/ENDROOT/TORSDOF")
        with open(pdbqt_output) as f:
            # "ROOT" also matches ENDROOT lines, so two substring tests
            # cover all three record types.
            kept_lines = [
                line for line in f
                if "ROOT" not in line and "TORSDOF" not in line
            ]
        with open(pdbqt_output, "w") as f:
            f.writelines(kept_lines)
 def createParams(self, event):
     """Import or generate a Rosetta params file for the selected residue.

     The residue type is taken from ``self.resMenu``. Rosetta's database
     (located through the ``PYROSETTA_DATABASE`` environment variable) is
     searched for ``.params`` files whose ``NAME`` record matches that
     type. Each hit is offered to the user; an accepted file is copied into
     the "params" sandbox as ``<type>.fa.params``, the related widgets are
     reset and the method returns.

     If nothing is accepted, the stored PDB lines of the residue are
     written to ``params.pdb``, converted to ``params.mol2`` with OpenBabel
     and handed to ``molfile_to_params``. The atom names and types read
     from the resulting ``LG.fa.params`` are sorted by atom name and shown
     in ``self.grdParamsAtoms`` and in the atom selection menus.

     ``event`` is accepted for the event-handler signature and is not used.
     """
     # Change to the sandbox location
     goToSandbox()
     self.selectedType = self.resMenu.GetStringSelection().strip()
     # Before doing any of the following, scan all of the params files in
     # the Rosetta database to see if a file already exists for this type.
     # On a hit the user decides, based on the filename and directory,
     # whether that is the params file they want, because Rosetta's own
     # files should be used when they are available.
     root = os.path.join(os.environ["PYROSETTA_DATABASE"], "chemical",
                         "residue_type_sets", "fa_standard",
                         "residue_types")
     accepted = False
     for dpath, _dnames, fnames in os.walk(root):
         for fname in fnames:
             if not fname.endswith(".params"):
                 continue
             fullpath = os.path.join(dpath, fname)
             matches = False
             # The context manager closes every scanned file, including
             # when the search stops early on a match.
             with open(fullpath, "r") as f:
                 for aline in f:
                     if aline[0:4] != "NAME":
                         continue
                     fields = aline.split()
                     # A NAME record without a value cannot match.
                     if len(fields) > 1 and fields[1] == self.selectedType:
                         matches = True
                         break
             if not matches:
                 continue
             # Tell the user there is a hit, what the filename was, and
             # what the directory is
             msg = ("Found a parameters file that matches this unrecognized residue in Rosetta's database.\n\nFilename: "
                    + fname.strip() + "\nDirectory: " + dpath.strip()
                    + "\n\nDo you want to import this parameters file?")
             dlg = wx.MessageDialog(
                 self, msg, "Params File Found",
                 wx.YES_NO | wx.ICON_EXCLAMATION | wx.CENTRE)
             answer = dlg.ShowModal()
             dlg.Destroy()
             if answer == wx.ID_YES:
                 # fullpath keeps pointing at the accepted file.
                 accepted = True
                 break
         if accepted:
             break
     if accepted:
         # Copy over the file and we're done
         goToSandbox("params")
         with open(fullpath, "r") as src:
             with open(self.selectedType + ".fa.params", "w") as dst:
                 for aline in src:
                     dst.write(aline.strip() + "\n")
         wx.MessageBox(
             "Your parameters file was imported successfully!  InteractiveROSETTA will now recognize "
             + self.selectedType + " entries.",
             "Params Creation Successful", wx.OK | wx.ICON_EXCLAMATION)
         self.grdParamsAtoms.ClearGrid()
         self.atomMenu.Clear()
         self.typeMenu.Clear()
         self.btnAdd.Disable()
         self.btnCreate.Disable()
         self.unrecognized_types.pop(self.selectedType)
         self.resMenu.Clear()
         # list() so the widget receives a real list even where keys()
         # returns a view object.
         self.resMenu.AppendItems(list(self.unrecognized_types.keys()))
         self.removeMenu.Append(self.selectedType)
         return
     # First we have to create a temporary PDB file that contains only the
     # ATOM/HETATM records for the selected unrecognized residue type
     residue_lines = self.unrecognized_types[self.selectedType]
     for aline in residue_lines:
         if (aline[0:4] == "ATOM" or aline[0:6] == "HETATM"):
             # Remember position and chain of the first atom record.
             self.seqpos = int(aline[22:26])
             self.chainID = aline[21].strip()
             break
     with open("params.pdb", "w") as f:
         for aline in residue_lines:
             f.write(aline + "\n")
     self.txtCode.SetValue(str(self.selectedType))
     # Now use openbabel to convert this PDB file to a mol2 file
     obConversion = openbabel.OBConversion()
     obConversion.SetInAndOutFormats("pdb", "mol2")
     mol = openbabel.OBMol()
     obConversion.ReadFile(mol, "params.pdb")
     obConversion.WriteFile(mol, 'params.mol2')
     obConversion.CloseOutFile()
     # Now read the mol2 file as usual
     # Attempt to generate the params file
     try:
         # Remove outputs of a previous run before generating new ones.
         for stale in ("LG.params", "LG.fa.params", "LG.cen.params"):
             if (os.path.isfile(stale)):
                 os.remove(stale)
         molfile_to_params.main(
             ["params.mol2", "--no-pdb", "--keep-names", "-c"])
     except (Exception, SystemExit):
         # SystemExit is caught as well because a command-line style main()
         # may call sys.exit() on failure (presumably the case here --
         # confirm); KeyboardInterrupt is left to propagate.
         if (platform.system() == "Windows"):
             dlg = wx.MessageDialog(
                 self,
                 "OpenBabel was not able to convert this residue to a mol2 file!\n\nDid you install OpenBabel separately?  If not, would you like to view the download page?",
                 "Params File Found",
                 wx.YES_NO | wx.ICON_EXCLAMATION | wx.CENTRE)
             if (dlg.ShowModal() == wx.ID_YES):
                 webbrowser.open(
                     "http://sourceforge.net/projects/openbabel/files/openbabel/2.3.2/OpenBabel2.3.2a_Windows_Installer.exe/download"
                 )
             dlg.Destroy()
         else:
             wx.MessageBox(
                 "OpenBabel was not able to convert this residue to a mol2 file!",
                 "File Cannot Be Processed", wx.OK | wx.ICON_EXCLAMATION)
         return
     logInfo("Params file created successfully")
     # Now read the LG.fa.params file and grab out the atom names and their
     # assigned types so the user can see them and modify them if desired
     atom_records = []
     with open("LG.fa.params", "r") as f:
         for aline in f:
             if (aline[0:4] == "ATOM"):
                 fields = aline.split()
                 atom_records.append((fields[1], fields[2]))
     if (self.grdParamsAtoms.NumberRows > 0):
         self.grdParamsAtoms.DeleteRows(0, self.grdParamsAtoms.NumberRows)
     # Sort by atom name to make it easier for the user to find things;
     # each type stays paired with its name.
     atom_records = sorted(atom_records, key=lambda record: record[0])
     self.atomnames = [name for name, _atype in atom_records]
     atomtypes = [atype for _name, atype in atom_records]
     # Now add things to the grid
     for row, (atomname, atomtype) in enumerate(
             zip(self.atomnames, atomtypes)):
         self.grdParamsAtoms.AppendRows(1)
         self.grdParamsAtoms.SetRowLabelValue(row, atomname)
         self.grdParamsAtoms.SetCellValue(row, 0, atomtype)
         self.grdParamsAtoms.SetCellAlignment(row, 0, wx.ALIGN_CENTRE,
                                              wx.ALIGN_CENTRE)
         readOnly = wx.grid.GridCellAttr()
         readOnly.SetReadOnly(True)
         self.grdParamsAtoms.SetRowAttr(row, readOnly)
     # Update some of the atom selection menus with the list of atomnames
     self.atomMenu.Clear()
     self.atomMenu.AppendItems(self.atomnames)
     self.NtermMenu.Clear()
     self.NtermMenu.AppendItems(self.atomnames)
     self.CtermMenu.Clear()
     self.CtermMenu.AppendItems(self.atomnames)
     self.btnAdd.Enable()
예제 #28
0
     writer.writerow([
         'id', 'molecule_name', 'angle_1_0_6th0', 'angle_1_0_7th0',
         'angle_1_0_8th0', 'angle_1_0_9th0', 'angle_1_0_10th0',
         'angle_1_0_11th0', 'angle_1_0_6th1', 'angle_1_0_7th1',
         'angle_1_0_8th1', 'angle_1_0_9th1', 'angle_1_0_10th1',
         'angle_1_0_11th1', 'angle_0_closest0_1', 'angle_0_2nd0_1',
         'angle_0_3rd0_1', 'angle_0_4th0_1', 'angle_0_5th0_1',
         'angle_0_6th0_1', 'angle_0_7th0_1', 'angle_0_8th0_1',
         'angle_0_9th0_1', 'angle_0_10th0_1', 'angle_0_11th0_1',
         'angle_0_closest1_1', 'angle_0_2nd1_1', 'angle_0_3rd1_1',
         'angle_0_4th1_1', 'angle_0_5th1_1', 'angle_0_6th1_1',
         'angle_0_7th1_1', 'angle_0_8th1_1', 'angle_0_9th1_1',
         'angle_0_10th1_1', 'angle_0_11th1_1'
     ])
 for mylist in tqdm(angle_list_clos_2nd):
     mol = openbabel.OBMol()
     id_name = mylist[0]
     mol_name = mylist[1]
     idx = 2
     angle_list = [
         'angle_1_0_6th0', 'angle_1_0_7th0', 'angle_1_0_8th0',
         'angle_1_0_9th0', 'angle_1_0_10th0', 'angle_1_0_11th0',
         'angle_1_0_6th1', 'angle_1_0_7th1', 'angle_1_0_8th1',
         'angle_1_0_9th1', 'angle_1_0_10th1', 'angle_1_0_11th1',
         'angle_0_closest0_1', 'angle_0_2nd0_1', 'angle_0_3rd0_1',
         'angle_0_4th0_1', 'angle_0_5th0_1', 'angle_0_6th0_1',
         'angle_0_7th0_1', 'angle_0_8th0_1', 'angle_0_9th0_1',
         'angle_0_10th0_1', 'angle_0_11th0_1', 'angle_0_closest1_1',
         'angle_0_2nd1_1', 'angle_0_3rd1_1', 'angle_0_4th1_1',
         'angle_0_5th1_1', 'angle_0_6th1_1', 'angle_0_7th1_1',
         'angle_0_8th1_1', 'angle_0_9th1_1', 'angle_0_10th1_1',
예제 #29
0
파일: ob.py 프로젝트: qize/oddt
 def clone(self):
     """Return a new ``Molecule`` built from a fresh ``ob.OBMol``.

     The new ``ob.OBMol`` is constructed from this object's ``OBMol``
     attribute and then wrapped in ``Molecule``.
     """
     duplicated_obmol = ob.OBMol(self.OBMol)
     return Molecule(duplicated_obmol)
예제 #30
0
    near_residues = {}
    for obatom in ob.OBResidueAtomIter(obres):
        atomcoord = numpy.array([obatom.x(), obatom.y(), obatom.z()])
        dist = numpy.sqrt(numpy.sum((npcoords - atomcoord)**2, axis=1))
        print "    %10.3f %10.3f %10.3f %s"%(obatom.x(), \
                obatom.y(), obatom.z(),  obres.GetAtomID(obatom))

        for index in numpy.flatnonzero(dist < args.mindist):
            res = obatoms[index].GetResidue()
            uniqname = res.GetName()+ "_" + str(res.GetIdx()) + "_" + \
                    res.GetChain()
            near_residues[uniqname] = res

    print "Now copying original molecule and removing atoms"
    extract_pdb = ob.OBMol(obmol)
    atomtoremove = []
    for res in ob.OBResidueIter(extract_pdb):
        uniqname = res.GetName()+ "_" + str(res.GetIdx()) + "_" + \
                    res.GetChain()
        if not (uniqname in near_residues):
            for obatom in ob.OBResidueAtomIter(res):
                atomtoremove.append(obatom)

    for atomtorm in atomtoremove:
        extract_pdb.DeleteAtom(atomtorm)

    extract_pdb.AddHydrogens()

    output = pybel.Outputfile("pdb", args.outputfile, overwrite=True)
    output.write(pybel.Molecule(extract_pdb))