def mmpbsa(xtc, tpr, R_idx, L_idx, fr_idx):
    """Run the MM/PBSA step on an explicit list of trajectory frames.

    The frame list is written to ``frames_idx.ndx``, an index file
    ``index.ndx`` with the groups ``receptor`` and ``ligand`` is built from
    residue-index ranges, the listed frames are extracted into ``final.xtc``
    and everything is handed to ``run_api``, which works in a directory named
    ``<first frame>_<last frame>``.

    :param xtc: trajectory file given to ``trjconv``.
    :param tpr: run-input file given to ``make_ndx`` and ``run_api``.
    :param R_idx: pair ``(first, last)`` of receptor residue indices.
    :param L_idx: pair ``(first, last)`` of ligand residue indices.
    :param fr_idx: non-empty sequence of frame identifiers; an empty
        sequence raises ``IndexError``.
    """
    frames_idx = 'frames_idx.ndx'
    index = 'index.ndx'  # holds the receptor/ligand groups
    indexed_xtc = 'final.xtc'
    out_dir = str(fr_idx[0]) + '_' + str(fr_idx[-1])

    with open(frames_idx, 'w') as f:
        f.write('[ frames ]\n')
        f.write('\n'.join(str(e) for e in fr_idx))
        f.write('\n')
    cs.print('Frames index for calculating:\n', np.array(fr_idx))

    # NOTE(review): the hard-coded numbers assume the default index built
    # from *tpr* ends at group 18, so the two new selections become groups
    # 19 and 20 -- confirm for systems with a different default group count.
    gmx.make_ndx(
        f=tpr,
        o=index,
        input=(
            'ri ' + str(R_idx[0]) + '-' + str(R_idx[1]),
            'name 19 receptor',
            'ri ' + str(L_idx[0]) + '-' + str(L_idx[1]),
            'name 20 ligand',
            'q'))

    cs.log('gmx-trjconv by frames idx list...')
    # Group 1 is selected as output ("only protein" in the original note).
    gmx.trjconv(f=xtc, o=indexed_xtc, fr=frames_idx, n=index, input='1')

    # Create the working directory without going through a shell; unlike
    # os.system('mkdir -p ...') a failure raises instead of being ignored.
    os.makedirs(out_dir, exist_ok=True)
    run_api(out_dir, tpr, indexed_xtc, index,
            com='Protein', rec='receptor', lig='ligand',
            b=0, e=10000, i=1)
def make_index(struct, ndx='main.ndx', oldndx=None):
    """Write an index file with ``make_ndx`` and return its group list.

    ``make_ndx`` is run once on *struct* without defining any new groups;
    the resulting index is saved as *ndx* and the group listing printed by
    ``make_ndx`` is parsed and returned.

    .. Note:: The special groups ``__main__`` and ``__environment__`` are
       **not** created by this function (that second pass is disabled).

    :Returns:
      *groups* is a list of dictionaries that describe the index groups.
      See :func:`gromacs.cbook.parse_ndxlist` for details.

    :Arguments:
      *struct* : filename
        structure (tpr, pdb, gro)
      *ndx* : string
        name of the final index file
      *oldndx* : string
        name of index file that should be used as a basis; if None
        then the ``make_ndx`` default groups are used.
    """
    print("make_ndx")

    # The empty command '' is important: it makes make_ndx print the final
    # list of groups, which is what gets parsed below. The original passed
    # input=('q'), which is just the string 'q' and never sent it.
    _, out, _ = gromacs.make_ndx(f=struct, n=oldndx, o=ndx, stdout=False,  #@UndefinedVariable
                                 input=('', 'q'))

    print("done")
    print(out)
    return gromacs.cbook.parse_ndxlist(out)
def make_index_cys(self):
    """Create the index file holding every cysteine sulfur and the water oxygens.

    **NO SANITY CHECKS**: the per-residue sulfur groups are named one after
    the other using the resids listed in the ``cysteines`` parameter.
    """
    # Strip the index down to one group per CYSH sulfur (CYS-S sorted by resid).
    prepare = ['keep 0', 'del 0', 'r CYSH & t S', 'splitres 0', 'del 0']
    # Canonical names for the CYS-S groups, in the order of the parameter list.
    rename = [
        'name %(groupid)d Cys%(resid)d' % {'groupid': number, 'resid': cys}
        for number, cys in enumerate(self.parameters.cysteines)
    ]
    # Finally add the water oxygens and quit.
    finish = ['t OW', 'q']

    return gromacs.make_ndx(f=self.simulation.tpr,
                            o=self.parameters.ndx,
                            input=prepare + rename + finish,
                            stdout=None)
def test2(a, radius, box=100.0, fbase=None):
    """Build, solvate and preprocess a gold-nanoparticle test system.

    Writes ``<fbase>.pdb`` (points from ``fcc_sphere`` shifted to the box
    centre), ``<fbase>.top``, the solvated ``<fbase>.sol.pdb``,
    ``<fbase>.ndx``, ``<fbase>.tpr`` and a PBS job script ``<fbase>.sh``.

    Example call from the original notes: ``l.test2(5.0797, 15)``.

    :param a: first argument passed to ``fcc_sphere``.
    :param radius: second argument passed to ``fcc_sphere``.
    :param box: edge length of the cubic box written to the CRYST1 record;
        the points are shifted by ``box / 2`` along each axis.
    :param fbase: base name of all generated files; defaults to
        ``"<box>_<radius>"``.
    """
    if fbase is None:
        fbase = "{}_{}".format(box, radius)

    top = "{}.top".format(fbase)
    struct = "{}.pdb".format(fbase)
    sol = "{}.sol.pdb".format(fbase)
    ndx = "{}.ndx".format(fbase)

    origin = box / 2.
    pts = fcc_sphere(a, radius)

    w = writer(struct)
    w.CRYST1([box, box, box, 90.00, 90.00, 90.00])
    for index, atom in enumerate(pts):
        w.ATOM(serial=index + 1, name="AU", resName="NP", resSeq=1,
               chainID="A", segID="AUNP", element="AU",
               x=atom[0] + origin, y=atom[1] + origin, z=atom[2] + origin)
    w.close()

    # open() instead of the Python-2-only builtin file(), which does not
    # exist on Python 3.
    with open(top, "w") as t:
        t.write(top_src)
        t.write("Au {}\n".format(pts.shape[0]))

    gromacs.genbox(p=top, cp=struct, cs="spc216.gro", o=sol, vdwd="0.15")  #@UndefinedVariable

    gromacs.make_ndx(f=sol, n=None, o=ndx, stdout=False,  #@UndefinedVariable
                     input=('', '', 'q'))

    gromacs.grompp(f="md2.mdp", o="{}.tpr".format(fbase), c=sol, p=top, n=ndx)  #@UndefinedVariable

    with open("{}.sh".format(fbase), "w") as f:
        f.write("#!/bin/bash\n")
        f.write("#PBS -k o\n")
        f.write("#PBS -l nodes=1:ppn=12:ccvt,walltime=24:00:00\n")
        f.write("#PBS -M [email protected]\n")
        f.write("#PBS -m abe\n")
        f.write("#PBS -N {}\n".format(fbase))
        f.write("#PBS -j oe\n")
        f.write("#PBS -q pg\n")
        f.write("#PBS -d /N/dc/scratch/somogyie/Au\n")
        f.write("mpirun mdrun -deffnm {}".format(fbase))
def make_index_cys(self):
    """Write the index file with one group per cysteine sulfur plus water oxygens.

    **NO SANITY CHECKS**: group names are assigned consecutively from the
    resids given in the ``cysteines`` parameter.
    """
    name_template = 'name {groupid:d} Cys{resid:d}'

    # CYS-S groups, one per residue, sorted by resid.
    script = ['keep 0', 'del 0', 'r CYSH & t S', 'splitres 0', 'del 0']
    # Name the CYS-S groups canonically.
    script.extend(
        name_template.format(groupid=position, resid=cysteine)
        for position, cysteine in enumerate(self.parameters.cysteines))
    # Water oxygens, then quit.
    script.extend(['t OW', 'q'])

    tpr_file = self.simulation.tpr
    ndx_file = self.parameters.ndx
    return gromacs.make_ndx(f=tpr_file, o=ndx_file, input=script, stdout=None)
def solvate(struct='top/protein.pdb', top='top/system.top',
            distance=0.9, boxtype='dodecahedron',
            concentration=0, cation='NA', anion='CL',
            water='spc', solvent_name='SOL', with_membrane=False,
            ndx='main.ndx', mainselection='"Protein"',
            dirname='solvate',
            **kwargs):
    """Put protein into box, add water, add counter-ions.

    Currently this really only supports solutes in water. If you need
    to embed a protein in a membrane then you will require more
    sophisticated approaches.

    However, you *can* supply a protein already inserted in a
    bilayer. In this case you will probably want to set *distance* =
    ``None`` and also enable *with_membrane* = ``True`` (using extra
    big vdw radii for typical lipids).

    .. Note:: The defaults are suitable for solvating a globular
       protein in a fairly tight (increase *distance*!) dodecahedral
       box.

    All intermediate files (``boxed.gro``, ``solvated.gro``,
    ``none.mdp``, ``topol.tpr``, ``ionized.gro``, ``ionized.tpr`` and the
    index *ndx*) are written inside *dirname*.

    :Arguments:
      *struct* : filename
          pdb or gro input structure
      *top* : filename
          Gromacs topology
      *distance* : float
          When solvating with water, make the box big enough so that
          at least *distance* nm water are between the solute *struct*
          and the box boundary.
          Set *boxtype* to ``None`` in order to use a box size in the input
          file (gro or pdb).
      *boxtype* or *bt*: string
          Any of the box types supported by :class:`~gromacs.tools.Editconf`
          (triclinic, cubic, dodecahedron, octahedron). Set the box dimensions
          either with *distance* or the *box* and *angle* keywords.

          If set to ``None`` it will ignore *distance* and use the box
          inside the *struct* file.

          *bt* overrides the value of *boxtype*.
      *box*
          List of three box lengths [A,B,C] that are used by
          :class:`~gromacs.tools.Editconf` in combination with *boxtype*
          (``bt`` in :program:`editconf`) and *angles*. Setting *box*
          overrides *distance*.
      *angles*
          List of three angles (only necessary for triclinic boxes).
      *concentration* : float
          Concentration of the free ions in mol/l. Note that counter
          ions are added in excess of this concentration.
      *cation* and *anion* : string
          Molecule names of the ions. This depends on the chosen force field.
      *water* : string
          Name of the water model. "spc", "spce" and "tip3p" all use the
          ``spc216`` solvent box; any other value is passed unchanged as
          :func:`~gromacs.genbox`'s *cs* argument, so an alternative
          solvent can be supplied as the path to a box with solvent
          molecules (also set *solvent_name* in that case).
      *solvent_name*
          Name of the molecules that make up the solvent (as set in the
          itp/top). Typically needs to be changed when using
          non-standard/non-water solvents. ["SOL"]
      *with_membrane* : bool
          ``True``: use special ``vdwradii.dat`` with 0.1 nm-increased radii
          on lipids. Default is ``False``.
      *ndx* : filename
          How to name the index file that is produced by this function.
      *mainselection* : string
          A string that is fed to :func:`make_main_index` and which should
          select the solute.
      *dirname* : directory name
          Name of the directory in which all files for the solvation stage
          are stored.
      *includes*
          List of additional directories to add to the mdp include path
      *kwargs*
          Additional arguments are passed on to
          :class:`~gromacs.tools.Editconf` or are interpreted as parameters
          to be changed in the mdp file.

    :Raises:
      :exc:`ValueError` for an unsupported *boxtype*;
      :exc:`MissingDataError` if *boxtype* is ``None`` and *struct* carries
      no box information.
    """
    structure = realpath(struct)
    topology = realpath(top)

    # arguments for editconf that we honour
    editconf_keywords = ["box", "bt", "angles", "c", "center", "aligncenter",
                         "align", "translate", "rotate", "princ"]
    editconf_kwargs = {k: kwargs.pop(k, None) for k in editconf_keywords}
    editconf_boxtypes = ["triclinic", "cubic", "dodecahedron", "octahedron", None]

    # needed for topology scrubbing
    scrubber_kwargs = {'marker': kwargs.pop('marker', None)}

    # sanity checks and argument dependencies
    bt = editconf_kwargs.pop('bt')
    boxtype = bt if bt else boxtype   # bt takes precedence over boxtype
    if boxtype not in editconf_boxtypes:
        # Format explicitly: the original used format(**vars()), but there is
        # no local called 'boxtypes', so it raised KeyError instead of the
        # intended ValueError.
        msg = "Unsupported boxtype {boxtype!r}: Only {boxtypes!r} are possible.".format(
            boxtype=boxtype, boxtypes=editconf_boxtypes)
        logger.error(msg)
        raise ValueError(msg)
    if editconf_kwargs['box']:
        distance = None    # if box is set then user knows what she is doing...

    # handle additional include directories (kwargs are also modified!)
    mdp_kwargs = cbook.add_mdp_includes(topology, kwargs)

    if water.lower() in ('spc', 'spce'):
        water = 'spc216'
    elif water.lower() == 'tip3p':
        water = 'spc216'
        logger.warning("TIP3P water model selected: using SPC equilibrated box "
                       "for initial solvation because it is a reasonable starting point "
                       "for any 3-point model. EQUILIBRATE THOROUGHLY!")

    # By default, grompp should not choke on a few warnings because at
    # this stage the user cannot do much about it (can be set to any
    # value but is kept undocumented...)
    grompp_maxwarn = kwargs.pop('maxwarn', 10)

    # clean topology (if user added the marker; the default marker is
    # ; Gromacs auto-generated entries follow:
    cbook.remove_molecules_from_topology(topology, **scrubber_kwargs)

    with in_dir(dirname):
        logger.info("[{dirname!s}] Solvating with water {water!r}...".format(
            dirname=dirname, water=water))
        if boxtype is None:
            # The box must come from the input file itself.
            hasBox = False
            ext = os.path.splitext(structure)[1]
            if ext == '.gro':
                hasBox = True
            elif ext == '.pdb':
                with open(structure) as pdb:
                    for line in pdb:
                        if line.startswith('CRYST'):
                            hasBox = True
                            break
            if not hasBox:
                msg = "No box data in the input structure {structure!r} and boxtype is set to None".format(
                    structure=structure)
                # logger.error, not logger.exception: no exception is being
                # handled at this point.
                logger.error(msg)
                raise MissingDataError(msg)
            distance = boxtype = None   # ensures that editconf just converts
        editconf_kwargs.update({'f': structure, 'o': 'boxed.gro',
                                'bt': boxtype, 'd': distance})
        gromacs.editconf(**editconf_kwargs)

        if with_membrane:
            vdwradii_dat = get_lipid_vdwradii()  # need to clean up afterwards
            logger.info("Using special vdW radii for lipids {0!r}".format(vdw_lipid_resnames))

        try:
            gromacs.genbox(p=topology, cp='boxed.gro', cs=water, o='solvated.gro')
        except:  # noqa: E722 -- deliberate: clean up on *any* failure, then re-raise
            if with_membrane:
                # remove so that it's not picked up accidentally
                utilities.unlink_f(vdwradii_dat)
            raise
        logger.info("Solvated system with %s", water)

        with open('none.mdp', 'w') as mdp:
            mdp.write('; empty mdp file\ninclude = {include!s}\nrcoulomb = 1\nrvdw = 1\nrlist = 1\n'.format(**mdp_kwargs))
        qtotgmx = cbook.grompp_qtot(f='none.mdp', o='topol.tpr', c='solvated.gro',
                                    p=topology, stdout=False, maxwarn=grompp_maxwarn)
        qtot = round(qtotgmx)
        logger.info("[{dirname!s}] After solvation: total charge qtot = {qtotgmx!r} = {qtot!r}".format(
            dirname=dirname, qtotgmx=qtotgmx, qtot=qtot))

        if concentration != 0:
            logger.info("[{dirname!s}] Adding ions for c = {concentration:f} M...".format(
                dirname=dirname, concentration=concentration))
            # target concentration of free ions c ==>
            #    N = N_water * c/c_water
            # add ions for concentration to the counter ions (counter ions are less free)
            #
            # get number of waters (count OW ... works for SPC*, TIP*P water models)
            rc, output, junk = gromacs.make_ndx(f='topol.tpr', o='ow.ndx',
                                                input=('keep 0', 'del 0', 'a OW*', 'name 0 OW', '', 'q'),
                                                stdout=False)
            groups = cbook.parse_ndxlist(output)
            gdict = {g['name']: g for g in groups}   # overkill...
            N_water = gdict['OW']['natoms']          # ... but dict lookup is nice
            N_ions = int(N_water * concentration/CONC_WATER)  # number of monovalents
        else:
            N_ions = 0

        # neutralize (or try -neutral switch of genion???)
        n_cation = n_anion = 0
        if qtot > 0:
            n_anion = int(abs(qtot))
        elif qtot < 0:
            n_cation = int(abs(qtot))

        n_cation += N_ions
        n_anion += N_ions

        if n_cation != 0 or n_anion != 0:
            # sanity check: the added ions must cancel the total charge
            assert abs(qtot + n_cation - n_anion) < 1e-6
            logger.info("[{dirname!s}] Adding n_cation = {n_cation:d} and n_anion = {n_anion:d} ions...".format(
                dirname=dirname, n_cation=n_cation, n_anion=n_anion))
            gromacs.genion(s='topol.tpr', o='ionized.gro', p=topology,
                           pname=cation, nname=anion, np=n_cation, nn=n_anion,
                           input=solvent_name)
        else:
            # fake ionized file ... makes it easier to continue without too much fuzz
            try:
                os.unlink('ionized.gro')
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
            os.symlink('solvated.gro', 'ionized.gro')

        qtot = cbook.grompp_qtot(f='none.mdp', o='ionized.tpr', c='ionized.gro',
                                 p=topology, stdout=False, maxwarn=grompp_maxwarn)

        if abs(qtot) > 1e-4:
            wmsg = "System has non-zero total charge qtot = {qtot:g} e.".format(qtot=qtot)
            warnings.warn(wmsg, category=BadParameterWarning)
            logger.warning(wmsg)

        # make main index
        try:
            make_main_index('ionized.tpr', selection=mainselection, ndx=ndx)
        except GromacsError as err:
            # or should I rather fail here?
            wmsg = "Failed to make main index file %r ... maybe set mainselection='...'.\n"\
                   "The error message was:\n%s\n" % (ndx, str(err))
            logger.warning(wmsg)
            warnings.warn(wmsg, category=GromacsFailureWarning)
def make_main_index(struct, selection='"Protein"', ndx='main.ndx', oldndx=None):
    """Make index file with the special groups.

    This routine adds the group __main__ and the group __environment__
    to the end of the index file. __main__ contains what the user
    defines as the *central* and *most important* parts of the
    system. __environment__ is everything else.

    The template mdp file, for instance, uses these two groups for T-coupling.

    These groups are mainly useful if the default groups "Protein" and "Non-Protein"
    are not appropriate. By using symbolic names such as __main__ one
    can keep scripts more general.

    :Returns:
      *groups* is a list of dictionaries that describe the index groups. See
      :func:`gromacs.cbook.parse_ndxlist` for details.

    :Arguments:
      *struct* : filename
        structure (tpr, pdb, gro)
      *selection* : string
        name of an existing index group such as ``"Protein"`` which
        determines what is considered the main group for centering etc.
        Surrounding double quotes are stripped and the name is matched
        case-insensitively against the groups listed by ``make_ndx``.
      *ndx* : string
        name of the final index file
      *oldndx* : string
        name of index file that should be used as a basis; if None
        then the ``make_ndx`` default groups are used.

    :Raises:
      :exc:`ValueError` if no group matches *selection*.

    This routine is very dumb at the moment; maybe some heuristics will be
    added later as could be other symbolic groups such as __membrane__.
    """
    logger.info("Building the main index file {ndx!r}...".format(ndx=ndx))

    # pass 1: get a list of groups
    # need the first "" to get make_ndx to spit out the group list.
    _, out, _ = gromacs.make_ndx(f=struct, n=oldndx, o=ndx, stdout=False,
                                 input=("", "q"))
    groups = cbook.parse_ndxlist(out)

    # find the matching groups,
    # there is a nasty bug in GROMACS where make_ndx may have multiple
    # groups, which caused the previous approach to fail big time.
    # this is a work around the make_ndx bug.
    # stripping the "" allows compatibility with existing make_ndx selection commands.
    selection = selection.strip("\"")
    wanted = selection.lower()
    selected_groups = [g for g in groups if g['name'].lower() == wanted]

    # Use the module logger (as for the message at the top of this function)
    # rather than the root logging functions.
    if len(selected_groups) > 1:
        logger.warning("make_ndx created duplicated groups, performing work around")

    if not selected_groups:
        msg = "no groups found for selection {0}, available groups are {1}".format(selection, groups)
        logger.error(msg)
        raise ValueError(msg)

    # Found at least one matching group, we're OK

    # index of last group
    last = len(groups) - 1
    assert last == groups[-1]['nr']

    group = selected_groups[0]

    # pass 2:
    # 1) last group is __main__
    # 2) __environment__ is everything else (eg SOL, ions, ...)
    _, out, _ = gromacs.make_ndx(
        f=struct, n=ndx, o=ndx, stdout=False,
        input=(
            # copy the selected group, this now has index last + 1
            "{0}".format(group['nr']),
            # rename this to __main__
            "name {0} __main__".format(last+1),
            # make a complement to this group, it gets index last + 2
            "! \"__main__\"",
            # rename this to __environment__
            "name {0} __environment__".format(last+2),
            # list the groups
            "",
            # quit
            "q"))
    return cbook.parse_ndxlist(out)
def apply_distance(tpr, xtc, R_idx, L_idx, times_idx, fr_per_ps=1, bond_d=3.3):
    """Run MM/PBSA per frame with interface waters picked by distance.

    For every time ``i`` in *times_idx* the frame is dumped to a PDB,
    waters whose residue centre lies within 0.4 of both the receptor and
    the ligand group are selected with ``gmx select``, ``assign_hyhoh``
    splits them between receptor and ligand, a sub-system (group 1 plus
    those waters) xtc/tpr pair with ``receptor``/``ligand``/``com`` index
    groups is written, and ``run_api`` is run in a directory named after
    ``i``. Progress is appended to ``log_file``.

    :param tpr: run-input file of the full system.
    :param xtc: trajectory of the full system.
    :param R_idx: pair ``(first, last)`` of receptor residue indices.
    :param L_idx: pair ``(first, last)`` of ligand residue indices.
    :param times_idx: iterable of frame times, passed as ``b``/``e`` to the
        gmx tools.
    :param fr_per_ps: frames per ps; only used for the progress banner.
    :param bond_d: distance passed to ``assign_hyhoh``.
    """
    # The residue-range selections do not depend on the frame.
    select_R_cmd = 'ri ' + str(R_idx[0]) + '-' + str(R_idx[1])
    select_L_cmd = 'ri ' + str(L_idx[0]) + '-' + str(L_idx[1])

    for i in times_idx:
        cs.rule('Processing frame: ' + str(i) + ' ps, ' + str(i * fr_per_ps) + ' f')
        # TODO: rerun control

        temp_ndx = str(i) + 'ps_tmp.ndx'
        frame_sys_pdb = str(i) + 'ps_sys.pdb'
        dsthoh_idx = str(i) + 'ps_dsthoh.ndx'
        rec_lig_ndx = str(i) + '.ndx'
        frame_pdb = str(i) + 'ps.pdb'
        frame_xtc = str(i) + 'ps.xtc'
        frame_tpr = str(i) + 'ps.tpr'

        gmx.trjconv(f=xtc, s=tpr, o=frame_sys_pdb, b=i, e=i, input='System')

        # Make a new index for R and L.
        # NOTE(review): the hard-coded numbers assume the default index ends
        # at group 18, so the selections become groups 19 and 20 -- confirm.
        gmx.make_ndx(
            f=frame_sys_pdb,
            o=temp_ndx,
            input=(select_R_cmd, select_L_cmd,  # 19 20
                   'name 19 r_p', 'name 20 l_p', 'q'))
        gmx.select(
            f=xtc, n=temp_ndx, b=i, e=i, s=tpr, seltype='res_com',
            select='group Water and within 0.4 of group r_p and within 0.4 of group l_p',
            on=dsthoh_idx)
        dsthoh_list = read_gmx_selected_ndx(dsthoh_idx)

        # Get the heavy Atom objects of R, L and the waters.
        # See this_project/PDB.io.reader and the Atom class for more details.
        protein_atoms, waters = structure_serialize(frame_sys_pdb, ['N', 'C', 'O'])
        # Keep only the waters selected above ("ice" objects).
        ices = [ice for ice in waters if ice.res_seq in dsthoh_list]
        del waters  # garbage collection

        # Assign hydration HOH to R or L according to the calculated nearest
        # distance from R and L to the hyHOHs, respectively.
        RHOHs, LHOHs = assign_hyhoh(protein_atoms, ices, R_idx, L_idx, bond_d)
        del ices, protein_atoms  # garbage collection
        cs.print('\nNum of R/L HOH: ', len(RHOHs), len(LHOHs))
        cs.print('R_HOHs:\n', np.array(RHOHs))
        cs.print('L_HOHs:\n', np.array(LHOHs))
        if len(RHOHs) == 0 and len(LHOHs) == 0:
            cs.print('\nWARNING IN ASSIGNMENT1: No hyhoh found!', style="red")
            os.system('rm -v ' + str(i) + '*')
            continue

        # Run gmx-make_ndx to address (Protein + hydration HOH).
        hyHOH_list = RHOHs + LHOHs
        hyHOH_list.sort()
        gmx.make_ndx(
            f=tpr, n=temp_ndx, o=temp_ndx,
            input=('r ' + ' '.join(str(hoh) for hoh in hyHOH_list),
                   'name 21 dstHOH',
                   '1 | 21',  # "|" is the union of both groups
                   'name 22 com',
                   'q'))

        cs.log("generate short-term xtc and sub-group tpr for mmpbsa", style='blue')
        gmx.trjconv(f=xtc, o=frame_xtc, b=i, e=i, n=temp_ndx, input='22')
        gmx.convert_tpr(s=tpr, o=frame_tpr, n=temp_ndx, input='22')

        # Generate a short-term pdb for show and check, can be deleted.
        gmx.trjconv(f=xtc, s=tpr, o=frame_pdb, b=i, e=i, n=temp_ndx, input='22')

        # Make a new index for the sub-system tpr and xtc.
        # NOTE(review): numbering assumes the default index of the sub-system
        # ends at group 14 -- confirm.
        select_RH_cmd = 'r ' + ' '.join(str(hoh) for hoh in RHOHs)
        select_LH_cmd = 'r ' + ' '.join(str(hoh) for hoh in LHOHs)
        gmx.make_ndx(
            f=frame_tpr,
            o=rec_lig_ndx,
            input=(select_R_cmd, select_L_cmd,  # 15 16
                   'name 15 r_p', 'name 16 l_p',
                   select_RH_cmd, select_LH_cmd,  # 17 (18)
                   'name 17 r_HOH', 'name 18 l_HOH',
                   'q'))

        # Presumably an empty water list produces no group, so only group 17
        # exists in that case and the new group numbers shift down by one.
        if len(RHOHs) == 0:
            # group 17 then holds the ligand waters
            gmx.make_ndx(
                f=frame_tpr, n=rec_lig_ndx, o=rec_lig_ndx,
                input=('15', 'name 18 receptor',  # 18
                       '16 | 17', 'name 19 ligand',  # 19
                       '18 | 19', 'name 20 com',  # 20
                       'q'))
        elif len(LHOHs) == 0:
            gmx.make_ndx(
                f=frame_tpr, n=rec_lig_ndx, o=rec_lig_ndx,
                input=('15 | 17', 'name 18 receptor',  # 18
                       '16', 'name 19 ligand',  # 19
                       '18 | 19', 'name 20 com',  # 20
                       'q'))
        else:
            gmx.make_ndx(
                f=frame_tpr, n=rec_lig_ndx, o=rec_lig_ndx,
                input=('15 | 17', 'name 19 receptor',  # 19
                       '16 | 18', 'name 20 ligand',  # 20
                       '19 | 20', 'name 21 com',  # 21
                       'q'))

        # Deal with log and temp intermediate files, part 1.
        with open(log_file, 'a', encoding='utf-8') as fw:
            fw.write(rec_lig_ndx + ': ' + str(len(RHOHs)) + ', ' + str(len(LHOHs)) + '\n' +
                     str(i) + 'ps\n' +
                     ' LHOHs: ' + select_LH_cmd + '\n' +
                     ' RHOHs: ' + select_RH_cmd + '\n')
        os.system('rm -v ' + temp_ndx)
        os.system('rm -v ' + frame_sys_pdb)
        # Delete all files starting with '#'. Raw string: '\#' is an invalid
        # escape sequence in a normal literal; the shell command is unchanged.
        os.system(r'rm -v \#*')

        # Run the MMPBSA script in its own directory. os.makedirs replaces
        # the shell 'mkdir -p' so that a failure is not silently ignored.
        os.makedirs(str(i), exist_ok=True)
        run_api(dir=str(i), tpr='../' + frame_tpr, xtc=frame_xtc, ndx=rec_lig_ndx,
                com='com', rec='receptor', lig='ligand', b=1, e=10001, i=1)

        os.system('rm ' + frame_tpr)
        os.system('rm -v ' + frame_xtc)
        os.system('rm -v ' + rec_lig_ndx)
        os.system('rm -v ' + dsthoh_idx)

        # Deal with log: record the parameters and a timestamp.
        with open(log_file, 'a', encoding='utf-8') as fw:
            fw.write(' info: \n' +
                     ' -R_idx ' + str(R_idx[0]) + ' ' + str(R_idx[1]) + '\n' +
                     ' -L_idx ' + str(L_idx[0]) + ' ' + str(L_idx[1]) + '\n' +
                     ' -bond_d ' + str(bond_d) + '\n' +
                     ' ' + time.strftime("%a %b %d %H:%M:%S %Y", time.localtime()))
            fw.write('\n')
def make_index(struct, ndx='main.ndx', oldndx=None):
    """Write an index file with ``make_ndx`` and return its group list.

    ``make_ndx`` is run once on *struct* without defining any new groups;
    the resulting index is saved as *ndx* and the group listing printed by
    ``make_ndx`` is parsed and returned.

    .. Note:: The special groups ``__main__`` and ``__environment__`` are
       **not** created by this function (that second pass is disabled).

    :Returns:
      *groups* is a list of dictionaries that describe the index groups.
      See :func:`gromacs.cbook.parse_ndxlist` for details.

    :Arguments:
      *struct* : filename
        structure (tpr, pdb, gro)
      *ndx* : string
        name of the final index file
      *oldndx* : string
        name of index file that should be used as a basis; if None
        then the ``make_ndx`` default groups are used.
    """
    print("make_ndx")

    # The empty command '' is important: it makes make_ndx print the final
    # list of groups, which is what gets parsed below. The original passed
    # input=('q'), which is just the string 'q' and never sent it.
    _, out, _ = gromacs.make_ndx(
        f=struct,
        n=oldndx,
        o=ndx,
        stdout=False,  #@UndefinedVariable
        input=('', 'q'))

    print("done")
    print(out)
    return gromacs.cbook.parse_ndxlist(out)
def apply_windows(xtc, tpr, R_idx, L_idx, frames_idx, win_params, num_hyHOH,
                  fr_per_ps=1, threshold=0.4, bond_d=2.07):
    """Run MM/PBSA per frame with hydration waters picked by windowed RMSF.

    For every ``idx`` in *frames_idx* a time window of length ``win_len``
    is placed around the frame (clamped to ``[begin, final]``), the
    per-residue RMSF of ``SOL`` and an average structure are computed over
    that window, ``idx_hyhoh_by_RMSF`` picks the candidate waters,
    ``assign_hyhoh`` splits them between receptor and ligand, a sub-system
    (group 1 plus those waters) xtc/tpr pair with
    ``receptor``/``ligand``/``com`` index groups is written for the single
    frame, and ``run_api`` is run in a directory ``<start>_<end>``.
    Progress is appended to ``log_file``.

    :param xtc: trajectory of the full system.
    :param tpr: run-input file of the full system.
    :param R_idx: pair ``(first, last)`` of receptor residue indices.
    :param L_idx: pair ``(first, last)`` of ligand residue indices.
    :param frames_idx: iterable of frame identifiers to process.
    :param win_params: sequence ``[begin, final, win_len, win_stride]``;
        ``win_stride`` is currently unused.
    :param num_hyHOH: passed to ``idx_hyhoh_by_RMSF``.
    :param fr_per_ps: frames per ps, used to convert ``idx`` to a time.
    :param threshold: base RMSF threshold; it is increased for ``idx``
        above 3000 (see the ``thr`` computation).
    :param bond_d: distance passed to ``assign_hyhoh``.
    """
    begin, final, win_len, _win_stride = win_params

    # The residue-range selections do not depend on the frame.
    select_R_cmd = 'ri ' + str(R_idx[0]) + '-' + str(R_idx[1])
    select_L_cmd = 'ri ' + str(L_idx[0]) + '-' + str(L_idx[1])

    # 20220527: windows expand around each frame_idx
    for idx in frames_idx:
        start = float(idx)/fr_per_ps - int(win_len/2)
        end = start + win_len
        if start < begin:
            start = begin
        if end > final:
            end = final

        # 20220606: RMSF threshold of SOL grows linearly once idx exceeds 3000
        thr = threshold + (np.maximum(idx-3000, 0) / (final - begin))*0.5
        cs.print('RMSF threshold: ', thr, style="red")

        cs.rule('Processing window: '+str(start)+'-'+str(end)+' ps, '+str(start*fr_per_ps)+'-'+str(end*fr_per_ps)+' f')
        # TODO: rerun control

        window = str(start) + '_' + str(end)
        temp_ave_pdb = window + '_tmp.pdb'
        temp_ndx = window + '_tmp.ndx'
        short_ndx = window + '.ndx'
        short_xtc = window + '.xtc'
        short_tpr = window + '.tpr'
        short_frame_idx = window + '_frame_idx.ndx'
        short_rmsf_xvg = window + '_rmsf.xvg'

        # Frame list for trjconv: just the current frame.
        with open(short_frame_idx, 'w') as f:
            f.write('[ frames ]\n')
            f.write(str(idx))
            f.write('\n')
        cs.print('Frames index for calculating:\n', idx)

        cs.log('Generate temp files ...', style='blue')
        gmx.make_ndx(f=tpr, o=temp_ndx, input='q')

        # Run gmx-rmsf on this window to calculate the RMSF of all waters ...
        gmx.rmsf(s=tpr, f=xtc, o=short_rmsf_xvg, res='true', b=start, e=end, n=temp_ndx, input='SOL')
        # ... and to write the window-averaged structure.
        gmx.rmsf(s=tpr, f=xtc, ox=temp_ave_pdb, b=start, e=end, n=temp_ndx, input='System')

        cs.log('Searching Hy-HOHs ...', style='blue')
        try:
            ice_idx = idx_hyhoh_by_RMSF(short_rmsf_xvg, num_hyHOH, thr)
        except ExceptionPassing as e:
            cs.print(e.message, style="red")
            os.system('rm -v ' + window + '*')
            continue

        # Get the heavy Atom objects of R, L and the waters.
        # See this_project/PDB.io.reader and the Atom class for more details.
        protein_atoms, waters = structure_serialize(temp_ave_pdb, ['N', 'C', 'O'])
        # Keep only the waters selected above ("ice" objects).
        ices = [ice for ice in waters if ice.res_seq in ice_idx]

        # Assign hydration HOH to R or L according to the calculated nearest
        # distance from R and L to the hyHOHs, respectively.
        RHOHs, LHOHs = assign_hyhoh(protein_atoms, ices, R_idx, L_idx, bond_d)
        del ices, protein_atoms, waters  # garbage collection
        cs.print('\nNum of R/L HOH: ', len(RHOHs), len(LHOHs))
        cs.print('R_HOHs:\n', np.array(RHOHs))
        cs.print('L_HOHs:\n', np.array(LHOHs))
        if len(RHOHs) == 0 and len(LHOHs) == 0:
            cs.print('\nWARNING IN ASSIGNMENT1: No hyhoh found!', style="red")
            os.system('rm -v ' + window + '*')
            continue

        # Run gmx-make_ndx to address (Protein + hydration HOH).
        # NOTE(review): the hard-coded numbers assume the default index ends
        # at group 18 -- confirm.
        hyHOH_list = RHOHs + LHOHs
        hyHOH_list.sort()
        gmx.make_ndx(f=tpr, n=temp_ndx, o=temp_ndx,
                     input=('r ' + ' '.join(str(hoh) for hoh in hyHOH_list),
                            'name 19 hyHOH',
                            '1 | 19',  # "|" is the union of both groups
                            'name 20 com',
                            'q'))

        cs.log("generate short-term xtc and sub-group tpr for mmpbsa", style='blue')
        gmx.trjconv(f=xtc, o=short_xtc, fr=short_frame_idx, n=temp_ndx, input='20')
        gmx.convert_tpr(s=tpr, o=short_tpr, n=temp_ndx, input='20')

        # Make a new index for short_tpr and short_xtc.
        # NOTE(review): numbering assumes the default index of the sub-system
        # ends at group 14 -- confirm.
        select_RH_cmd = 'r ' + ' '.join(str(hoh) for hoh in RHOHs)
        select_LH_cmd = 'r ' + ' '.join(str(hoh) for hoh in LHOHs)
        gmx.make_ndx(f=short_tpr, o=short_ndx,
                     input=(select_R_cmd, select_L_cmd,  # 15 16
                            'name 15 r_p', 'name 16 l_p',
                            select_RH_cmd, select_LH_cmd,  # 17 (18)
                            'name 17 r_HOH', 'name 18 l_HOH',
                            'q'))

        # Presumably an empty water list produces no group, so only group 17
        # exists in that case and the new group numbers shift down by one.
        if len(RHOHs) == 0:
            # group 17 then holds the ligand waters
            gmx.make_ndx(f=short_tpr, n=short_ndx, o=short_ndx,
                         input=('15', 'name 18 receptor',  # 18
                                '16 | 17', 'name 19 ligand',  # 19
                                '18 | 19', 'name 20 com',  # 20
                                'q'))
        elif len(LHOHs) == 0:
            gmx.make_ndx(f=short_tpr, n=short_ndx, o=short_ndx,
                         input=('15 | 17', 'name 18 receptor',  # 18
                                '16', 'name 19 ligand',  # 19
                                '18 | 19', 'name 20 com',  # 20
                                'q'))
        else:
            gmx.make_ndx(f=short_tpr, n=short_ndx, o=short_ndx,
                         input=('15 | 17', 'name 19 receptor',  # 19
                                '16 | 18', 'name 20 ligand',  # 20
                                '19 | 20', 'name 21 com',  # 21
                                'q'))

        # Deal with log and temp intermediate files, part 1.
        with open(log_file, 'a', encoding='utf-8') as fw:
            fw.write(short_ndx + ': ' + str(len(RHOHs)) + ', ' + str(len(LHOHs)) + '\n' +
                     str(idx) + '\n' +
                     ' LHOHs: ' + select_LH_cmd + '\n' +
                     ' RHOHs: ' + select_RH_cmd + '\n')
        os.system('rm -v ' + temp_ndx)
        os.system('rm -v ' + temp_ave_pdb)
        # presumably the default -o output of the second gmx.rmsf call
        os.system('rm -v rmsf.xvg')
        # Delete all files starting with '#'. Raw string: '\#' is an invalid
        # escape sequence in a normal literal; the shell command is unchanged.
        os.system(r'rm -v \#*')

        # Run the MMPBSA script in its own directory. os.makedirs replaces
        # the shell 'mkdir -p' so that a failure is not silently ignored.
        os.makedirs(window, exist_ok=True)
        run_api(dir=window, tpr='../'+short_tpr, xtc=short_xtc, ndx=short_ndx,
                com='com', rec='receptor', lig='ligand', b=start, e=end, i=1)

        os.system('rm ' + short_xtc)
        os.system('rm ' + short_ndx)
        os.system('rm ' + short_tpr)
        os.system('rm ' + short_frame_idx)
        os.system('rm ' + short_rmsf_xvg)

        # Deal with log: record the parameters and a timestamp.
        with open(log_file, 'a', encoding='utf-8') as fw:
            fw.write(' info: \n' +
                     ' -win_params ' + str(win_params[0]) + ' ' + str(win_params[1]) + '\n' +
                     ' -R_idx ' + str(R_idx[0]) + ' ' + str(R_idx[1]) + '\n' +
                     ' -L_idx ' + str(L_idx[0]) + ' ' + str(L_idx[1]) + '\n' +
                     ' -thr ' + str(thr) + '\n' +
                     ' -bond_d ' + str(bond_d) + '\n' +
                     ' -num_hyHOH ' + str(num_hyHOH) + '\n' +
                     ' ' + time.strftime("%a %b %d %H:%M:%S %Y", time.localtime()))
            fw.write('\n')
def solvate_ion(struct='solvated.gro', top='top/system.top',
                concentration=0, cation='NA', anion='CL',
                solvent_name='SOL', ndx='main.ndx',
                mainselection='"Protein"', dirname='solvate', **kwargs):
    """Add counter ions (and optionally salt) to an already solvated system.

    The total charge is determined with ``grompp``; ions that neutralize the
    rounded charge are added with ``genion``, plus ``N_ions`` ion pairs when
    *concentration* is non-zero.  Afterwards the main index file and a compact
    pdb are produced; failures of these last two steps are only reported as
    warnings.

    :Arguments:
      *struct* : filename
          solvated input structure (resolved before changing into *dirname*)
      *top* : filename
          Gromacs topology (resolved before changing into *dirname*)
      *concentration* : float
          if non-zero, ``int(N_water * concentration / CONC_WATER)`` cations
          and the same number of anions are added in addition to the
          neutralizing ions; ``N_water`` is the size of the ``OW*`` atom group
      *cation* and *anion* : string
          names passed to ``genion`` as ``pname`` and ``nname``
      *solvent_name* : string
          fed to ``genion`` on stdin (the group whose molecules are replaced)
      *ndx* : filename
          name of the index file written by :func:`make_main_index`
      *mainselection* : string
          selection handed to :func:`make_main_index`
      *dirname* : directory name
          working directory in which all files are created
      *kwargs*
          ``maxwarn`` (default 10) is passed to grompp; the remaining keywords
          are handed to ``cbook.add_mdp_includes``.

    :Returns: dict with keys ``qtot`` (total charge of the final system),
              ``struct``, ``ndx`` and ``mainselection``.
    """
    structure = realpath(struct)
    topology = realpath(top)

    # By default, grompp should not choke on a few warnings because at
    # this stage the user cannot do much about it (can be set to any
    # value but is kept undocumented...)
    grompp_maxwarn = kwargs.pop('maxwarn', 10)

    # handle additional include directories (kwargs are also modified!)
    mdp_kwargs = cbook.add_mdp_includes(topology, kwargs)

    with in_dir(dirname):
        with open('none.mdp', 'w') as mdp:
            mdp.write(
                '; empty mdp file\ninclude = {include!s}\nrcoulomb = 1\nrvdw = 1\nrlist = 1\n'
                .format(**mdp_kwargs))
        qtotgmx = cbook.grompp_qtot(f='none.mdp', o='topol.tpr', c=structure,
                                    p=topology, stdout=False,
                                    maxwarn=grompp_maxwarn)
        qtot = round(qtotgmx)
        logger.info("[%s] After solvation: total charge qtot = %r = %r",
                    dirname, qtotgmx, qtot)

        if concentration != 0:
            logger.info("[%s] Adding ions for c = %f M...",
                        dirname, concentration)
            # target concentration of free ions c ==>
            #    N = N_water * c/c_water
            # add ions for concentration to the counter ions (counter ions are less free)
            #
            # get number of waters (count OW ... works for SPC*, TIP*P water models)
            _, output, _ = gromacs.make_ndx(
                f='topol.tpr', o='ow.ndx',
                input=('keep 0', 'del 0', 'a OW*', 'name 0 OW', '', 'q'),
                stdout=False)
            groups_by_name = {g['name']: g for g in cbook.parse_ndxlist(output)}
            N_water = groups_by_name['OW']['natoms']
            N_ions = int(N_water * concentration / CONC_WATER)  # number of monovalents
        else:
            N_ions = 0

        # neutralize (or try -neutral switch of genion???)
        n_cation = n_anion = 0
        if qtot > 0:
            n_anion = int(abs(qtot))
        elif qtot < 0:
            n_cation = int(abs(qtot))

        n_cation += N_ions
        n_anion += N_ions

        if n_cation != 0 or n_anion != 0:
            # sanity check: the residual charge must vanish in BOTH directions
            # (the one-sided "< 1e-6" accepted any negative residual)
            assert abs(qtot + n_cation - n_anion) < 1e-6
            logger.info("[%s] Adding n_cation = %d and n_anion = %d ions...",
                        dirname, n_cation, n_anion)
            gromacs.genion(s='topol.tpr', o='ionized.gro', p=topology,
                           pname=cation, nname=anion,
                           np=n_cation, nn=n_anion,
                           input=solvent_name)
        else:
            # fake ionized file ... makes it easier to continue without too much fuzz
            try:
                os.unlink('ionized.gro')
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
            # NOTE(review): the link target is the literal 'solvated.gro'
            # inside *dirname*, not *struct* -- confirm callers always place
            # the solvated structure there.
            os.symlink('solvated.gro', 'ionized.gro')

        qtot = cbook.grompp_qtot(f='none.mdp', o='ionized.tpr',
                                 c='ionized.gro', p=topology, stdout=False,
                                 maxwarn=grompp_maxwarn)

        if abs(qtot) > 1e-4:
            wmsg = "System has non-zero total charge qtot = {qtot:g} e.".format(
                qtot=qtot)
            warnings.warn(wmsg, category=BadParameterWarning)
            logger.warning(wmsg)

        # make main index
        try:
            make_main_index('ionized.tpr', selection=mainselection, ndx=ndx)
        except GromacsError as err:
            # or should I rather fail here?
            wmsg = "Failed to make main index file %r ... maybe set mainselection='...'.\n"\
                   "The error message was:\n%s\n" % (ndx, str(err))
            logger.warning(wmsg)
            warnings.warn(wmsg, category=GromacsFailureWarning)
        try:
            trj_compact_main(f='ionized.gro', s='ionized.tpr',
                             o='compact.pdb', n=ndx)
        except GromacsError as err:
            wmsg = "Failed to make compact pdb for visualization... pressing on regardless. "\
                   "The error message was:\n%s\n" % str(err)
            logger.warning(wmsg)
            warnings.warn(wmsg, category=GromacsFailureWarning)

    return {
        'qtot': qtot,
        'struct': realpath(dirname, 'ionized.gro'),
        'ndx': realpath(dirname, ndx),  # not sure why this is propagated-is it used?
        'mainselection': mainselection,
    }
def solvate_ion(struct='solvated.gro', top='top/system.top',
                concentration=0, cation='NA', anion='CL',
                solvent_name='SOL', ndx='main.ndx',
                mainselection='"Protein"', dirname='solvate', **kwargs):
    """Add counter ions (and optionally salt) to an already solvated system.

    The total charge is determined with ``grompp``; ions that neutralize the
    rounded charge are added with ``genion``, plus ``N_ions`` ion pairs when
    *concentration* is non-zero.  Afterwards the main index file and a compact
    pdb are produced; failures of these last two steps are only reported as
    warnings.

    :Arguments:
      *struct* : filename
          solvated input structure (resolved before changing into *dirname*)
      *top* : filename
          Gromacs topology (resolved before changing into *dirname*)
      *concentration* : float
          if non-zero, ``int(N_water * concentration / CONC_WATER)`` cations
          and the same number of anions are added in addition to the
          neutralizing ions; ``N_water`` is the size of the ``OW*`` atom group
      *cation* and *anion* : string
          names passed to ``genion`` as ``pname`` and ``nname``
      *solvent_name* : string
          fed to ``genion`` on stdin (the group whose molecules are replaced)
      *ndx* : filename
          name of the index file written by :func:`make_main_index`
      *mainselection* : string
          selection handed to :func:`make_main_index`
      *dirname* : directory name
          working directory in which all files are created
      *kwargs*
          ``maxwarn`` (default 10) is passed to grompp; the remaining keywords
          are handed to ``cbook.add_mdp_includes``.

    :Returns: dict with keys ``qtot`` (total charge of the final system),
              ``struct``, ``ndx`` and ``mainselection``.
    """
    structure = realpath(struct)
    topology = realpath(top)

    # By default, grompp should not choke on a few warnings because at
    # this stage the user cannot do much about it (can be set to any
    # value but is kept undocumented...)
    grompp_maxwarn = kwargs.pop('maxwarn', 10)

    # handle additional include directories (kwargs are also modified!)
    mdp_kwargs = cbook.add_mdp_includes(topology, kwargs)

    with in_dir(dirname):
        with open('none.mdp', 'w') as mdp:
            mdp.write(
                '; empty mdp file\ninclude = {include!s}\nrcoulomb = 1\nrvdw = 1\nrlist = 1\n'
                .format(**mdp_kwargs))
        qtotgmx = cbook.grompp_qtot(f='none.mdp', o='topol.tpr', c=structure,
                                    p=topology, stdout=False,
                                    maxwarn=grompp_maxwarn)
        qtot = round(qtotgmx)
        logger.info("[%s] After solvation: total charge qtot = %r = %r",
                    dirname, qtotgmx, qtot)

        if concentration != 0:
            logger.info("[%s] Adding ions for c = %f M...",
                        dirname, concentration)
            # target concentration of free ions c ==>
            #    N = N_water * c/c_water
            # add ions for concentration to the counter ions (counter ions are less free)
            #
            # get number of waters (count OW ... works for SPC*, TIP*P water models)
            _, output, _ = gromacs.make_ndx(
                f='topol.tpr', o='ow.ndx',
                input=('keep 0', 'del 0', 'a OW*', 'name 0 OW', '', 'q'),
                stdout=False)
            groups_by_name = {g['name']: g for g in cbook.parse_ndxlist(output)}
            N_water = groups_by_name['OW']['natoms']
            N_ions = int(N_water * concentration / CONC_WATER)  # number of monovalents
        else:
            N_ions = 0

        # neutralize (or try -neutral switch of genion???)
        n_cation = n_anion = 0
        if qtot > 0:
            n_anion = int(abs(qtot))
        elif qtot < 0:
            n_cation = int(abs(qtot))

        n_cation += N_ions
        n_anion += N_ions

        if n_cation != 0 or n_anion != 0:
            # sanity check: the residual charge must vanish in BOTH directions
            # (the one-sided "< 1e-6" accepted any negative residual)
            assert abs(qtot + n_cation - n_anion) < 1e-6
            logger.info("[%s] Adding n_cation = %d and n_anion = %d ions...",
                        dirname, n_cation, n_anion)
            gromacs.genion(s='topol.tpr', o='ionized.gro', p=topology,
                           pname=cation, nname=anion,
                           np=n_cation, nn=n_anion,
                           input=solvent_name)
        else:
            # fake ionized file ... makes it easier to continue without too much fuzz
            try:
                os.unlink('ionized.gro')
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
            # NOTE(review): the link target is the literal 'solvated.gro'
            # inside *dirname*, not *struct* -- confirm callers always place
            # the solvated structure there.
            os.symlink('solvated.gro', 'ionized.gro')

        qtot = cbook.grompp_qtot(f='none.mdp', o='ionized.tpr',
                                 c='ionized.gro', p=topology, stdout=False,
                                 maxwarn=grompp_maxwarn)

        if abs(qtot) > 1e-4:
            wmsg = "System has non-zero total charge qtot = {qtot:g} e.".format(
                qtot=qtot)
            warnings.warn(wmsg, category=BadParameterWarning)
            logger.warning(wmsg)

        # make main index
        try:
            make_main_index('ionized.tpr', selection=mainselection, ndx=ndx)
        except GromacsError as err:
            # or should I rather fail here?
            wmsg = "Failed to make main index file %r ... maybe set mainselection='...'.\n"\
                   "The error message was:\n%s\n" % (ndx, str(err))
            logger.warning(wmsg)
            warnings.warn(wmsg, category=GromacsFailureWarning)
        try:
            trj_compact_main(f='ionized.gro', s='ionized.tpr',
                             o='compact.pdb', n=ndx)
        except GromacsError as err:
            wmsg = "Failed to make compact pdb for visualization... pressing on regardless. "\
                   "The error message was:\n%s\n" % str(err)
            logger.warning(wmsg)
            warnings.warn(wmsg, category=GromacsFailureWarning)

    return {
        'qtot': qtot,
        'struct': realpath(dirname, 'ionized.gro'),
        'ndx': realpath(dirname, ndx),  # not sure why this is propagated-is it used?
        'mainselection': mainselection,
    }
def add_box_mutation(self):
    """
    Function to add the solvent and run a minimization of the local amino acid with the waters included

    The solvent of the last iteration is appended to complex.pdb, the waters/ions
    found by ``gmx select`` around residue ``self.pep_position`` of chain
    ``self.pep_chain`` are filtered (atoms closer than 1.0 to the residue, and
    atoms whose serial is within 2 of such an atom, are dropped), and the
    remaining group is merged with the residue side chain into the index used
    for the minimization.

    Output:
    system.gro -- file containing the complete system minimized after the mutation
    """
    path = self.path
    system_pdb = path + "/system.pdb"

    # Read the solvent
    os.system("grep ATOM {path}/solvent/solvent_{itera}.pdb | grep -v ENDMDL > {path}/solvent.pdb".format(path=path, itera=self.last_iteration))

    # Concatenate complex and system
    os.system("cat {path}/complex.pdb {path}/solvent.pdb > {path}/system.pdb".format(path=path))
    gromacs.editconf(f=system_pdb, o=path + "/system_mod.pdb", stdout=False)
    os.system("mv {}/system_mod.pdb {}/system.pdb".format(path, path))

    # Make an index of the system
    gromacs.make_ndx(f=system_pdb, o=path + "/index.ndx", stdout=False,
                     input=('chain {} \n q'.format(self.pep_chain)))

    # Copy the topol files of the target chains, which are the same always
    if self.target == "protein":
        for ch in self.chain_join:
            os.system("cp {}/topol_Protein_chain_{}.itp {}/system_Protein_chain_{}.itp".format(path, ch, path, ch))
    if self.target == "drug":
        for ch in self.chain_join:
            os.system("cp {}/topol_Drug_chain_{}.itp {}/system_Drug_chain_{}.itp".format(path, ch, path, ch))

    # Copy the topol.top to system.top
    os.system("cp {}/topol.top {}/system.top".format(path, path))
    os.system("cp {}/complex_Protein_chain_{}.itp {}/system_Protein_chain_{}.itp".format(path, self.pep_chain, path, self.pep_chain))
    os.system("sed -i 's/topol_/system_/g' {}/system.top".format(path))

    # Select water and ions within 0.2 distance of the residue
    gromacs.make_ndx(f=system_pdb, o=path + "/index.ndx", stdout=False,
                     input=('chain {}'.format(self.pep_chain), 'q'))
    gromacs.select(f=system_pdb, n=path + "/index.ndx", s=system_pdb,
                   on=path + "/index_sol.ndx", stdout=False,
                   select="group Water_and_ions and same residue as within 0.2 of (group ch{} and resnr {})".format(self.pep_chain, self.pep_position))

    # Solve the issue with atoms overlapped with the selected residue.
    # The first line of the index file is the group header, the rest are serials.
    with open(path + "/index_sol.ndx") as handle:
        values = [line.strip() for line in handle]
    atoms_sol = [field for line in values[1:] for field in line.split()]

    # Coordinates of the selected residue: system.pdb does not change while the
    # solvent atoms are checked, so it is parsed once instead of once per atom.
    residue_coords = []
    if atoms_sol:
        structure = PDBParser().get_structure('PEP', system_pdb)
        for residue in structure[0][self.pep_chain]:
            if residue.get_full_id()[3][1] != self.pep_position:
                continue
            for atom in residue:
                # atoms whose name starts with a digit are skipped
                if not atom.get_id()[0].isdigit():
                    residue_coords.append(atom.coord)

    # List of the atoms that will be deleted
    atoms_delete = []

    # Check the overlapped atoms
    for serial in atoms_sol:
        # Obtain the coordinates of the atom (columns 6-8 of the line whose
        # second column equals the serial)
        bash = "awk '$2 == '{}' {{print $6','$7','$8}}' {}/system.pdb".format(serial, path)
        coordinates = subprocess.check_output(['bash', '-c', bash])
        position = np.array([float(c) for c in coordinates.strip().split()])

        # Distances to all the atoms from the selected residue
        distances = []
        for coord in residue_coords:
            diff = coord - position
            distances.append(float(np.sqrt(np.sum(diff * diff))))

        # Threshold to determine which atoms can be overlapped
        # (min() raises ValueError when the residue was not found)
        if min(distances) < 1.0 and serial not in atoms_delete:
            atoms_delete.append(serial)

    # Selection of the final atoms that will be included in the index: drop
    # every atom whose serial is within 2 of an overlapping atom
    # (presumably the remaining atoms of the same water -- TODO confirm)
    delete_serials = [int(serial) for serial in atoms_delete]
    final_index = [
        element for element in atoms_sol
        if not any(abs(int(element) - d) <= 2 for d in delete_serials)
    ]

    # Update of the index sol file: 15 serials per line, the (possibly empty)
    # remainder on a last line without newline
    full_lines = len(final_index) // 15
    with open(path + "/index_sol2.ndx", "w") as new_index:
        new_index.write("{}\n".format(values[0]))
        for k in range(full_lines):
            new_index.write(" ".join(final_index[15 * k:15 * (k + 1)]) + " \n")
        new_index.write(" ".join(final_index[15 * full_lines:]) + " ")

    # Update the file
    os.system("mv {}/index_sol2.ndx {}/index_sol.ndx".format(path, path))

    # Create the side chain index in a template file
    os.system("echo 'name 0 overlap' > %s/template" % path)
    os.system("echo '\"SideChain\" & \"ch{}\" & r {}' >> {}/template".format(self.pep_chain, str(self.pep_position), path))
    os.system("echo '\"overlap\" | \"SideChain_&_ch{}_&_r_{}\"' >> {}/template".format(self.pep_chain, str(self.pep_position), path))
    os.system("echo '\"System\" &! \"overlap_SideChain_&_ch{}_&_r_{}\"' >> {}/template".format(self.pep_chain, str(self.pep_position), path))
    os.system("echo 'q' >> {}/template".format(path))

    # Create an index joining both created before
    os.system("gmx -quiet make_ndx -f {path}/system.pdb -n {path}/index_sol.ndx {path}/index.ndx -o {path}/total_index.ndx < {path}/template".format(path=path))
    # raw string: "\!" must reach sed unchanged
    os.system(r"sed -i 's/System_&_\!overlap_SideChain_&_ch{}_&_r_{}/to_block/g' {}/total_index.ndx".format(self.pep_chain, str(self.pep_position), path))

    # Generate the gro file
    gromacs.editconf(f=system_pdb, o=path + "/system.gro", stdout=False)

    # Prepare the files for the minimization and run
    gromacs.grompp(f=path + "/mdp/minim_overlap.mdp", o=path + "/systemNEW.tpr",
                   p=path + "/system.top", n=path + "/total_index.ndx",
                   c=path + "/system.gro", stdout=False)
    gromacs.utilities.unlink_gmx("mdout.mdp")
    print("Running second minimization ...")
    gromacs.mdrun(deffnm=path + "/systemNEW", stdout=False)

    # Copy the system.gro file that will be used to run the last minimization;
    # its last line is replaced by the last line of npt-pbc.gro
    os.system("cp {}/systemNEW.gro {}/system.gro".format(path, path))
    os.system("sed -i '$ d' {}/system.gro".format(path))
    os.system("tail -n1 {path}/npt-pbc.gro >> {path}/system.gro".format(path=path))

    # Delete temporal files
    os.system("rm {path}/complex.pdb {path}/solvent.pdb {path}/systemNEW* {path}/template {path}/index.ndx {path}/index_sol.ndx {path}/total_index.ndx *.itp".format(path=path))
def run_minim_complex(self,run_minim=False):
    """
    Function to run a local minimization on the side chain that was mutated

    A new itp file is generated for the peptide chain with pdb2gmx, complex.pdb is
    rebuilt from the target chains followed by the peptide chain, and an index with
    the groups binder, scmut, target and complex is created for grompp.

    Arguments:
    run_minim -- boolean flag that will control if the minimization is run or not

    Output:
    complex.pdb -- new complex pdb with the minimization and the new itp files
    """
    path = self.path
    chain = self.pep_chain

    # Get the chain with the peptide to generate a novel itp file
    os.system("python3 {}/src/scores/get_chains.py {}/complex.pdb {}".format(self.path_scores, path, path))
    gromacs.pdb2gmx(f=path + "/complex_" + chain + ".pdb", p=path + "/binder.top",
                    o=path + "/complex_" + chain + ".gro", stdout=False, input=('6', '6'))
    os.system("sed -i '/forcefield/d' {}/binder.top".format(path))
    # raw string: the backslashes belong to the sed pattern
    os.system(r"sed -i '/\[ system \]/,$d' {}/binder.top".format(path))
    os.system("mv {}/binder.top {}/complex_Protein_chain_{}.itp".format(path, path, chain))
    gromacs.editconf(f=path + "/complex_" + chain + ".gro", o=path + "/complex_" + chain + ".pdb", stdout=False)

    # Fix the amino acid nomenclature
    os.system("for i in ASP ARG HIS HIE HID HIP LYS GLU SER THR ASN GLN CYS CYX GLY PRO ALA VAL ILE LEU MET PHE TYR TRP; do sed -i s/\"$i \"/\"$i {}\"/g {}/complex_{}.pdb; done".format(chain, path, chain))

    # The first target chain overwrites complex.pdb, the following ones are appended
    for i, ch in enumerate(self.chain_join):
        if i == 0:
            os.system("grep ATOM {}/complex_{}.pdb > {}/complex.pdb".format(path, ch, path))
        else:
            os.system("grep ATOM {}/complex_{}.pdb >> {}/complex.pdb".format(path, ch, path))
        os.system("echo 'TER' >> {}/complex.pdb".format(path))

    # Get the new complex.pdb and the peptide chain itp file and delete temporal files
    os.system("grep ATOM {}/complex_{}.pdb >> {}/complex.pdb".format(path, chain, path))
    os.system("echo 'TER' >> {}/complex.pdb".format(path))
    os.system("rm {}/complex_*.pdb".format(path))
    os.system("rm {}/complex_*.gro".format(path))
    os.system("rm {}/chains.seq".format(path))
    os.system("head -n -18 {}/complex_Protein_chain_{}.itp > {}/temp; mv {}/temp {}/complex_Protein_chain_{}.itp".format(path, chain, path, path, path, chain))

    # Copy the topol files of the target chains, which are the same always
    if self.target == "protein":
        for ch in self.chain_join:
            os.system("cp {}/topol_Protein_chain_{}.itp {}/complex_Protein_chain_{}.itp".format(path, ch, path, ch))
    if self.target == "drug":
        for ch in self.chain_join:
            os.system("cp {}/topol_Drug_chain_{}.itp {}/complex_Drug_chain_{}.itp".format(path, ch, path, ch))

    # Copy the topol.top to complex.top and delete all the additional atoms
    os.system("cp {}/topol.top {}/complex.top".format(path, path))
    for pattern in ("Ion", "SOL", "NA", "CL", "solvent"):
        os.system("sed -i '/{}/d' {}/complex.top".format(pattern, path))
    os.system("sed -i 's/topol_/complex_/g' {}/complex.top".format(path))

    # Get a pdb of the complex where an index will be created; the number of
    # "[ group ]" headers minus one is the number of the last default group
    gromacs.make_ndx(f=path + "/complex.pdb", o=path + "/reference.ndx", stdout=False, input=('q'))
    bash = r"grep '\[' {}/reference.ndx | wc -l".format(path)
    number_index = subprocess.check_output(['bash', '-c', bash])
    index_ref = int(number_index) - 1
    gromacs.utilities.unlink_gmx(path + "/reference.ndx")

    # Create the side chain index. Every selection command appends one group,
    # so the group numbers follow on from index_ref.
    binder_nr = index_ref + 1     # created by 'chain X'
    sidechain_nr = index_ref + 2  # created by the SideChain & binder & r selection
    scmut_nr = index_ref + 3      # complement of the side chain group
    target_nr = index_ref + 4     # union of the target chains
    complex_nr = index_ref + 5    # target | binder
    input_for_ndx = (
        'chain {}'.format(chain),
        'name {} binder'.format(binder_nr),
        '"SideChain" & "binder"' + ' & r {}'.format(self.pep_position),
        '"System" &! {}'.format(sidechain_nr),
        'name {} scmut'.format(scmut_nr),
        " | ".join("chain {}".format(ch) for ch in self.chain_join),
        'name {} target'.format(target_nr),
        '"target" | "binder"',
        'name {} complex'.format(complex_nr),
        'q',
    )

    # Generate the index file
    gromacs.make_ndx(f=path + "/complex.pdb", o=path + "/scmut.ndx", stdout=False, input=input_for_ndx)

    # Generate the gro file
    gromacs.editconf(f=path + "/complex.pdb", o=path + "/complex.gro", stdout=False)

    # Add a small box for the residues (replaces the last line of the gro file)
    os.system("sed -i '$ d' {}/complex.gro".format(path))
    os.system('echo " 20.0 20.0 20.0" >> {path}/complex.gro'.format(path=path))

    # Prepare the files for the minimization
    gromacs.grompp(f=path + "/mdp/minim_scmut.mdp", o=path + "/complex.tpr", p=path + "/complex.top",
                   n=path + "/scmut.ndx", c=path + "/complex.gro", stdout=False)
    gromacs.utilities.unlink_gmx("mdout.mdp")

    # Run the minimization of the side chain alone and the residues around it
    # NOTE(review): the pdb extraction and clean-up are kept inside this branch
    # because they remove mdrun outputs (log/trr/edr) -- confirm the nesting
    # against the original layout of the file.
    if run_minim:
        # Run the minimization
        print("Running first minimization ...")
        gromacs.mdrun(deffnm=path + "/complex", stdout=False)

        # Get the complex pdb file
        gromacs.trjconv(f=path + "/complex.gro", s=path + "/complex.tpr", n=path + "/scmut.ndx",
                        o=path + "/min_complex.pdb", stdout=False, input=("complex"))
        os.system("rm posre.itp {path}/complex.tpr {path}/complex.top; grep -v ENDMDL {path}/min_complex.pdb | grep -v MODEL > {path}/complex.pdb; rm {path}/min_complex.pdb {path}/complex.log {path}/complex.trr {path}/complex.edr {path}/scmut.ndx".format(path=path))
def make_main_index(struct, selection='"Protein"', ndx='main.ndx', oldndx=None):
    """Make index file with the special groups.

    This routine adds the group __main__ and the group __environment__
    to the end of the index file. __main__ contains what the user
    defines as the *central* and *most important* parts of the
    system. __environment__ is everything else.

    The template mdp file, for instance, uses these two groups for T-coupling.

    These groups are mainly useful if the default groups "Protein" and "Non-Protein"
    are not appropriate. By using symbolic names such as __main__ one
    can keep scripts more general.

    :Returns:
      *groups* is a list of dictionaries that describe the index groups. See
      :func:`gromacs.cbook.parse_ndxlist` for details.

    :Raises:
      :exc:`ValueError` if no existing index group is named *selection*.

    :Arguments:
      *struct* : filename
        structure (tpr, pdb, gro)
      *selection* : string
        name of an index group such as ``"Protein"`` that is considered
        the main group for centering etc. Surrounding double quotes are
        stripped and the name is compared case-insensitively against the
        groups listed by ``make_ndx``; the first match is used.
      *ndx* : string
         name of the final index file
      *oldndx* : string
         name of index file that should be used as a basis; if None
         then the ``make_ndx`` default groups are used.

    This routine is very dumb at the moment; maybe some heuristics will be
    added later as could be other symbolic groups such as __membrane__.
    """
    logger.info("Building the main index file {ndx!r}...".format(ndx=ndx))

    # pass 1: select
    # get a list of groups
    # need the first "" to get make_ndx to spit out the group list.
    _, out, _ = gromacs.make_ndx(f=struct, n=oldndx, o=ndx, stdout=False,
                                 input=("", "q"))
    groups = cbook.parse_ndxlist(out)

    # find the matching groups,
    # there is a nasty bug in GROMACS where make_ndx may have multiple
    # groups, which caused the previous approach to fail big time.
    # this is a work around the make_ndx bug.
    # striping the "" allows compatibility with existing make_ndx selection commands.
    selection = selection.strip("\"")
    wanted = selection.lower()
    selected_groups = [g for g in groups if g['name'].lower() == wanted]

    # messages go through the module logger (not the root logger) so that
    # they end up in the same place as the other messages of this function
    if len(selected_groups) > 1:
        logger.warning(
            "make_ndx created duplicated groups, performing work around")

    if not selected_groups:
        msg = "no groups found for selection {0}, available groups are {1}".format(
            selection, groups)
        logger.error(msg)
        raise ValueError(msg)

    # Found at least one matching group, we're OK

    # index of last group
    last = len(groups) - 1
    assert last == groups[-1]['nr']

    group = selected_groups[0]

    # pass 2:
    # 1) last group is __main__
    # 2) __environment__ is everything else (eg SOL, ions, ...)
    _, out, _ = gromacs.make_ndx(
        f=struct, n=ndx, o=ndx, stdout=False,
        input=(
            # make copy selected group, this now has index last + 1
            "{0}".format(group['nr']),
            # rename this to __main__
            "name {0} __main__".format(last + 1),
            # make a complement to this group, it get index last + 2
            "! \"__main__\"",
            # rename this to __environment__
            "name {0} __environment__".format(last + 2),
            # list the groups
            "",
            # quit
            "q"))
    return cbook.parse_ndxlist(out)
def solvate(struct='top/protein.pdb', top='top/system.top',
            distance=0.9, boxtype='dodecahedron',
            concentration=0, cation='NA', anion='CL',
            water='spc', solvent_name='SOL', with_membrane=False,
            ndx='main.ndx', mainselection='"Protein"',
            dirname='solvate', **kwargs):
    """Put protein into box, add water, add counter-ions.

    Currently this really only supports solutes in water. If you need
    to embed a protein in a membrane then you will require more
    sophisticated approaches.

    However, you *can* supply a protein already inserted in a
    bilayer. In this case you will probably want to set *distance* =
    ``None`` and also enable *with_membrane* = ``True`` (using extra
    big vdw radii for typical lipids).

    .. Note:: The defaults are suitable for solvating a globular
       protein in a fairly tight (increase *distance*!) dodecahedral
       box.

    :Arguments:
      *struct* : filename
          pdb or gro input structure
      *top* : filename
          Gromacs topology
      *distance* : float
          When solvating with water, make the box big enough so that
          at least *distance* nm water are between the solute *struct*
          and the box boundary.
          Set *boxtype* to ``None`` in order to use a box size in the input
          file (gro or pdb).
      *boxtype* or *bt*: string
          Any of the box types supported by :class:`~gromacs.tools.Editconf`
          (triclinic, cubic, dodecahedron, octahedron). Set the box dimensions
          either with *distance* or the *box* and *angle* keywords.

          If set to ``None`` it will ignore *distance* and use the box
          inside the *struct* file.

          *bt* overrides the value of *boxtype*.
      *box*
          List of three box lengths [A,B,C] that are used by :class:`~gromacs.tools.Editconf`
          in combination with *boxtype* (``bt`` in :program:`editconf`) and *angles*.
          Setting *box* overrides *distance*.
      *angles*
          List of three angles (only necessary for triclinic boxes).
      *concentration* : float
          Concentration of the free ions in mol/l. Note that counter
          ions are added in excess of this concentration.
      *cation* and *anion* : string
          Molecule names of the ions. This depends on the chosen force field.
      *water* : string
          Name of the water model; one of "spc", "spce", "tip3p",
          "tip4p". This should be appropriate for the chosen force
          field. If an alternative solvent is required, simply supply the path to a box
          with solvent molecules (used by :func:`~gromacs.genbox`'s  *cs* argument)
          and also supply the molecule name via *solvent_name*.
      *solvent_name*
          Name of the molecules that make up the solvent (as set in the itp/top).
          Typically needs to be changed when using non-standard/non-water solvents.
          ["SOL"]
      *with_membrane* : bool
           ``True``: use special ``vdwradii.dat`` with 0.1 nm-increased radii on
           lipids. Default is ``False``.
      *ndx* : filename
          How to name the index file that is produced by this function.
      *mainselection* : string
          A string that is fed to :class:`~gromacs.tools.Make_ndx` and
          which should select the solute.
      *dirname* : directory name
          Name of the directory in which all files for the solvation stage are stored.
      *includes*
          List of additional directories to add to the mdp include path
      *kwargs*
          Additional arguments are passed on to
          :class:`~gromacs.tools.Editconf` or are interpreted as parameters to be
          changed in the mdp file.

    :Raises:
      :exc:`ValueError` for an unsupported *boxtype*;
      :exc:`MissingDataError` if *boxtype* is ``None`` and the input
      structure carries no box information.
    """
    structure = realpath(struct)
    topology = realpath(top)

    # arguments for editconf that we honour
    editconf_keywords = ["box", "bt", "angles", "c", "center", "aligncenter",
                         "align", "translate", "rotate", "princ"]
    editconf_kwargs = {k: kwargs.pop(k, None) for k in editconf_keywords}
    editconf_boxtypes = ["triclinic", "cubic", "dodecahedron", "octahedron", None]

    # needed for topology scrubbing
    scrubber_kwargs = {'marker': kwargs.pop('marker', None)}

    # sanity checks and argument dependencies
    bt = editconf_kwargs.pop('bt')
    boxtype = bt if bt else boxtype  # bt takes precedence over boxtype
    if boxtype not in editconf_boxtypes:
        # the placeholder is named 'boxtypes' while the local list is
        # 'editconf_boxtypes': pass it explicitly (formatting from vars()
        # raised KeyError instead of the intended ValueError)
        msg = "Unsupported boxtype {boxtype!r}: Only {boxtypes!r} are possible.".format(
            boxtype=boxtype, boxtypes=editconf_boxtypes)
        logger.error(msg)
        raise ValueError(msg)
    if editconf_kwargs['box']:
        distance = None  # if box is set then user knows what she is doing...

    # handle additional include directories (kwargs are also modified!)
    mdp_kwargs = cbook.add_mdp_includes(topology, kwargs)

    if water.lower() in ('spc', 'spce'):
        water = 'spc216'
    elif water.lower() == 'tip3p':
        water = 'spc216'
        logger.warning(
            "TIP3P water model selected: using SPC equilibrated box "
            "for initial solvation because it is a reasonable starting point "
            "for any 3-point model. EQUILIBRATE THOROUGHLY!")

    # By default, grompp should not choke on a few warnings because at
    # this stage the user cannot do much about it (can be set to any
    # value but is kept undocumented...)
    grompp_maxwarn = kwargs.pop('maxwarn', 10)

    # clean topology (if user added the marker; the default marker is
    # ; Gromacs auto-generated entries follow:
    cbook.remove_molecules_from_topology(topology, **scrubber_kwargs)

    with in_dir(dirname):
        logger.info("[%s] Solvating with water %r...", dirname, water)
        if boxtype is None:
            # a gro file always carries a box; a pdb only with a CRYST record
            hasBox = False
            ext = os.path.splitext(structure)[1]
            if ext == '.gro':
                hasBox = True
            elif ext == '.pdb':
                with open(structure) as pdb:
                    hasBox = any(line.startswith('CRYST') for line in pdb)
            if not hasBox:
                msg = "No box data in the input structure {structure!r} and boxtype is set to None".format(
                    structure=structure)
                # no exception is being handled here, so plain error logging
                logger.error(msg)
                raise MissingDataError(msg)
            distance = boxtype = None  # ensures that editconf just converts
        editconf_kwargs.update({
            'f': structure,
            'o': 'boxed.gro',
            'bt': boxtype,
            'd': distance
        })
        gromacs.editconf(**editconf_kwargs)

        if with_membrane:
            vdwradii_dat = get_lipid_vdwradii()  # need to clean up afterwards
            logger.info("Using special vdW radii for lipids {0!r}".format(
                vdw_lipid_resnames))

        try:
            gromacs.genbox(p=topology, cp='boxed.gro', cs=water,
                           o='solvated.gro')
        except:
            # deliberately catches everything: clean up, then re-raise
            if with_membrane:
                # remove so that it's not picked up accidentally
                utilities.unlink_f(vdwradii_dat)
            raise
        logger.info("Solvated system with %s", water)

        with open('none.mdp', 'w') as mdp:
            mdp.write(
                '; empty mdp file\ninclude = {include!s}\nrcoulomb = 1\nrvdw = 1\nrlist = 1\n'
                .format(**mdp_kwargs))
        qtotgmx = cbook.grompp_qtot(f='none.mdp', o='topol.tpr',
                                    c='solvated.gro', p=topology,
                                    stdout=False, maxwarn=grompp_maxwarn)
        qtot = round(qtotgmx)
        logger.info("[%s] After solvation: total charge qtot = %r = %r",
                    dirname, qtotgmx, qtot)

        if concentration != 0:
            logger.info("[%s] Adding ions for c = %f M...",
                        dirname, concentration)
            # target concentration of free ions c ==>
            #    N = N_water * c/c_water
            # add ions for concentration to the counter ions (counter ions are less free)
            #
            # get number of waters (count OW ... works for SPC*, TIP*P water models)
            _, output, _ = gromacs.make_ndx(
                f='topol.tpr', o='ow.ndx',
                input=('keep 0', 'del 0', 'a OW*', 'name 0 OW', '', 'q'),
                stdout=False)
            groups_by_name = {g['name']: g for g in cbook.parse_ndxlist(output)}
            N_water = groups_by_name['OW']['natoms']
            N_ions = int(N_water * concentration / CONC_WATER)  # number of monovalents
        else:
            N_ions = 0

        # neutralize (or try -neutral switch of genion???)
        n_cation = n_anion = 0
        if qtot > 0:
            n_anion = int(abs(qtot))
        elif qtot < 0:
            n_cation = int(abs(qtot))

        n_cation += N_ions
        n_anion += N_ions

        if n_cation != 0 or n_anion != 0:
            # sanity check: the residual charge must vanish in BOTH directions
            assert abs(qtot + n_cation - n_anion) < 1e-6
            logger.info("[%s] Adding n_cation = %d and n_anion = %d ions...",
                        dirname, n_cation, n_anion)
            gromacs.genion(s='topol.tpr', o='ionized.gro', p=topology,
                           pname=cation, nname=anion,
                           np=n_cation, nn=n_anion,
                           input=solvent_name)
        else:
            # fake ionized file ... makes it easier to continue without too much fuzz
            try:
                os.unlink('ionized.gro')
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
            os.symlink('solvated.gro', 'ionized.gro')

        qtot = cbook.grompp_qtot(f='none.mdp', o='ionized.tpr',
                                 c='ionized.gro', p=topology, stdout=False,
                                 maxwarn=grompp_maxwarn)

        if abs(qtot) > 1e-4:
            wmsg = "System has non-zero total charge qtot = {qtot:g} e.".format(
                qtot=qtot)
            warnings.warn(wmsg, category=BadParameterWarning)
            logger.warning(wmsg)

        # make main index
        try:
            make_main_index('ionized.tpr', selection=mainselection, ndx=ndx)
        except GromacsError as err:
            # or should I rather fail here?
            wmsg = "Failed to make main index file %r ... maybe set mainselection='...'.\n"\
                   "The error message was:\n%s\n" % (ndx, str(err))
            logger.warning(wmsg)
            warnings.warn(wmsg, category=GromacsFailureWarning)
def test2(a, radius, box=100.0, fbase=None):
    """Build, solvate and preprocess a spherical "AU" particle and write a PBS job script.

    The points returned by ``fcc_sphere(a, radius)`` are written as AU atoms
    (residue NP) shifted to the centre of a cubic box of edge *box*; the
    structure is solvated with ``genbox``, an index file is created, ``grompp``
    is run with ``md2.mdp`` and a ``<fbase>.sh`` submission script is written.

    Example call: ``l.test2(5.0797, 15)``

    :Arguments:
      *a*
          first argument of ``fcc_sphere`` (presumably the lattice
          constant -- TODO confirm)
      *radius*
          second argument of ``fcc_sphere``
      *box* : float
          edge length written to the CRYST1 record; atoms are shifted by
          ``box / 2``
      *fbase* : string
          base name of all generated files; defaults to ``"<box>_<radius>"``
    """
    if fbase is None:
        fbase = "{}_{}".format(box, radius)

    top = "{}.top".format(fbase)
    struct = "{}.pdb".format(fbase)
    sol = "{}.sol.pdb".format(fbase)
    ndx = "{}.ndx".format(fbase)

    origin = box / 2.

    pts = fcc_sphere(a, radius)
    w = writer("{}.pdb".format(fbase))
    try:
        w.CRYST1([box, box, box, 90.00, 90.00, 90.00])

        for index, atom in enumerate(pts):
            w.ATOM(serial=index + 1, name="AU", resName="NP", resSeq=1,
                   chainID="A", segID="AUNP", element="AU",
                   x=atom[0] + origin, y=atom[1] + origin, z=atom[2] + origin)
    finally:
        # release the pdb file even when writing a record fails
        w.close()

    #make_index("{}.pdb".format(fbase), "{}.ndx".format(fbase))

    # open() instead of the Python-2-only file() builtin
    with open(top, "w") as t:
        t.write(top_src)
        t.write("Au {}\n".format(pts.shape[0]))

    gromacs.genbox(p=top, cp=struct, cs="spc216.gro", o=sol, vdwd="0.15")  #@UndefinedVariable

    gromacs.make_ndx(f=sol, n=None, o=ndx, stdout=False,  #@UndefinedVariable
                     input=('', '', 'q'))

    gromacs.grompp(f="md2.mdp", o="{}.tpr".format(fbase), c=sol, p=top, n=ndx)  #@UndefinedVariable

    with open("{}.sh".format(fbase), "w") as f:
        f.write("#!/bin/bash\n")
        f.write("#PBS -k o\n")
        f.write("#PBS -l nodes=1:ppn=12:ccvt,walltime=24:00:00\n")
        f.write("#PBS -M [email protected]\n")
        f.write("#PBS -m abe\n")
        f.write("#PBS -N {}\n".format(fbase))
        f.write("#PBS -j oe\n")
        f.write("#PBS -q pg\n")
        f.write("#PBS -d /N/dc/scratch/somogyie/Au\n")
        f.write("mpirun mdrun -deffnm {}".format(fbase))