def bilayer_sorter(structure, ndx='system-groups'):
    """
    Divide the system into groups.

    Writes an index file with make_ndx. When "protein_ready" is in the wordspace the groups
    are PROTEIN, LIPIDS, SOLVENT and SYSTEM (in that order); otherwise they are SYSTEM,
    LIPIDS and SOLVENT.

    structure : structure name that is inspected to locate the protein group
    ndx : name of the index file written by the final make_ndx call

    Raises an Exception if a protein is expected but make_ndx lists no Protein group.
    """
    if 'protein_ready' in wordspace:
        inspect_log = 'make-ndx-%s-inspect' % structure
        gmx('make_ndx', structure=structure, ndx='%s-inspect' % structure,
            log=inspect_log, inpipe="q\n")
        with open(wordspace['step'] + 'log-' + inspect_log) as fp:
            lines = fp.readlines()
        #---find the protein group because it may not be obvious in CGMD
        make_ndx_sifter = r'^\s*([0-9]+)\s*Protein'
        hits = [m for m in (re.match(make_ndx_sifter, line) for line in lines) if m]
        if not hits:
            raise Exception('cannot find a Protein group in log-%s' % inspect_log)
        #---the first group that matches is the one we keep
        protein_group = int(hits[0].group(1))
        group_selector = "\n".join([
            "keep %s" % protein_group,
            "name 0 PROTEIN",
            #---! hacked
            " || ".join(['r ' + r for r in wordspace['lipids'] + ['PIP2']]),
            "name 1 LIPIDS",
            " || ".join(['r ' + r for r in ['W', 'ION', wordspace['cation'], wordspace['anion']]]),
            "name 2 SOLVENT",
            "0 | 1 | 2", "name 3 SYSTEM", "q"]) + "\n"
    else:
        group_selector = "\n".join([
            "keep 0",
            "name 0 SYSTEM",
            " || ".join(['r ' + r for r in wordspace['lipids']]),
            "name 1 LIPIDS",
            " || ".join(['r ' + r for r in ['W', 'ION', wordspace['cation'], wordspace['anion']]]),
            "name 2 SOLVENT", "q"]) + "\n"
    #---NOTE(review): the final call always reads structure='system', not the structure argument
    gmx('make_ndx', structure='system', ndx=ndx, log='make-ndx-groups',
        inpipe=group_selector)
def vacuum_pack(structure='vacuum', name='vacuum-pack', gro='vacuum-packed', pbc='nojump'):
    """
    Pack the lipids in the plane, gently.

    structure : input structure name for grompp
    name : label for the run; files are named "md-<name>"
    gro : name of the packed structure that is written (without the ".gro" suffix)
    pbc : when true, remove_jump is applied and its output "md-<name>-<pbc>.gro" is used;
        when false, the mdrun output "md-<name>.gro" is used directly

    The first two box vectors of the packed structure are stored in
    wordspace['bilayer_dimensions_slab'][:2].
    """
    base = 'md-%s' % name
    gmx('grompp', base=base, top='vacuum', structure=structure,
        log='grompp-%s' % name, mdp='input-md-%s-eq-in' % name, flag='-maxwarn 100')
    gmx('mdrun', base=base, log='mdrun-%s' % name, skip=True)
    if pbc:
        remove_jump(structure=base, tpr='md-' + name, gro='md-%s-%s' % (name, pbc))
        filecopy(wordspace['step'] + 'md-%s-%s.gro' % (name, pbc),
            wordspace['step'] + '%s.gro' % gro)
    else:
        #---without the jump correction the mdrun output md-<name>.gro is the packed structure
        #---(previously this pointed to "md-<gro>" which has the wrong stem and no extension)
        filecopy(wordspace['step'] + '%s.gro' % base, wordspace['step'] + '%s.gro' % gro)
    boxdims_old, boxdims = get_box_vectors(gro)
    wordspace['bilayer_dimensions_slab'][:2] = boxdims_old[:2]
def get_box_vectors(structure, gro=None, d=0, log='checksize'):
    """
    Return the box vectors.

    Runs editconf on the structure and reads the "box vectors" and "new box vectors" lines
    from its log.

    structure : name of the structure passed to editconf
    gro : name of the editconf output (default "<structure>-check-box"); removed before returning
    d : value for the editconf "-d" flag (formatted with %d, hence written as an integer)
    log : suffix of the editconf log name

    Returns (vecs_old, vecs_new), two lists of floats.
    Raises an Exception if either set of box vectors cannot be found or parsed.
    """
    if not gro:
        gro = structure + '-check-box'
    #---note that we consult the command_library here
    gmx('editconf', structure=structure, gro=gro,
        log='editconf-%s' % log, flag='-d %d' % d)
    log_fn = wordspace['step'] + 'log-editconf-%s' % log
    with open(log_fn, 'r') as fp:
        lines = fp.readlines()
    box_vector_regex = r'\s*box vectors\s*\:\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)'
    box_vector_new_regex = r'\s*new box vectors\s*\:\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)'
    runon_regex = (r'^\s*([-]?[0-9]+\.?[0-9]{0,3})\s*([-]?[0-9]+\.?[0-9]{0,3})'
        r'\s*([-]?[0-9]+\.?[0-9]{0,3})')

    def read_vectors(line_regex, label):
        """Parse the three numbers from the first log line that matches line_regex."""
        candidates = [l for l in lines if re.match(line_regex, l)]
        if not candidates:
            raise Exception('cannot find the %s in %s' % (label, log_fn))
        text = re.findall(r'\s*box vectors\s*:([^\(]+)', candidates[0])[0]
        try:
            return [float(i) for i in text.strip(' ').split()]
        except ValueError:
            #---sometimes the numbers run together
            runon = re.findall(runon_regex, text)
            if not runon:
                raise Exception('cannot parse the %s "%s" in %s' % (label, text.strip(), log_fn))
            return [float(i) for i in runon[0]]

    #---failures raise instead of dropping into the debugger so unattended runs stop cleanly
    vecs_old = read_vectors(box_vector_regex, 'box vectors')
    vecs_new = read_vectors(box_vector_new_regex, 'new box vectors')
    #---no need to keep the output since it is a verbatim copy for diagnostic only
    os.remove(wordspace['step'] + gro + '.gro')
    return vecs_old, vecs_new
def remove_jump(structure, tpr, gro):
    """
    Correct that thing where the bilayer crosses the PBCs and gets split.

    Builds an index file named after the structure (make_ndx with "keep 0"), runs trjconv
    with "-pbc nojump" to write gro, and deletes the make_ndx log afterwards.
    """
    ndx_log = 'make-ndx-nojump'
    gmx('make_ndx', ndx=structure, structure=structure,
        inpipe="keep 0\nq\n", log=ndx_log)
    trjconv_log = 'trjconv-%s-nojump' % structure
    gmx('trjconv', ndx=structure, structure=structure, gro=gro, tpr=tpr,
        log=trjconv_log, flag='-pbc nojump')
    #---only the make_ndx log is discarded
    os.remove(wordspace['step'] + 'log-' + ndx_log)
def solvate_bilayer(structure='vacuum'):
    """
    Solvate a CGMD bilayer (possibly with proteins) avoiding overlaps.

    Tiles the "martini-water" box, trims it to the slab footprint and the requested solvent
    thickness, shifts it along z, combines it with the incoming structure and trims the
    waters. The "W" count, wordspace['bilayer_dimensions_solvate'] and
    wordspace['water_without_ions'] are updated.
    """
    step = wordspace['step']
    #---check the size of the slab
    incoming_structure = str(structure)
    boxdims_old, boxdims = get_box_vectors(structure)
    #---! standardize these?
    basedim = 3.64428
    #---this path is assigned but not used below
    waterbox = 'inputs/martini-water'
    #---make an oversized water box
    newdims = boxdims_old[:2] + [wordspace['solvent_thickness']]
    tiling = [str(int(dim / basedim + 1)) for dim in newdims]
    gmx('genconf', structure='martini-water', gro='solvate-empty-uncentered-untrimmed',
        nbox=' '.join(tiling), log='genconf')

    def inside(line):
        """Check that the first three coordinates on a GRO atom line are below newdims."""
        coords = [float(i) for i in line[20:].split()][:3]
        return all([coords[i] < newdims[i] for i in range(3)])

    #---trimming waters
    with open(step + 'solvate-empty-uncentered-untrimmed.gro', 'r') as fp:
        lines = fp.readlines()
    #---the first two lines and the last line are the header and footer of the GRO file
    kept = [line for line in lines[2:-1] if inside(line)]
    with open(step + 'solvate-empty-uncentered.gro', 'w') as fp:
        fp.write(lines[0])
        fp.write('%d\n' % len(kept))
        fp.writelines(kept)
        fp.write(lines[-1])
    #---update waters
    structure = 'solvate-empty-uncentered'
    component('W', count=count_molecules(structure, 'W'))
    #---translate the water box
    shift_z = wordspace['bilayer_dimensions_slab'][2] / 2.
    gmx('editconf', structure=structure, gro='solvate-water-shifted',
        flag='-translate 0 0 %f' % shift_z, log='editconf-solvate-shift')
    #---combine and trim with new box vectors
    #---! skipping minimization?
    structure = 'solvate-water-shifted'
    boxdims_old, boxdims = get_box_vectors(structure)
    slab = wordspace['bilayer_dimensions_slab']
    boxvecs = slab[:2] + [slab[2] + boxdims[2]]
    gro_combinator('%s.gro' % incoming_structure, structure, box=boxvecs,
        cwd=wordspace['step'], gro='solvate-dense')
    structure = 'solvate-dense'
    trim_waters(structure=structure, gro='solvate', boxcut=False,
        gap=wordspace['protein_water_gap'], method='cgmd', boxvecs=boxvecs)
    structure = 'solvate'
    nwaters = count_molecules(structure, 'W')
    component('W', count=nwaters)
    wordspace['bilayer_dimensions_solvate'] = boxvecs
    wordspace['water_without_ions'] = nwaters
def equilibrate(groups=None, structure='system'):
    """
    equilibrate()
    Standard equilibration procedure.

    Runs the comma-separated stages named in wordspace.equilibration in order, each one
    starting from the output of the previous stage, then starts "md.part0001".
    groups, when given, is passed to grompp as "-n <groups>".
    """

    def index_flag():
        """Build the optional grompp index-file flag."""
        return '' if not groups else '-n %s' % groups

    #---sequential equilibration stages
    stages = wordspace.equilibration.split(',') if wordspace.equilibration else []
    incoming = structure
    for stage in stages:
        if not equilibrate_check(stage):
            gmx('grompp', base='md-%s' % stage, top='system',
                structure=incoming,
                log='grompp-%s' % stage, mdp='input-md-%s-eq-in' % stage,
                flag=index_flag() + ' -maxwarn 10')
            gmx('mdrun', base='md-%s' % stage, log='mdrun-%s' % stage, skip=True)
            assert os.path.isfile(wordspace['step'] + 'md-%s.gro' % stage)
            checkpoint()
        #---the next stage (or the production run) starts from this stage even if it was skipped
        incoming = 'md-%s' % stage
    #---first part of the equilibration/production run
    name = 'md.part0001'
    if not equilibrate_check(name) or not stages:
        gmx('grompp', base=name, top='system',
            structure=incoming,
            log='grompp-0001', mdp='input-md-in',
            flag=index_flag())
        gmx('mdrun', base=name, log='mdrun-0001')
        #---we don't assert that the file exists here because the user might kill it and upload
        checkpoint()
def minimize(name, method='steep', top=None):
    """
    minimize(name,method='steep')
    Standard minimization procedure.

    name : structure name, also used as the topology name unless top is given
    method : minimizer label used in the file names and to select "input-em-<method>-in"
    top : optional topology name; a trailing ".top" is removed before it is passed to grompp

    Copies em-<name>-<method>.gro to <name>-minimized.gro and calls checkpoint().
    """
    base = 'em-%s-%s' % (name, method)
    #---raw string so that "\." is a regex escape rather than an invalid string escape
    topology = name if not top else re.sub(r'^(.+)\.top$', r'\1', top)
    gmx('grompp', base=base, top=topology, structure=name,
        log='grompp-%s-%s' % (name, method), mdp='input-em-%s-in' % method, skip=True)
    #---grompp must have written the tpr file before mdrun is started
    assert os.path.isfile(wordspace['step'] + '%s.tpr' % base)
    gmx('mdrun', base=base, log='mdrun-%s-%s' % (name, method))
    filecopy(wordspace['step'] + '%s.gro' % base,
        wordspace['step'] + '%s-minimized.gro' % name)
    checkpoint()
def count_molecules(structure, resname):
    """
    Count the number of molecules in a system using make_ndx.

    structure : structure name passed to make_ndx
    resname : group name to look up in the make_ndx listing (matched literally)

    Returns the integer that make_ndx reports for that group.
    Raises an Exception unless exactly one group with that name is listed.
    """
    log_name = 'make-ndx-%s-check' % structure
    gmx('make_ndx', structure=structure, ndx=structure + '-count',
        log=log_name, inpipe='q\n')
    with open(wordspace['step'] + 'log-' + log_name) as fp:
        lines = fp.readlines()
    #---escape the name so that characters such as "+" are not read as regex operators
    residue_regex = r'^\s*[0-9]+\s+%s\s+\:\s+([0-9]+)\s' % re.escape(resname)
    counts = [int(m.group(1)) for m in (re.match(residue_regex, l) for l in lines) if m]
    if not counts:
        raise Exception('cannot find resname "%s" in %s' % (resname, log_name))
    if len(counts) > 1:
        raise Exception('found %d groups for resname "%s" in %s' % (
            len(counts), resname, log_name))
    return counts[0]
def minimize_steep_cg(name):
    """
    minimize_steep_cg(name)
    Minimization using steepest descent followed by conjugate gradient.
    Note that this method has been retired due to reliability issues.

    name : structure and topology name; outputs are "em-<name>-steep" and "em-<name>-cg"

    select_minimum is called on both results to write "<name>-minimized".
    """
    steep = 'em-%s-steep' % name
    cg = 'em-%s-cg' % name
    gmx('grompp', base=steep, top=name, structure=name,
        log='grompp-em-%s-steep' % name, mdp='input-em-steep-in')
    gmx('mdrun', base=steep, log='mdrun-%s-steep' % name, skip=True)
    #---if first step fails we skip it and try again with the second minimizer
    #---the name must be substituted here otherwise the check never finds the file and the
    #---steepest descent result is always overwritten by the unminimized structure
    if not os.path.isfile(wordspace['step'] + '%s.gro' % steep):
        filecopy(wordspace['step'] + '%s.gro' % name, wordspace['step'] + '%s.gro' % steep)
    gmx('grompp', base=cg, top=name, structure=steep,
        log='grompp-%s-cg' % name, mdp='input-em-cg-in', skip=True)
    gmx('mdrun', base=cg, log='mdrun-%s-cg' % name)
    select_minimum('%s-steep' % name, '%s-cg' % name, gro='%s-minimized' % name)
    checkpoint()
def equilibrate(groups=None, structure='system'):
    """
    equilibrate()
    Standard equilibration procedure.

    Each stage listed (comma-separated) in wordspace.equilibration is run from the output of
    the stage before it; afterwards the "md.part0001" run is prepared and started.
    When groups is given it is handed to grompp as "-n <groups>".
    """

    def grompp_flag(suffix=''):
        """Return the optional index-file flag followed by suffix."""
        return ('' if not groups else '-n %s' % groups) + suffix

    #---sequential equilibration stages
    seq = wordspace.equilibration.split(',') if wordspace.equilibration else []
    #---pair every stage with the structure it starts from
    sources = [structure] + ['md-%s' % prior for prior in seq[:-1]]
    for stage, source in zip(seq, sources):
        if equilibrate_check(stage):
            continue
        stage_base = 'md-%s' % stage
        gmx('grompp', base=stage_base, top='system',
            structure=source,
            log='grompp-%s' % stage, mdp='input-md-%s-eq-in' % stage,
            flag=grompp_flag(' -maxwarn 10'))
        gmx('mdrun', base=stage_base, log='mdrun-%s' % stage, skip=True)
        assert os.path.isfile(wordspace['step'] + 'md-%s.gro' % stage)
        checkpoint()
    #---first part of the equilibration/production run
    name = 'md.part0001'
    if not equilibrate_check(name) or seq == []:
        start = 'md-%s' % seq[-1] if seq else structure
        gmx('grompp', base=name, top='system',
            structure=start,
            log='grompp-0001', mdp='input-md-in',
            flag=grompp_flag())
        gmx('mdrun', base=name, log='mdrun-0001')
        #---we don't assert that the file exists here because the user might kill it and upload
        checkpoint()
def vacuum_pack(structure='vacuum', name='vacuum-pack', gro='vacuum-packed'):
    """
    Pack the lipids in the plane, gently.

    Runs grompp and mdrun as "md-<name>", applies remove_jump, copies the result to
    "<gro>.gro" and stores the first two box vectors of that structure in
    wordspace['bilayer_dimensions_slab'][:2].
    """
    run = 'md-%s' % name
    gmx('grompp', base=run, top='vacuum',
        structure=structure, log='grompp-%s' % name, mdp='input-md-%s-eq-in' % name,
        flag='-maxwarn 100')
    gmx('mdrun', base=run, log='mdrun-%s' % name, skip=True)
    unwrapped = 'md-%s-nojump' % name
    remove_jump(structure=run, tpr='md-' + name, gro=unwrapped)
    source = wordspace['step'] + 'md-%s-nojump.gro' % name
    target = wordspace['step'] + '%s.gro' % gro
    filecopy(source, target)
    #---only the vectors reported for the packed structure itself are used
    slab_vectors, _ = get_box_vectors(gro)
    wordspace['bilayer_dimensions_slab'][:2] = slab_vectors[:2]
def bilayer_sorter(structure, ndx='system-groups'):
    """
    Divide the system into groups.

    Writes an index file with make_ndx. When "protein_ready" is in the wordspace the groups
    are PROTEIN, LIPIDS, SOLVENT and SYSTEM (in that order); otherwise they are SYSTEM,
    LIPIDS and SOLVENT.

    structure : structure name that is inspected to locate the protein group
    ndx : name of the index file written by the final make_ndx call

    Raises an Exception if a protein is expected but make_ndx lists no Protein group.
    """
    if 'protein_ready' in wordspace:
        gmx('make_ndx', structure=structure, ndx='%s-inspect' % structure,
            log='make-ndx-%s-inspect' % structure, inpipe="q\n")
        with open(wordspace['step'] + 'log-make-ndx-%s-inspect' % structure) as fp:
            lines = fp.readlines()
        #---find the protein group because it may not be obvious in CGMD
        make_ndx_sifter = r'^\s*([0-9]+)\s*Protein'
        protein_group = None
        for line in lines:
            found = re.match(make_ndx_sifter, line)
            if found:
                #---the first group that matches is the one we keep
                protein_group = int(found.group(1))
                break
        if protein_group is None:
            raise Exception(
                'cannot find a Protein group in log-make-ndx-%s-inspect' % structure)
        group_selector = "\n".join([
            "keep %s" % protein_group,
            "name 0 PROTEIN",
            #---! hacked
            " || ".join(['r ' + r for r in wordspace['lipids'] + ['PIP2']]),
            "name 1 LIPIDS",
            " || ".join(['r ' + r for r in ['W', 'ION', wordspace['cation'], wordspace['anion']]]),
            "name 2 SOLVENT",
            "0 | 1 | 2", "name 3 SYSTEM", "q"]) + "\n"
    else:
        group_selector = "\n".join([
            "keep 0",
            "name 0 SYSTEM",
            " || ".join(['r ' + r for r in wordspace['lipids']]),
            "name 1 LIPIDS",
            " || ".join(['r ' + r for r in ['W', 'ION', wordspace['cation'], wordspace['anion']]]),
            "name 2 SOLVENT", "q"]) + "\n"
    #---NOTE(review): the final call always reads structure='system', not the structure argument
    gmx('make_ndx', structure='system', ndx=ndx, log='make-ndx-groups',
        inpipe=group_selector)
def solvate_bilayer(structure='vacuum'):
    """
    Solvate a CGMD bilayer (possibly with proteins) avoiding overlaps.

    Tiles the water box named by wordspace.water_box, trims it to the slab footprint and the
    requested solvent thickness, shifts it along z, combines it with the incoming structure
    and trims the waters. The solvent count, wordspace['bilayer_dimensions_solvate'] and
    wordspace['water_without_ions'] are updated.

    Raises an Exception if the water box is not cubic or if the solvent count is not a whole
    number of molecules for the current wordspace.atom_resolution.
    """
    #---check the size of the slab
    incoming_structure = str(structure)
    boxdims_old, boxdims = get_box_vectors(structure)
    #---check the size of the water box
    waterbox = wordspace.water_box
    basedim, _ = get_box_vectors(waterbox)
    if not all([i == basedim[0] for i in basedim]):
        #---name the offending box in the message (it used to print empty quotes)
        raise Exception('[ERROR] expecting water box "%s" to be cubic' % waterbox)
    basedim = basedim[0]
    #---make an oversized water box
    newdims = boxdims_old[:2] + [wordspace['solvent_thickness']]
    gmx('genconf', structure=waterbox, gro='solvate-empty-uncentered-untrimmed',
        nbox=' '.join([str(int(i / basedim + 1)) for i in newdims]), log='genconf')
    #---trim the blank water box
    trim_waters(structure='solvate-empty-uncentered-untrimmed',
        gro='solvate-empty-uncentered', boxcut=True, boxvecs=newdims,
        gap=0.0, method=wordspace.atom_resolution)
    #---update waters
    structure = 'solvate-empty-uncentered'
    component(wordspace.sol, count=count_molecules(structure, wordspace.sol))
    #---translate the water box
    gmx('editconf', structure=structure, gro='solvate-water-shifted',
        flag='-translate 0 0 %f' % (wordspace['bilayer_dimensions_slab'][2] / 2.),
        log='editconf-solvate-shift')
    #---combine and trim with new box vectors
    structure = 'solvate-water-shifted'
    boxdims_old, boxdims = get_box_vectors(structure)
    boxvecs = wordspace['bilayer_dimensions_slab'][:2] + [
        wordspace['bilayer_dimensions_slab'][2] + boxdims[2]]
    gro_combinator('%s.gro' % incoming_structure, structure, box=boxvecs,
        cwd=wordspace['step'], gro='solvate-dense')
    structure = 'solvate-dense'
    #---trim everything so that waters are positioned in the box without steric clashes
    trim_waters(structure=structure, gro='solvate', boxcut=False,
        gap=wordspace['protein_water_gap'], method=wordspace.atom_resolution, boxvecs=boxvecs)
    structure = 'solvate'
    #---make_ndx reports a count that is divided by the number of sites per water
    sites_per_water = {'aamd': 3, 'cgmd': 1}[wordspace.atom_resolution]
    nwaters, leftover = divmod(count_molecules(structure, wordspace.sol), sites_per_water)
    if leftover:
        raise Exception('[ERROR] fractional water molecules')
    component(wordspace.sol, count=nwaters)
    wordspace['bilayer_dimensions_solvate'] = boxvecs
    wordspace['water_without_ions'] = nwaters
def remove_jump(structure, tpr, gro):
    """
    Correct that thing where the bilayer crosses the PBCs and gets split.

    An index file named after the structure is made with "keep 0", trjconv is run with
    "-pbc nojump" to write gro, and the make_ndx log is removed at the end.
    """
    shared = dict(ndx=structure, structure=structure)
    gmx('make_ndx', inpipe="keep 0\nq\n", log='make-ndx-nojump', **shared)
    gmx('trjconv', gro=gro, tpr=tpr, log='trjconv-%s-nojump' % structure,
        flag='-pbc nojump', **shared)
    #---the make_ndx log is not kept
    stale_log = wordspace['step'] + 'log-' + 'make-ndx-nojump'
    os.remove(stale_log)
def minimize(name, method='steep', top=None):
    """
    minimize(name,method='steep')
    Standard minimization procedure.

    name : structure name, also used as the topology name unless top is given
    method : minimizer label used in the file names and to select "input-em-<method>-in"
    top : optional topology name; a trailing ".top" is removed before it is passed to grompp

    Copies em-<name>-<method>.gro to <name>-minimized.gro and calls checkpoint().
    """
    #---raw string so that "\." is a regex escape rather than an invalid string escape
    top_name = re.sub(r'^(.+)\.top$', r'\1', top) if top else name
    gmx('grompp', base='em-%s-%s' % (name, method), top=top_name,
        structure=name, log='grompp-%s-%s' % (name, method),
        mdp='input-em-%s-in' % method, skip=True)
    #---grompp must have written the tpr file before mdrun is started
    assert os.path.isfile(wordspace['step'] + 'em-%s-%s.tpr' % (name, method))
    gmx('mdrun', base='em-%s-%s' % (name, method), log='mdrun-%s-%s' % (name, method))
    filecopy(wordspace['step'] + 'em-' + '%s-%s.gro' % (name, method),
        wordspace['step'] + '%s-minimized.gro' % name)
    checkpoint()
def bilayer_middle(structure, gro):
    """
    Move the bilayer to the middle of the z-coordinate of the box.
    Note that the protein adhesion procedure works best on a slab that is centered on z=0.
    This means that the bilayer will be broken across z=0.
    For visualization it is better to center it.

    The structure and gro arguments are not used: the input is always
    "counterions-minimized" and the output is always "system".
    """
    selection = "keep 0\nr %s || r ION || r %s || r %s\n!1\ndel 1\nq\n" % (
        wordspace['sol'], wordspace['anion'], wordspace['cation'])
    gmx('make_ndx', ndx='system-dry', structure='counterions-minimized',
        inpipe=selection, log='make-ndx-center')
    #---bilayer slab is near z=0 so it is likely split so we shift by half of the box vector
    half_z = wordspace['bilayer_dimensions_solvate'][2] / 2.
    gmx('trjconv', structure='counterions-minimized', gro='counterions-shifted',
        ndx='system-dry', flag='-trans 0 0 %f -pbc mol' % half_z,
        tpr='em-counterions-steep', log='trjconv-shift', inpipe="0\n")
    #---center everything
    gmx('trjconv', structure='counterions-shifted', gro='system', ndx='system-dry',
        tpr='em-counterions-steep', log='trjconv-middle', inpipe="1\n0\n",
        flag='-center -pbc mol')
def get_box_vectors(structure, gro=None, d=0, log='checksize'):
    """
    Return the box vectors.

    Runs editconf on the structure and reads the "box vectors" and "new box vectors" lines
    from its log.

    structure : name of the structure passed to editconf
    gro : name of the editconf output (default "<structure>-check-box"); removed before returning
    d : value for the editconf "-d" flag (formatted with %d, hence written as an integer)
    log : suffix of the editconf log name

    Returns (vecs_old, vecs_new), two lists of floats.
    Raises an Exception if either set of box vectors cannot be found or parsed.
    """
    if not gro:
        gro = structure + '-check-box'
    #---note that we consult the command_library here
    gmx('editconf', structure=structure, gro=gro,
        log='editconf-%s' % log, flag='-d %d' % d)
    log_fn = wordspace['step'] + 'log-editconf-%s' % log
    with open(log_fn, 'r') as fp:
        lines = fp.readlines()
    box_vector_regex = r'\s*box vectors\s*\:\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)'
    box_vector_new_regex = r'\s*new box vectors\s*\:\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)'
    runon_regex = (r'^\s*([-]?[0-9]+\.?[0-9]{0,3})\s*([-]?[0-9]+\.?[0-9]{0,3})'
        r'\s*([-]?[0-9]+\.?[0-9]{0,3})')

    def read_vectors(line_regex, label):
        """Parse the three numbers from the first log line that matches line_regex."""
        candidates = [l for l in lines if re.match(line_regex, l)]
        if not candidates:
            raise Exception('cannot find the %s in %s' % (label, log_fn))
        text = re.findall(r'\s*box vectors\s*:([^\(]+)', candidates[0])[0]
        try:
            return [float(i) for i in text.strip(' ').split()]
        except ValueError:
            #---sometimes the numbers run together
            runon = re.findall(runon_regex, text)
            if not runon:
                raise Exception('cannot parse the %s "%s" in %s' % (label, text.strip(), log_fn))
            return [float(i) for i in runon[0]]

    #---failures raise instead of dropping into the debugger so unattended runs stop cleanly
    vecs_old = read_vectors(box_vector_regex, 'box vectors')
    vecs_new = read_vectors(box_vector_new_regex, 'new box vectors')
    #---no need to keep the output since it is a verbatim copy for diagnostic only
    os.remove(wordspace['step'] + gro + '.gro')
    return vecs_old, vecs_new
def count_molecules(structure, resname):
    """
    Count the number of molecules in a system using make_ndx.

    structure : structure name passed to make_ndx
    resname : group name to look up in the make_ndx listing (matched literally)

    Returns the integer that make_ndx reports for that group.
    Raises an Exception unless exactly one group with that name is listed.
    """
    log_name = 'make-ndx-%s-check' % structure
    gmx('make_ndx', structure=structure, ndx=structure + '-count',
        log=log_name, inpipe='q\n')
    with open(wordspace['step'] + 'log-' + log_name) as fp:
        lines = fp.readlines()
    #---escape the name so that characters such as "+" are not read as regex operators
    residue_regex = r'^\s*[0-9]+\s+%s\s+\:\s+([0-9]+)\s' % re.escape(resname)
    counts = []
    for line in lines:
        found = re.match(residue_regex, line)
        if found:
            counts.append(int(found.group(1)))
    if not counts:
        raise Exception('cannot find resname "%s" in %s' % (resname, log_name))
    if len(counts) > 1:
        raise Exception('found %d groups for resname "%s" in %s' % (
            len(counts), resname, log_name))
    return counts[0]
def minimize_steep_cg(name):
    """
    minimize_steep_cg(name)
    Minimization using steepest descent followed by conjugate gradient.
    Note that this method has been retired due to reliability issues.

    name : structure and topology name; outputs are "em-<name>-steep" and "em-<name>-cg"

    select_minimum is called on both results to write "<name>-minimized".
    """
    gmx('grompp', base='em-%s-steep' % name, top=name, structure=name,
        log='grompp-em-%s-steep' % name, mdp='input-em-steep-in')
    gmx('mdrun', base='em-%s-steep' % name, log='mdrun-%s-steep' % name, skip=True)
    #---if first step fails we skip it and try again with the second minimizer
    #---the name must be substituted here otherwise the check never finds the file and the
    #---steepest descent result is always overwritten by the unminimized structure
    steep_gro = wordspace['step'] + 'em-%s-steep.gro' % name
    if not os.path.isfile(steep_gro):
        filecopy(wordspace['step'] + '%s.gro' % name, steep_gro)
    gmx('grompp', base='em-%s-cg' % name, top=name, structure='em-%s-steep' % name,
        log='grompp-%s-cg' % name, mdp='input-em-cg-in', skip=True)
    gmx('mdrun', base='em-%s-cg' % name, log='mdrun-%s-cg' % name)
    select_minimum('%s-steep' % name, '%s-cg' % name, gro='%s-minimized' % name)
    checkpoint()
def solvate(structure, top):
    """
    solvate(structure,top)
    Standard solvate procedure for atomistic protein in water.

    structure : name of the structure to solvate
    top : not used in this function

    The box is the editconf "-d 0" box padded by wordspace['water_buffer'] on every side.
    Waters are trimmed when wordspace['protein_water_gap'] is set. The water count is stored
    in wordspace['water_without_ions'] and in the "SOL" component.

    Raises an Exception if the box vectors or the Water group cannot be read from the logs.
    """
    #---purge the wordspace of solvent and anions in case we are resuming
    for key in [wordspace['anion'], wordspace['cation'], 'SOL']:
        #---a plain list of names also handles an empty composition
        names = [entry[0] for entry in wordspace['composition']]
        if key in names:
            del wordspace['composition'][names.index(key)]
    gmx('editconf', structure=structure, gro='solvate-box-alone',
        log='editconf-checksize', flag='-d 0')
    with open(wordspace['step'] + 'log-editconf-checksize', 'r') as fp:
        lines = fp.readlines()
    #---the last line that starts with "box vectors" is used
    box_lines = [l for l in lines if re.match(r'\s*box vectors', l)]
    found = re.findall(r'\s*box vectors \:\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)',
        box_lines[-1]) if box_lines else []
    if not found:
        raise Exception('cannot find the box vectors in log-editconf-checksize')
    boxdims = [float(i) for i in found[-1]]
    boxvecs = tuple([i + 2 * wordspace['water_buffer'] for i in boxdims])
    center = tuple([i / 2. for i in boxvecs])
    #---cube is not implemented yet
    #---NOTE(review): this call passes "flags" while the other gmx calls pass "flag" -- confirm
    gmx('editconf', structure=structure, gro='solvate-protein',
        flags='-center %f %f %f' % center + ' ' + '-box %f %f %f' % boxvecs,
        log='editconf-center-protein')
    gmx('genbox', structure='solvate-protein', solvent=wordspace['solvent'],
        gro='solvate-dense',  #top='solvate-standard',
        log='genbox-solvate')
    #---trim waters if the protein_water_gap setting is not False
    if 'protein_water_gap' in wordspace and wordspace['protein_water_gap'] != False:
        trim_waters(structure='solvate-dense', gro='solvate',
            gap=wordspace['protein_water_gap'], boxvecs=boxvecs)
    else:
        filecopy(wordspace['step'] + 'solvate-dense.gro', wordspace['step'] + 'solvate.gro')
    gmx('make_ndx', structure='solvate', ndx='solvate-water-check', inpipe='q\n',
        log='make-ndx-solvate-check')
    with open(wordspace['step'] + 'log-make-ndx-solvate-check', 'r') as fp:
        lines = fp.readlines()
    water_regex = r'\s*[0-9]+\s+Water\s+:\s+([0-9]+)\s+atoms'
    water_counts = [m.group(1) for m in (re.match(water_regex, l) for l in lines) if m]
    if not water_counts:
        raise Exception('cannot find the Water group in log-make-ndx-solvate-check')
    #---three atoms per water; floor division keeps the count an integer
    nwaters = int(water_counts[-1]) // 3
    wordspace['water_without_ions'] = nwaters
    component('SOL', count=nwaters)
    #---add the suffix so that water is referred to by its name in the settings
    include(wordspace['water'], ff=True)
    write_top('')
def bilayer_middle(structure, gro):
    """
    Move the bilayer to the middle of the z-coordinate of the box.
    Note that the protein adhesion procedure works best on a slab that is centered on z=0.
    This means that the bilayer will be broken across z=0.
    For visualization it is better to center it.

    Neither structure nor gro is read here: "counterions-minimized" is the input and
    "system" is the output.
    """
    index_name = 'system-dry'
    run_input = 'em-counterions-steep'
    solvent_names = (wordspace['sol'], wordspace['anion'], wordspace['cation'])
    gmx('make_ndx', ndx=index_name, structure='counterions-minimized',
        inpipe="keep 0\nr %s || r ION || r %s || r %s\n!1\ndel 1\nq\n" % solvent_names,
        log='make-ndx-center')
    #---bilayer slab is near z=0 so it is likely split so we shift by half of the box vector
    shift_flag = '-trans 0 0 %f -pbc mol' % (wordspace['bilayer_dimensions_solvate'][2] / 2.)
    gmx('trjconv', structure='counterions-minimized', gro='counterions-shifted',
        ndx=index_name, flag=shift_flag,
        tpr=run_input, log='trjconv-shift', inpipe="0\n")
    #---center everything
    gmx('trjconv', structure='counterions-shifted', gro='system', ndx=index_name,
        tpr=run_input, log='trjconv-middle', inpipe="1\n0\n", flag='-center -pbc mol')
def counterions(structure, top, includes=None, ff_includes=None, gro='counterions'):
    """
    counterions(structure,top)
    Standard procedure for adding counterions.
    The resname must be understandable by "r RESNAME" in make_ndx and writes to the top file.

    structure : name of the solvated structure
    top : not used in this function
    includes : a name or list of names passed to include()
    ff_includes : a name or list of names passed to include(...,ff=True)
    gro : name of the structure written by genion

    Raises an Exception if the ion counts cannot be read from the genion log.
    """
    #---we store the water resname in the wordspace as "sol"
    resname = wordspace.get('sol', 'SOL')
    #---clean up the composition in case this is a restart
    #---the ion names come from the wordspace while the water resname is used directly
    for stale in [wordspace.get('cation'), wordspace.get('anion'), resname]:
        names = [entry[0] for entry in wordspace.get('composition', [])]
        if stale in names:
            wordspace['composition'].pop(names.index(stale))
    component(resname, count=wordspace['water_without_ions'])
    #---write the topology file as of the solvate step instead of copying them (genion overwrites top)
    write_top('')
    gmx('grompp', base='genion', structure=structure,
        top='counterions', mdp='input-em-steep-in',
        log='grompp-genion')
    gmx('make_ndx', structure=structure, ndx='solvate-waters',
        inpipe='keep 0\nr %s\nkeep 1\nq\n' % resname,
        log='make-ndx-counterions-check')
    gmx('genion', base='genion', gro=gro, ndx='solvate-waters',
        cation=wordspace['cation'], anion=wordspace['anion'],
        flag='-conc %f -neutral' % wordspace['ionic_strength'],
        log='genion')
    with open(wordspace['step'] + 'log-genion', 'r') as fp:
        lines = fp.readlines()
    #---the last line that mentions "Will try" declares the ions that genion adds
    declarations = [l for l in lines if 'Will try' in l]
    ion_regex = r'^Will try to add ([0-9]+)\+?\-? ([\w\+\-]+) ions and ([0-9]+) ([\w\+\-]+) ions'
    found = re.findall(ion_regex, declarations[-1]) if declarations else []
    if not found:
        raise Exception('cannot find the ion counts in log-genion')
    ion_counts = found[-1]
    #---the regex yields (count,name,count,name) as strings so the counts are made integers
    for ii in range(2):
        component(ion_counts[2 * ii + 1], count=int(ion_counts[2 * ii]))
    #---each ion replaces one solvent molecule
    component(resname, count=component(resname) -
        component(ion_counts[1]) - component(ion_counts[3]))
    if includes:
        if isinstance(includes, str):
            includes = [includes]
        for i in includes:
            include(i)
    if ff_includes:
        if isinstance(ff_includes, str):
            ff_includes = [ff_includes]
        for i in ff_includes:
            include(i, ff=True)
    write_top('')
def write_structure_pdb(structure, pdb):
    """
    write_structure_pdb(structure,pdb)
    Infer the starting residue from the original PDB and write structure.pdb with the correct indices
    according to the latest GRO structure (typically counterions.gro).

    structure : name of the structure that editconf converts to structure.pdb
    pdb : file name of the original PDB inside the step directory

    Raises an Exception if the System or Protein group is missing from the make_ndx log or if
    the PDB has no ATOM record.
    """
    #---automatically center the protein in the box here and write the final structure
    gmx('make_ndx', structure='counterions', ndx='counterions-groups',
        log='make-ndx-counterions', inpipe='q\n')
    with open(wordspace['step'] + 'log-make-ndx-counterions', 'r') as fp:
        lines = fp.readlines()
    groupdict = {}
    for name in ['System', 'Protein']:
        #---the first listed group whose name starts with the requested name is used
        hits = [l for l in lines if re.match(r'\s*[0-9]+\s+%s' % name, l)]
        numbered = re.findall(r'^\s*([0-9]+)\s(\w+)', hits[0]) if hits else []
        if not numbered:
            raise Exception('cannot find group "%s" in log-make-ndx-counterions' % name)
        groupdict[numbered[0][1]] = int(numbered[0][0])
    gmx('trjconv', ndx='counterions-groups', structure='counterions-minimized',
        inpipe='%d\n%d\n' % (groupdict['Protein'], groupdict['System']),
        log='trjconv-counterions-center', tpr='em-counterions-steep', gro='system')
    with open(wordspace['step'] + pdb, 'r') as fp:
        lines = fp.readlines()
    atom_lines = [line for line in lines if re.match('^ATOM', line)]
    if not atom_lines:
        raise Exception('cannot find an ATOM record in %s' % pdb)
    #---the residue sequence number occupies columns 23-26 of an ATOM record
    #---column 27 is the insertion code and must not be part of the number
    startres = int(atom_lines[0][22:26])
    gmx('editconf', structure=structure,
        flag='-o structure.pdb -resnr %d' % startres,
        log='editconf-structure-pdb')
def write_structure_pdb(structure, pdb):
    """
    write_structure_pdb(structure,pdb)
    Infer the starting residue from the original PDB and write structure.pdb with the correct indices
    according to the latest GRO structure (typically counterions.gro).

    structure : name of the structure that editconf converts to structure.pdb
    pdb : file name of the original PDB inside the step directory

    Raises an Exception if the System or Protein group is missing from the make_ndx log or if
    the PDB has no ATOM record.
    """
    #---automatically center the protein in the box here and write the final structure
    gmx('make_ndx', structure='counterions', ndx='counterions-groups',
        log='make-ndx-counterions', inpipe='q\n')
    with open(wordspace['step'] + 'log-make-ndx-counterions', 'r') as fp:
        lines = fp.readlines()
    groupdict = {}
    for name in ['System', 'Protein']:
        #---the first listed group whose name starts with the requested name is used
        hits = [l for l in lines if re.match(r'\s*[0-9]+\s+%s' % name, l)]
        numbered = re.findall(r'^\s*([0-9]+)\s(\w+)', hits[0]) if hits else []
        if not numbered:
            raise Exception('cannot find group "%s" in log-make-ndx-counterions' % name)
        groupdict[numbered[0][1]] = int(numbered[0][0])
    gmx('trjconv', ndx='counterions-groups', structure='counterions-minimized',
        inpipe='%d\n%d\n' % (groupdict['Protein'], groupdict['System']),
        log='trjconv-counterions-center', tpr='em-counterions-steep', gro='system')
    with open(wordspace['step'] + pdb, 'r') as fp:
        lines = fp.readlines()
    atom_lines = [line for line in lines if re.match('^ATOM', line)]
    if not atom_lines:
        raise Exception('cannot find an ATOM record in %s' % pdb)
    #---the residue sequence number occupies columns 23-26 of an ATOM record
    #---column 27 is the insertion code and must not be part of the number
    startres = int(atom_lines[0][22:26])
    gmx('editconf', structure=structure,
        flag='-o structure.pdb -resnr %d' % startres,
        log='editconf-structure-pdb')
def counterions(structure, top, includes=None, ff_includes=None, gro='counterions'):
    """
    counterions(structure,top)
    Standard procedure for adding counterions.
    The resname must be understandable by "r RESNAME" in make_ndx and writes to the top file.

    structure : name of the solvated structure
    top : not used in this function
    includes : a name or list of names passed to include()
    ff_includes : a name or list of names passed to include(...,ff=True)
    gro : name of the structure written by genion

    Raises an Exception if the ion counts cannot be read from the genion log.
    """
    #---we store the water resname in the wordspace as "sol"
    resname = wordspace.get('sol', 'SOL')
    #---clean up the composition in case this is a restart
    #---the ion names come from the wordspace while the water resname is used directly
    for stale in [wordspace.get('cation'), wordspace.get('anion'), resname]:
        names = [entry[0] for entry in wordspace.get('composition', [])]
        if stale in names:
            wordspace['composition'].pop(names.index(stale))
    component(resname, count=wordspace['water_without_ions'])
    #---write the topology file as of the solvate step instead of copying them (genion overwrites top)
    write_top('')
    gmx('grompp', base='genion', structure=structure,
        top='counterions', mdp='input-em-steep-in',
        log='grompp-genion')
    gmx('make_ndx', structure=structure, ndx='solvate-waters',
        inpipe='keep 0\nr %s\nkeep 1\nq\n' % resname,
        log='make-ndx-counterions-check')
    gmx('genion', base='genion', gro=gro, ndx='solvate-waters',
        cation=wordspace['cation'], anion=wordspace['anion'],
        flag='-conc %f -neutral' % wordspace['ionic_strength'],
        log='genion')
    with open(wordspace['step'] + 'log-genion', 'r') as fp:
        lines = fp.readlines()
    #---the last line that mentions "Will try" declares the ions that genion adds
    declarations = [l for l in lines if 'Will try' in l]
    ion_regex = r'^Will try to add ([0-9]+)\+?\-? ([\w\+\-]+) ions and ([0-9]+) ([\w\+\-]+) ions'
    found = re.findall(ion_regex, declarations[-1]) if declarations else []
    if not found:
        raise Exception('cannot find the ion counts in log-genion')
    first_count, first_ion, second_count, second_ion = found[-1]
    #---the regex yields strings so the counts are made integers before they are stored
    component(first_ion, count=int(first_count))
    component(second_ion, count=int(second_count))
    #---each ion replaces one solvent molecule
    component(resname, count=component(resname) -
        component(first_ion) - component(second_ion))
    if includes:
        if isinstance(includes, str):
            includes = [includes]
        for i in includes:
            include(i)
    if ff_includes:
        if isinstance(ff_includes, str):
            ff_includes = [ff_includes]
        for i in ff_includes:
            include(i, ff=True)
    write_top('')
def multiply(nx=1, ny=1, nz=1, quirky_ions=True):
    """
    Make a copy of a simulation box in multiple directions.

    nx, ny, nz : number of copies along each box vector (passed to genconf as nbox)
    quirky_ions : not used in this function

    The composition counts are multiplied by nx*ny*nz and the GRO written by genconf is
    reordered so that its atoms follow the order of the composition. The result is copied to
    system.gro and wordspace['composition'] is replaced by a tuple with the new counts.

    Raises an Exception if no composition is found in the wordspace or in a previous step.
    """
    factor = nx * ny * nz
    #---update the composition
    #---if the last step doesn't have the composition we step backwards and pick up requirements
    #---note that "protein_ready" is important for the bilayer_sorter
    for prereq in ['composition', 'lipids', 'cation', 'anion', 'protein_ready']:
        if prereq not in wordspace:
            steplist = detect_last(steplist=True)[::-1]
            #---walk backwards through steps until we find the composition
            for stepname in steplist:
                oldspace = resume(read_only=True,
                    step=int(re.match('s([0-9]+)-', stepname).group(1)))
                if prereq in oldspace:
                    wordspace[prereq] = deepcopy(oldspace[prereq])
                    break
    #---if composition is available we continue
    if 'composition' not in wordspace:
        raise Exception('cannot find the composition in the wordspace or in a previous step')
    wordspace['new_composition'] = [[name, count * factor]
        for name, count in wordspace['composition']]
    kwargs = {}
    if 'buffer' in wordspace:
        kwargs['flag'] = ' -dist %.2f %.2f %.2f' % tuple(wordspace['buffer'])
    gmx('genconf', structure='system-input', gro='system-multiply',
        nbox="%d %d %d" % (nx, ny, nz), log='genconf-multiply', **kwargs)
    #---copy ITP files
    for itp in wordspace.itp:
        filecopy(wordspace.last_step + itp, wordspace.step + itp)
    #---reorder the GRO for convenience
    with open(wordspace['step'] + 'system-multiply.gro') as fp:
        lines = fp.readlines()
    #---for each element in the composition, extract all of the residues for that element
    #---the two header lines are carried over unchanged
    lines_reorder = []
    lines_reorder.extend(lines[:2])
    #---develop a list of filtering rules
    keylist = {}
    for key, count in wordspace['new_composition']:
        if key in [wordspace[i] for i in ['anion', 'cation']]:
            #---ions are matched on columns 5-15 as "ION" followed by the ion name
            keylist[key] = 'regex', (('ION', key), slice(5, 15), r'\s*%s\s*%s\s*')
        elif re.match('^(p|P)rotein', key) and key + '.itp' in wordspace.itp:
            #---custom procedure for finding proteins which have variegated residue numbers
            itp = read_itp(wordspace.step + key + '.itp')
            residues_starts = []
            #---the fourth field of each ITP atom entry is compared with columns 5-10 of the GRO
            seq = [atom[3] for atom in itp['atoms']]
            residues = [i[5:10].strip() for i in lines]
            for i in range(len(residues) - len(seq)):
                #---minor speed up by checking the first one
                if seq[0] == residues[i] and residues[i:i + len(seq)] == seq:
                    residues_starts.append(i)
            keylist[key] = 'slices', [slice(i, i + len(seq)) for i in residues_starts]
        else:
            #---everything else is matched on columns 5-10
            keylist[key] = 'regex', (key, slice(5, 10), r'\s*%s\s*')
    for key, count in wordspace['new_composition']:
        method, details = keylist[key]
        if method == 'regex':
            pattern_args, sl, regex = details
            lines_reorder.extend([l for l in lines[2:-1]
                if re.match(regex % pattern_args, l[sl])])
        elif method == 'slices':
            for sl in details:
                lines_reorder.extend(lines[sl])
        else:
            #---a bare raise has no active exception to re-raise here
            raise Exception('unknown filtering method "%s" for "%s"' % (method, key))
    #---the last line of the GRO is carried over unchanged
    lines_reorder.extend([lines[-1]])
    with open(wordspace['step'] + 'system-multiply-reorder.gro', 'w') as fp:
        for line in lines_reorder:
            fp.write(line)
    filecopy(wordspace['step'] + 'system-multiply-reorder.gro',
        wordspace['step'] + 'system.gro')
    wordspace['composition'] = tuple(wordspace['new_composition'])
    del wordspace['new_composition']
def solvate(structure, top):
    """
    solvate(structure,top)
    Standard solvate procedure for atomistic protein in water.

    Measures the box of `structure` with editconf, enlarges it by
    wordspace['water_buffer'] on every side, centers the structure in the new
    box, fills it with wordspace['solvent'] via genbox, optionally trims waters
    near the protein, and records the number of waters.

    Arguments:
    structure -- name of the incoming structure
    top -- not used by this function (kept so existing callers still work)

    Side effects: removes the first composition entry for each of the anion, the
    cation and SOL, sets wordspace['water_without_ions'], registers the SOL
    component, includes the water model and writes the topology.
    """
    #---purge the wordspace of solvent and anions in case we are resuming
    for key in [wordspace['anion'], wordspace['cation'], 'SOL']:
        #---build the names with a comprehension so an empty composition is handled
        names = [entry[0] for entry in wordspace['composition']]
        if key in names:
            del wordspace['composition'][names.index(key)]
    gmx('editconf', structure=structure, gro='solvate-box-alone',
        log='editconf-checksize', flag='-d 0')
    with open(wordspace['step'] + 'log-editconf-checksize', 'r') as fp:
        lines = fp.readlines()
    #---read the box size from the last line that starts with "box vectors"
    box_lines = [l for l in lines if re.match(r'\s*box vectors', l)]
    boxdims = [float(v) for v in re.findall(
        r'\s*box vectors \:\s*([^\s]+)\s+([^\s]+)\s+([^\s]+)', box_lines[-1])[-1]]
    #---pad the box by the water buffer on both sides of every dimension
    boxvecs = tuple([i + 2 * wordspace['water_buffer'] for i in boxdims])
    center = tuple([i / 2. for i in boxvecs])
    #---cube is not implemented yet
    #---the keyword is "flag" here to agree with every other gmx call in this file
    gmx('editconf', structure=structure, gro='solvate-protein',
        flag='-center %f %f %f' % center + ' ' + '-box %f %f %f' % boxvecs,
        log='editconf-center-protein')
    gmx('genbox', structure='solvate-protein', solvent=wordspace['solvent'],
        gro='solvate-dense', log='genbox-solvate')
    #---trim waters if the protein_water_gap setting is not False
    if 'protein_water_gap' in wordspace and wordspace['protein_water_gap'] != False:
        trim_waters(structure='solvate-dense', gro='solvate',
                    gap=wordspace['protein_water_gap'], boxvecs=boxvecs)
    else:
        filecopy(wordspace['step'] + 'solvate-dense.gro',
                 wordspace['step'] + 'solvate.gro')
    #---count the waters by reading the Water group size reported by make_ndx
    gmx('make_ndx', structure='solvate', ndx='solvate-water-check',
        inpipe='q\n', log='make-ndx-solvate-check')
    with open(wordspace['step'] + 'log-make-ndx-solvate-check', 'r') as fp:
        lines = fp.readlines()
    water_lines = [l for l in lines if re.match(r'\s*[0-9]+\s+Water', l)]
    water_atoms = int(re.findall(
        r'\s*[0-9]+\s+Water\s+:\s+([0-9]+)\s+atoms', water_lines[-1])[-1])
    #---integer division keeps the count an int (assumes three atoms per water -- TODO confirm)
    nwaters = water_atoms // 3
    wordspace['water_without_ions'] = nwaters
    component('SOL', count=nwaters)
    #---add the suffix so that water is referred to by its name in the settings
    include(wordspace['water'], ff=True)
    write_top('')
def solvate_bilayer(structure='vacuum'):
    """
    Solvate a CGMD bilayer (possibly with proteins) avoiding overlaps.

    Builds an oversized block of water from wordspace.water_box, trims it to the
    lateral size of the incoming structure and to wordspace['solvent_thickness'],
    shifts it along z by half of the slab thickness, combines it with the
    incoming structure and trims waters that fall within
    wordspace['protein_water_gap']. The result is written to `solvate.gro`.

    Arguments:
    structure -- name of the incoming structure

    Raises an Exception if the water box is not cubic or if the final number of
    solvent molecules is not a whole number.

    Side effects: registers the solvent component and sets
    wordspace['bilayer_dimensions_solvate'] and wordspace['water_without_ions'].
    """
    #---NOTE(review): this file defines solvate_bilayer a second time further down
    #---the later definition replaces this one when the module is loaded
    #---check the size of the slab
    incoming_structure = str(structure)
    boxdims_old, _ = get_box_vectors(structure)
    #---check the size of the water box
    waterbox = wordspace.water_box
    basedim, _ = get_box_vectors(waterbox)
    if not all([i == basedim[0] for i in basedim]):
        #---name the offending water box in the message
        raise Exception('[ERROR] expecting water box "%s" to be cubic' % waterbox)
    basedim = basedim[0]
    #---make an oversized water box
    newdims = boxdims_old[:2] + [wordspace['solvent_thickness']]
    gmx('genconf', structure=waterbox, gro='solvate-empty-uncentered-untrimmed',
        nbox=' '.join([str(int(i / basedim + 1)) for i in newdims]),
        log='genconf')
    #---trim the blank water box
    trim_waters(structure='solvate-empty-uncentered-untrimmed',
                gro='solvate-empty-uncentered', boxcut=True, boxvecs=newdims,
                gap=0.0, method=wordspace.atom_resolution)
    #---update waters
    structure = 'solvate-empty-uncentered'
    component(wordspace.sol, count=count_molecules(structure, wordspace.sol))
    #---translate the water box
    gmx('editconf', structure=structure, gro='solvate-water-shifted',
        flag='-translate 0 0 %f' % (wordspace['bilayer_dimensions_slab'][2] / 2.),
        log='editconf-solvate-shift')
    #---combine and trim with new box vectors
    structure = 'solvate-water-shifted'
    _, boxdims = get_box_vectors(structure)
    boxvecs = wordspace['bilayer_dimensions_slab'][:2] + [
        wordspace['bilayer_dimensions_slab'][2] + boxdims[2]]
    gro_combinator('%s.gro' % incoming_structure, structure, box=boxvecs,
                   cwd=wordspace['step'], gro='solvate-dense')
    structure = 'solvate-dense'
    #---trim everything so that waters are positioned in the box without steric clashes
    trim_waters(structure=structure, gro='solvate', boxcut=False,
                gap=wordspace['protein_water_gap'],
                method=wordspace.atom_resolution, boxvecs=boxvecs)
    structure = 'solvate'
    #---convert the count to molecules using three per water for aamd and one for cgmd
    per_water = {'aamd': 3.0, 'cgmd': 1.0}[wordspace.atom_resolution]
    nwaters = count_molecules(structure, wordspace.sol) / per_water
    if round(nwaters) != nwaters:
        raise Exception('[ERROR] fractional water molecules')
    nwaters = int(nwaters)
    component(wordspace.sol, count=nwaters)
    wordspace['bilayer_dimensions_solvate'] = boxvecs
    wordspace['water_without_ions'] = nwaters
def multiply(nx=1, ny=1, nz=1, quirky_ions=True):
    """
    Make a copy of a simulation box in multiple directions.

    Runs genconf on `system-input` to produce `system-multiply`, copies the ITP
    files from the previous step, then regroups the atom lines of the replicated
    GRO so they follow the order of the composition. The regrouped file is saved
    as `system-multiply-reorder.gro` and copied to `system.gro`.

    Arguments:
    nx, ny, nz -- number of copies of the box along each direction
    quirky_ions -- not used by this function (kept so existing callers still work)

    Side effects: any of composition, lipids, cation, anion and protein_ready that
    are missing from the wordspace are picked up from earlier steps when present
    there, and wordspace['composition'] becomes a tuple of [name, count] pairs
    whose counts are scaled by nx * ny * nz.
    """
    #---NOTE(review): an earlier definition of multiply appears in this file
    #---this later definition is the one that takes effect when the module is loaded
    factor = nx * ny * nz
    #---update the composition
    #---if the last step doesn't have the composition we step backwards and pick up requirements
    #---note that "protein_ready" is important for the bilayer_sorter
    prerequisites = ['composition', 'lipids', 'cation', 'anion', 'protein_ready']
    for prereq in prerequisites:
        if prereq in wordspace:
            continue
        #---walk backwards through steps until we find the prerequisite
        for stepname in detect_last(steplist=True)[::-1]:
            stepno = int(re.match(r's([0-9]+)-', stepname).group(1))
            oldspace = resume(read_only=True, step=stepno)
            if prereq in oldspace:
                wordspace[prereq] = deepcopy(oldspace[prereq])
                break
    #---if composition is available we continue
    wordspace['new_composition'] = [
        [name, count * factor] for name, count in wordspace['composition']]
    kwargs = {}
    if 'buffer' in wordspace:
        kwargs['flag'] = ' -dist %.2f %.2f %.2f' % tuple(wordspace['buffer'])
    gmx('genconf', structure='system-input', gro='system-multiply',
        nbox="%d %d %d" % (nx, ny, nz), log='genconf-multiply', **kwargs)
    #---copy ITP files
    for itp in wordspace.itp:
        filecopy(wordspace.last_step + itp, wordspace.step + itp)
    #---reorder the GRO for convenience
    with open(wordspace['step'] + 'system-multiply.gro') as fp:
        lines = fp.readlines()
    #---the title and atom-count lines lead the output unchanged
    lines_reorder = list(lines[:2])
    #---develop a list of filtering rules, one per element of the composition
    keylist = {}
    for key, _ in wordspace['new_composition']:
        if key in [wordspace[i] for i in ['anion', 'cation']]:
            #---ions are matched on columns 5-15 against "ION" followed by the ion name
            keylist[key] = 'regex', (('ION', key), slice(5, 15), r'\s*%s\s*%s\s*')
        elif re.match(r'^(p|P)rotein', key) and key + '.itp' in wordspace.itp:
            #---custom procedure for finding proteins which have variegated residue numbers
            itp = read_itp(wordspace.step + key + '.itp')
            #---materialize zip before subscripting so this also works when zip is lazy
            seq = list(list(zip(*itp['atoms']))[3])
            residues = [i[5:10].strip() for i in lines]
            residues_starts = []
            for i in range(len(residues) - len(seq)):
                #---minor speed up by checking the first one
                if seq[0] == residues[i] and residues[i:i + len(seq)] == seq:
                    residues_starts.append(i)
            keylist[key] = 'slices', [
                slice(i, i + len(seq)) for i in residues_starts]
        else:
            #---everything else is matched on columns 5-10
            #---NOTE(review): re.match only anchors the start so a key also matches longer names
            keylist[key] = 'regex', (key, slice(5, 10), r'\s*%s\s*')
    #---for each element in the composition, extract all of the lines for that element
    for key, _ in wordspace['new_composition']:
        method, details = keylist[key]
        if method == 'regex':
            target, sl, regex = details
            lines_reorder.extend(
                [l for l in lines[2:-1] if re.match(regex % target, l[sl])])
        elif method == 'slices':
            for sl in details:
                lines_reorder.extend(lines[sl])
        else:
            raise Exception(
                '[ERROR] unknown reordering method "%s" for "%s"' % (method, key))
    #---the last line of the GRO closes the file
    lines_reorder.append(lines[-1])
    with open(wordspace['step'] + 'system-multiply-reorder.gro', 'w') as fp:
        fp.writelines(lines_reorder)
    filecopy(wordspace['step'] + 'system-multiply-reorder.gro',
             wordspace['step'] + 'system.gro')
    wordspace['composition'] = tuple(wordspace['new_composition'])
    del wordspace['new_composition']
def solvate_bilayer(structure='vacuum'):
    """
    Place a layer of 'W' solvent over a bilayer structure without overlaps.

    The `martini-water` structure is tiled with genconf until it covers the
    lateral size of the incoming structure and wordspace['solvent_thickness'],
    cut down to that size, shifted along z by half of the slab thickness,
    merged with the incoming structure and finally trimmed with
    wordspace['protein_water_gap']. The result is written to `solvate.gro`.

    Arguments:
    structure -- name of the incoming structure

    Side effects: registers the 'W' component and sets
    wordspace['bilayer_dimensions_solvate'] and wordspace['water_without_ions'].
    """
    #---check the size of the slab
    slab_name = str(structure)
    slab_dims, _ = get_box_vectors(structure)
    #---! standardize these?
    cell_edge = 3.64428
    #---not referenced below because genconf is handed 'martini-water' directly
    waterbox = 'inputs/martini-water'
    #---make an oversized water box
    target_dims = slab_dims[:2] + [wordspace['solvent_thickness']]
    copies = [int(extent / cell_edge + 1) for extent in target_dims]
    gmx('genconf',
        structure='martini-water',
        gro='solvate-empty-uncentered-untrimmed',
        nbox=' '.join([str(n) for n in copies]),
        log='genconf')

    def inside(record):
        """Check that the first three coordinates on a GRO line are below target_dims."""
        xyz = [float(v) for v in record[20:].split()][:3]
        return all([xyz[axis] < target_dims[axis] for axis in range(3)])

    #---trimming waters
    with open(wordspace['step'] + 'solvate-empty-uncentered-untrimmed.gro', 'r') as fp:
        untrimmed = fp.readlines()
    kept = [record for record in untrimmed[2:-1] if inside(record)]
    #---rewrite the file with the original title, the new count and the original last line
    with open(wordspace['step'] + 'solvate-empty-uncentered.gro', 'w') as fp:
        fp.writelines(
            [untrimmed[0], '%d\n' % len(kept)] + kept + [untrimmed[-1]])
    #---update waters
    component('W', count=count_molecules('solvate-empty-uncentered', 'W'))
    #---translate the water box
    half_slab = wordspace['bilayer_dimensions_slab'][2] / 2.
    gmx('editconf',
        structure='solvate-empty-uncentered',
        gro='solvate-water-shifted',
        flag='-translate 0 0 %f' % half_slab,
        log='editconf-solvate-shift')
    #---combine and trim with new box vectors
    #---! skipping minimization?
    _, water_dims = get_box_vectors('solvate-water-shifted')
    lateral = wordspace['bilayer_dimensions_slab'][:2]
    height = wordspace['bilayer_dimensions_slab'][2] + water_dims[2]
    boxvecs = lateral + [height]
    gro_combinator('%s.gro' % slab_name, 'solvate-water-shifted',
                   box=boxvecs, cwd=wordspace['step'], gro='solvate-dense')
    trim_waters(structure='solvate-dense',
                gro='solvate',
                boxcut=False,
                gap=wordspace['protein_water_gap'],
                method='cgmd',
                boxvecs=boxvecs)
    #---record the final water count and box
    nwaters = count_molecules('solvate', 'W')
    component('W', count=nwaters)
    wordspace['bilayer_dimensions_solvate'] = boxvecs
    wordspace['water_without_ions'] = nwaters