def build_cgmd_protein():
    """
    Coarse-grain the protein with martinize and convert the result for GROMACS.

    Reads from the wordspace: 'step' (working directory for the shell calls),
    'martinize_path' (script location, "~" is expanded) and the optional keys
    'dssp', 'martinize_ff' and 'martinize_flags', which are appended to the
    martinize command line when present.

    The martinize call reads protein-start.pdb and writes protein.top and
    protein.pdb. The pdb is then converted to protein.gro with editconf, and
    Protein.itp is edited in place with sed.
    """
    name = 'protein'
    step_dir = wordspace['step']
    #---this function is run from the step but martinize_path is relative to root
    martinize_fn = os.path.expanduser(wordspace['martinize_path'])
    assert os.path.isfile(martinize_fn)
    #---the command is assembled from pieces and joined without separators
    pieces = [
        'python ', martinize_fn, ' -v -p backbone ',
        ' -f protein-start.pdb -o %s.top -x %s.pdb' % (name, name),
    ]
    if 'dssp' in wordspace:
        dssp_fn = os.path.abspath(os.path.expanduser(wordspace['dssp']))
        pieces.append(' -dssp %s' % dssp_fn)
    if 'martinize_ff' in wordspace:
        pieces.append(' -ff %s' % wordspace['martinize_ff'])
    if 'martinize_flags' in wordspace:
        pieces.append(' ' + wordspace['martinize_flags'])
    bash(''.join(pieces), cwd=step_dir, log='martinize')
    #---martinize must have produced the coarse-grained structure
    assert os.path.isfile(step_dir + 'protein.pdb')
    convert_cmd = gmxpaths['editconf'] + ' -f %s.pdb -o %s.gro' % (name, name)
    gmx_run(convert_cmd, log='editconf-convert-pdb')
    #---only allow Z-restraints because this is probably for a bilayer
    restrain_z_only = "sed -i 's/POSRES_FC POSRES_FC POSRES_FC/0 0 POSRES_FC/g' Protein.itp"
    bash(restrain_z_only, cwd=step_dir)
def get_last_frame(tpr=False, cpt=False, top=False, ndx=False, itp=False):
    """
    Get the last frame of any step in this simulation.

    This function is not narrated because the watch file is typically not ready
    until the new step directory is created, at which point you cannot use
    detect_last to get the last frame easily.

    The wordspace must already hold 'last_step' and 'last_part'. If
    <last_step>md.partNNNN.gro exists it is copied to <step>system-input.gro.
    Otherwise the frame count and timestep are read from a gmxcheck log of the
    matching xtc and trjconv writes system-input.gro at the computed last time.

    Parameters
    ----------
    tpr, cpt, top, ndx : bool
        Also copy md.partNNNN.tpr -> system-input.tpr,
        md.partNNNN.cpt -> system-input.cpt, system.top and
        system-groups.ndx respectively. The ndx file is optional; the others
        must exist when requested.
    itp : bool
        Also copy every entry of wordspace['itp'] and wordspace['sources']
        from the previous step (files or directories).

    Raises
    ------
    Exception
        If the wordspace lacks last_step/last_part, the xtc cannot be located,
        the gmxcheck log cannot be parsed, trjconv fails, or a required
        upstream file is missing.
    """
    if 'last_step' not in wordspace or 'last_part' not in wordspace:
        raise Exception('[ERROR] use detect_last to add last_step,last_part to the wordspace')
    last_step, part_num = wordspace['last_step'], wordspace['last_part']
    last_frame_exists = last_step + 'md.part%04d.gro' % part_num
    if os.path.isfile(last_frame_exists):
        shutil.copyfile(last_frame_exists, wordspace['step'] + 'system-input.gro')
    else:
        xtc = os.path.join(os.getcwd(), last_step + 'md.part%04d.xtc' % part_num)
        if not os.path.isfile(xtc):
            raise Exception('cannot locate %s' % xtc)
        logfile = 'gmxcheck-%s-part%04d' % (last_step.rstrip('/'), part_num)
        gmx_run(' '.join([gmxpaths['gmxcheck'], '-f ' + xtc]), log=logfile)
        log_fn = wordspace['step'] + 'log-' + logfile
        with open(log_fn) as fp:
            #---carriage returns are treated as line breaks before splitting
            lines = re.sub('\r', '\n', fp.read()).split('\n')
        #---the "Step" line supplies two integers, read as frame count and timestep
        last_step_regex = r'^Step\s+([0-9]+)\s*([0-9]+)'
        step_matches = [re.findall(last_step_regex, l)[0]
            for l in lines if re.match(last_step_regex, l)]
        if not step_matches:
            raise Exception('cannot read the frame count and timestep from %s' % log_fn)
        nframes, timestep = [int(j) for j in step_matches[0]]
        #---! last viable time may not be available so this needs better error-checking
        last_time = float(int((float(nframes) - 1) * timestep))
        last_time = round(last_time / 10) * 10
        #---interesting that trjconv uses fewer digits than the gro so this is not a perfect match
        #---note that we select group zero which is always the entire system
        #---note that we assume a like-named TPR file is available
        #---splitext removes only the extension whereas rstrip('.xtc') strips a character set
        xtc_stem = os.path.splitext(xtc)[0]
        try:
            gmx_run(gmxpaths['trjconv'] + ' -f %s -o %s -s %s.tpr -b %f -e %f' % (
                xtc, 'system-input.gro', xtc_stem, last_time, last_time),
                log='trjconv-last-frame', inpipe='0\n')
        except Exception:
            raise Exception(''.join(['\n[ERROR] %s' % i for i in [
                'trjconv in get_last_frame failed',
                'if you are running a restart with an alternate version of gromacs,',
                'you should just get the last frame manually with the original version.']]))
    #---list of files we must retrieve
    upstream_files = {
        'tpr': {'from': last_step + 'md.part%04d.tpr' % part_num,
            'to': 'system-input.tpr', 'required': True},
        'cpt': {'from': last_step + 'md.part%04d.cpt' % part_num,
            'to': 'system-input.cpt', 'required': True},
        'top': {'from': last_step + 'system.top',
            'to': 'system.top', 'required': True},
        'ndx': {'from': last_step + 'system-groups.ndx',
            'to': 'system-groups.ndx', 'required': False},
    }
    #---drop every entry the caller did not request
    if not tpr:
        upstream_files.pop('tpr')
    if not cpt:
        upstream_files.pop('cpt')
    if not top:
        upstream_files.pop('top')
    if not ndx:
        upstream_files.pop('ndx')
    if itp:
        #---the itp flag means we need to acquire the force field and itp files from the previous run
        #---note that we are skipping the ff_includes here because they should be in a sources folder
        #---note that it was necessary to manually add ff_includes for an older protein run
        if wordspace['itp']:
            for fn in wordspace['itp']:
                upstream_files[fn] = {'from': last_step + '/' + fn, 'to': fn, 'required': True}
        if wordspace['sources']:
            for fn in wordspace['sources']:
                upstream_files[fn] = {'from': last_step + '/' + fn, 'to': fn, 'required': True}
        #---! hardcoded force field options here but consider making this more general
        if wordspace['force_field'] in ['charmm27']:
            #---remove items which are always available in the GROMACS share folder
            for key in ['ions', 'tip3p', 'forcefield']:
                if key in upstream_files:
                    upstream_files.pop(key)
    #---copy files
    for key, val in upstream_files.items():
        dest = wordspace['step'] + val['to']
        if not os.path.isfile(val['from']) and not os.path.isdir(val['from']):
            #---optional items that are missing upstream are skipped silently
            if val['required']:
                raise Exception('cannot find %s' % val['from'])
        elif not os.path.isfile(dest) and not os.path.isdir(dest):
            #---anything already present in the step is left untouched
            if os.path.isfile(val['from']):
                shutil.copyfile(val['from'], dest)
            else:
                shutil.copytree(val['from'], dest)
def trim_waters(structure='solvate-dense', gro='solvate',
        gap=3, boxvecs=None, method='aamd', boxcut=True):
    """
    trim_waters(structure='solvate-dense',gro='solvate',gap=3,boxvecs=None)
    Remove waters within a certain number of Angstroms of the protein.
    #### water and all (water and (same residue as water within 10 of not water))
    note that we provided the solvate.gro as a default so this can be used with any output gro file

    Parameters
    ----------
    structure : str
        Name (without ".gro") of the incoming structure.
    gro : str
        Name (without ".gro") of the trimmed structure that is written.
    gap : number
        Exclusion distance. It is passed to VMD unchanged and divided by 10
        before it is compared with the gro coordinates.
    boxvecs : sequence or None
        Box lengths, required when boxcut is set. They are multiplied by 10
        for VMD and compared directly with the gro coordinates otherwise.
    method : str
        'aamd' or 'cgmd'; only used to pick the VMD water selection.
    boxcut : bool
        Also discard waters that have an atom outside of the box.

    Raises
    ------
    Exception
        If boxcut is set without boxvecs or the method is not recognized.

    When wordspace['use_vmd'] is set the trimming is delegated to VMD and
    editconf, otherwise it is done with numpy/scipy. When there is nothing to
    trim the "-dense" structure is copied.
    """
    use_vmd = wordspace.get('use_vmd', False)
    #---fail early with a clear message instead of a TypeError when indexing None
    if boxcut and boxvecs is None:
        raise Exception("\n[ERROR] trim_waters requires boxvecs when boxcut is set")
    if (gap != 0.0 or boxcut) and use_vmd:
        if method == 'aamd':
            watersel = "water"
        elif method == 'cgmd':
            watersel = "resname %s" % wordspace.sol
        else:
            raise Exception("\n[ERROR] unclear method %s" % method)
        #---! gap should be conditional and excluded if zero
        vmdtrim = [
            'package require pbctools',
            'mol new %s.gro' % structure,
            'set sel [atomselect top \"(all not (' +
            '%s and (same residue as %s and within ' % (watersel, watersel) + str(gap) +
            ' of not %s)))' % watersel]
        #---box trimming is typical for e.g. atomistic protein simulations but discards anything outside
        if boxcut:
            vmdtrim += [' and ' +
                'same residue as (x>=0 and x<=' + str(10 * boxvecs[0]) +
                ' and y>=0 and y<= ' + str(10 * boxvecs[1]) +
                ' and z>=0 and z<= ' + str(10 * boxvecs[2]) + ')']
        vmdtrim += ['"]', '$sel writepdb %s-vmd.pdb' % gro, 'exit']
        with open(wordspace['step'] + 'script-vmd-trim.tcl', 'w') as fp:
            for line in vmdtrim:
                fp.write(line + '\n')
        #---previously used os.environ['VMDNOCUDA'] = "1" but this was causing segfaults on green
        #---the log is opened in a context manager so the handle is closed when VMD finishes
        with open(wordspace['step'] + 'log-script-vmd-trim', 'w') as vmdlog:
            p = subprocess.Popen(
                'VMDNOCUDA=1 ' + gmxpaths['vmd'] + ' -dispdev text -e script-vmd-trim.tcl',
                stdout=vmdlog, stderr=vmdlog, cwd=wordspace['step'],
                shell=True, executable='/bin/bash')
            p.communicate()
        #---record the equivalent command in the bash log
        with open(wordspace['bash_log'], 'a') as fp:
            fp.write(gmxpaths['vmd'] +
                ' -dispdev text -e script-vmd-trim.tcl &> log-script-vmd-trim\n')
        gmx_run(gmxpaths['editconf'] + ' -f %s-vmd.pdb -o %s.gro -resnr 1' % (gro, gro),
            log='editconf-convert-vmd')
    #---scipy is more reliable than VMD
    elif gap != 0.0 or boxcut:
        import scipy
        import scipy.spatial
        import numpy as np
        #---if "sol" is not in the wordspace we assume this is atomistic and use the standard "SOL"
        watersel = wordspace.get('sol', 'SOL')
        incoming = read_gro(structure + '.gro')
        residue_names = np.array(incoming['residue_names'])
        is_water = residue_names == watersel
        is_not_water = residue_names != watersel
        water_inds = np.where(is_water)[0]
        not_water_inds = np.where(is_not_water)[0]
        points = np.array(incoming['points'])
        residue_indices = np.array(incoming['residue_indices'])
        #---remove waters that are near not-waters
        if gap > 0:
            #---start from every water and knock out the ones that are too close
            surviving_water = np.array(is_water)
            if len(water_inds) > 0 and len(not_water_inds) > 0:
                #---order matters: the tree holds the not-waters and is queried with the waters
                #---so that close_dists has one entry per water atom
                #---atoms with no neighbor inside the bound come back with an infinite distance
                close_dists, _ = scipy.spatial.KDTree(points[not_water_inds]).query(
                    points[water_inds], distance_upper_bound=gap / 10.0)
                #---residue numbers of waters with at least one atom inside the gap
                excludes = residue_indices[water_inds][close_dists <= gap / 10.0]
                #---drop every water atom that shares a residue number with a close water
                #---NOTE(review): residue numbers that repeat in the file are treated as one residue
                too_close = np.logical_and(is_water, np.isin(residue_indices, excludes))
                surviving_water[too_close] = False
        else:
            surviving_water = np.ones(len(residue_indices)).astype(bool)
        #---we must remove waters that lie outside the box if there is a boxcut
        insiders = np.ones(len(points)).astype(bool)
        if boxcut:
            #---get points that are outside of the box along any dimension
            outsiders = np.any([
                np.any((points[:, ii] < 0, points[:, ii] > i), axis=0)
                for ii, i in enumerate(boxvecs)], axis=0)
            #---get residue numbers for the outsiders and flag every atom in those residues
            outsiders_res = residue_indices[outsiders]
            insiders[np.isin(residue_indices, outsiders_res)] = False
        #---not-waters are always kept while waters must pass both filters
        surviving_indices = np.logical_or(
            is_not_water, np.logical_and(surviving_water, insiders))
        lines = incoming['lines']
        #---keep the first two lines and the last line and filter the atom records between them
        lines = lines[:2] + list(np.array(incoming['lines'][2:-1])[surviving_indices]) + lines[-1:]
        xyzs = list(points[surviving_indices])
        write_gro(lines=lines, xyzs=xyzs, output_file=wordspace.step + '%s.gro' % gro)
    else:
        #---NOTE(review): the source name is built from gro rather than structure,
        #---which only matches when the default names are used; confirm the intent
        filecopy(wordspace['step'] + '%s-dense.gro' % gro, wordspace['step'] + '%s.gro' % gro)
def get_last_frame(tpr=False, cpt=False, top=False, ndx=False, itp=False):
    """
    Get the last frame of any step in this simulation.

    This function is not narrated because the watch file is typically not ready
    until the new step directory is created, at which point you cannot use
    detect_last to get the last frame easily.

    The wordspace must already hold 'last_step' and 'last_part'. If
    <last_step>md.partNNNN.gro exists it is copied to <step>system-input.gro.
    Otherwise the frame count and timestep are read from a gmxcheck log of the
    matching xtc and trjconv writes system-input.gro at the computed last time.

    Parameters
    ----------
    tpr, cpt, top, ndx : bool
        Also copy md.partNNNN.tpr -> system-input.tpr,
        md.partNNNN.cpt -> system-input.cpt, system.top and
        system-groups.ndx respectively. The ndx file is optional; the others
        must exist when requested.
    itp : bool
        Also copy every entry of wordspace['itp'] and wordspace['sources']
        from the previous step (files or directories).

    Raises
    ------
    Exception
        If the wordspace lacks last_step/last_part, the xtc cannot be located,
        the gmxcheck log cannot be parsed, trjconv fails, or a required
        upstream file is missing.
    """
    if 'last_step' not in wordspace or 'last_part' not in wordspace:
        raise Exception(
            '[ERROR] use detect_last to add last_step,last_part to the wordspace')
    last_step, part_num = wordspace['last_step'], wordspace['last_part']
    last_frame_exists = last_step + 'md.part%04d.gro' % part_num
    if os.path.isfile(last_frame_exists):
        shutil.copyfile(last_frame_exists,
                        wordspace['step'] + 'system-input.gro')
    else:
        xtc = os.path.join(os.getcwd(),
                           last_step + 'md.part%04d.xtc' % part_num)
        if not os.path.isfile(xtc):
            raise Exception('cannot locate %s' % xtc)
        logfile = 'gmxcheck-%s-part%04d' % (last_step.rstrip('/'), part_num)
        gmx_run(' '.join([gmxpaths['gmxcheck'], '-f ' + xtc]), log=logfile)
        log_fn = wordspace['step'] + 'log-' + logfile
        with open(log_fn) as fp:
            #---carriage returns are treated as line breaks before splitting
            lines = re.sub('\r', '\n', fp.read()).split('\n')
        #---the "Step" line supplies two integers, read as frame count and timestep
        last_step_regex = r'^Step\s+([0-9]+)\s*([0-9]+)'
        step_matches = [
            re.findall(last_step_regex, l)[0] for l in lines
            if re.match(last_step_regex, l)
        ]
        if not step_matches:
            raise Exception(
                'cannot read the frame count and timestep from %s' % log_fn)
        nframes, timestep = [int(j) for j in step_matches[0]]
        #---! last viable time may not be available so this needs better error-checking
        last_time = float(int((float(nframes) - 1) * timestep))
        last_time = round(last_time / 10) * 10
        #---interesting that trjconv uses fewer digits than the gro so this is not a perfect match
        #---note that we select group zero which is always the entire system
        #---note that we assume a like-named TPR file is available
        #---splitext removes only the extension whereas rstrip('.xtc') strips a character set
        xtc_stem = os.path.splitext(xtc)[0]
        try:
            gmx_run(gmxpaths['trjconv'] +
                    ' -f %s -o %s -s %s.tpr -b %f -e %f' %
                    (xtc, 'system-input.gro', xtc_stem, last_time, last_time),
                    log='trjconv-last-frame',
                    inpipe='0\n')
        except Exception:
            raise Exception(''.join([
                '\n[ERROR] %s' % i for i in [
                    'trjconv in get_last_frame failed',
                    'if you are running a restart with an alternate version of gromacs,',
                    'you should just get the last frame manually with the original version.'
                ]
            ]))
    #---list of files we must retrieve
    upstream_files = {
        'tpr': {
            'from': last_step + 'md.part%04d.tpr' % part_num,
            'to': 'system-input.tpr',
            'required': True
        },
        'cpt': {
            'from': last_step + 'md.part%04d.cpt' % part_num,
            'to': 'system-input.cpt',
            'required': True
        },
        'top': {
            'from': last_step + 'system.top',
            'to': 'system.top',
            'required': True
        },
        'ndx': {
            'from': last_step + 'system-groups.ndx',
            'to': 'system-groups.ndx',
            'required': False
        },
    }
    #---drop every entry the caller did not request
    if not tpr:
        upstream_files.pop('tpr')
    if not cpt:
        upstream_files.pop('cpt')
    if not top:
        upstream_files.pop('top')
    if not ndx:
        upstream_files.pop('ndx')
    if itp:
        #---the itp flag means we need to acquire the force field and itp files from the previous run
        #---note that we are skipping the ff_includes here because they should be in a sources folder
        #---note that it was necessary to manually add ff_includes for an older protein run
        if wordspace['itp']:
            for fn in wordspace['itp']:
                upstream_files[fn] = {
                    'from': last_step + '/' + fn,
                    'to': fn,
                    'required': True
                }
        if wordspace['sources']:
            for fn in wordspace['sources']:
                upstream_files[fn] = {
                    'from': last_step + '/' + fn,
                    'to': fn,
                    'required': True
                }
        #---! hardcoded force field options here but consider making this more general
        if wordspace['force_field'] in ['charmm27']:
            #---remove items which are always available in the GROMACS share folder
            for key in ['ions', 'tip3p', 'forcefield']:
                if key in upstream_files:
                    upstream_files.pop(key)
    #---copy files
    for key, val in upstream_files.items():
        dest = wordspace['step'] + val['to']
        if not os.path.isfile(val['from']) and not os.path.isdir(val['from']):
            #---optional items that are missing upstream are skipped silently
            if val['required']:
                raise Exception('cannot find %s' % val['from'])
        elif not os.path.isfile(dest) and not os.path.isdir(dest):
            #---anything already present in the step is left untouched
            if os.path.isfile(val['from']):
                shutil.copyfile(val['from'], dest)
            else:
                shutil.copytree(val['from'], dest)
def trim_waters(structure='solvate-dense',
                gro='solvate',
                gap=3,
                boxvecs=None,
                method='aamd',
                boxcut=True):
    """
    trim_waters(structure='solvate-dense',gro='solvate',gap=3,boxvecs=None)
    Remove waters within a certain number of Angstroms of the protein.
    #### water and all (water and (same residue as water within 10 of not water))
    note that we provided the solvate.gro as a default so this can be used with any output gro file

    Parameters
    ----------
    structure : str
        Name (without ".gro") of the incoming structure.
    gro : str
        Name (without ".gro") of the trimmed structure that is written.
    gap : number
        Exclusion distance. It is passed to VMD unchanged and divided by 10
        before it is compared with the gro coordinates.
    boxvecs : sequence or None
        Box lengths, required when boxcut is set. They are multiplied by 10
        for VMD and compared directly with the gro coordinates otherwise.
    method : str
        'aamd' or 'cgmd'; only used to pick the VMD water selection.
    boxcut : bool
        Also discard waters that have an atom outside of the box.

    Raises
    ------
    Exception
        If boxcut is set without boxvecs or the method is not recognized.

    When wordspace['use_vmd'] is set the trimming is delegated to VMD and
    editconf, otherwise it is done with numpy/scipy. When there is nothing to
    trim the "-dense" structure is copied.
    """
    use_vmd = wordspace.get('use_vmd', False)
    #---fail early with a clear message instead of a TypeError when indexing None
    if boxcut and boxvecs is None:
        raise Exception(
            "\n[ERROR] trim_waters requires boxvecs when boxcut is set")
    if (gap != 0.0 or boxcut) and use_vmd:
        if method == 'aamd':
            watersel = "water"
        elif method == 'cgmd':
            watersel = "resname %s" % wordspace.sol
        else:
            raise Exception("\n[ERROR] unclear method %s" % method)
        #---! gap should be conditional and excluded if zero
        vmdtrim = [
            'package require pbctools',
            'mol new %s.gro' % structure,
            'set sel [atomselect top \"(all not (' +
            '%s and (same residue as %s and within ' % (watersel, watersel) +
            str(gap) + ' of not %s)))' % watersel
        ]
        #---box trimming is typical for e.g. atomistic protein simulations but discards anything outside
        if boxcut:
            vmdtrim += [
                ' and ' + 'same residue as (x>=0 and x<=' +
                str(10 * boxvecs[0]) + ' and y>=0 and y<= ' +
                str(10 * boxvecs[1]) + ' and z>=0 and z<= ' +
                str(10 * boxvecs[2]) + ')'
            ]
        vmdtrim += [
            '"]',
            '$sel writepdb %s-vmd.pdb' % gro,
            'exit',
        ]
        with open(wordspace['step'] + 'script-vmd-trim.tcl', 'w') as fp:
            for line in vmdtrim:
                fp.write(line + '\n')
        #---previously used os.environ['VMDNOCUDA'] = "1" but this was causing segfaults on green
        #---the log is opened in a context manager so the handle is closed when VMD finishes
        with open(wordspace['step'] + 'log-script-vmd-trim', 'w') as vmdlog:
            p = subprocess.Popen('VMDNOCUDA=1 ' + gmxpaths['vmd'] +
                                 ' -dispdev text -e script-vmd-trim.tcl',
                                 stdout=vmdlog,
                                 stderr=vmdlog,
                                 cwd=wordspace['step'],
                                 shell=True,
                                 executable='/bin/bash')
            p.communicate()
        #---record the equivalent command in the bash log
        with open(wordspace['bash_log'], 'a') as fp:
            fp.write(
                gmxpaths['vmd'] +
                ' -dispdev text -e script-vmd-trim.tcl &> log-script-vmd-trim\n'
            )
        gmx_run(gmxpaths['editconf'] +
                ' -f %s-vmd.pdb -o %s.gro -resnr 1' % (gro, gro),
                log='editconf-convert-vmd')
    #---scipy is more reliable than VMD
    elif gap != 0.0 or boxcut:
        import scipy
        import scipy.spatial
        import numpy as np
        #---if "sol" is not in the wordspace we assume this is atomistic and use the standard "SOL"
        watersel = wordspace.get('sol', 'SOL')
        incoming = read_gro(structure + '.gro')
        residue_names = np.array(incoming['residue_names'])
        is_water = residue_names == watersel
        is_not_water = residue_names != watersel
        water_inds = np.where(is_water)[0]
        not_water_inds = np.where(is_not_water)[0]
        points = np.array(incoming['points'])
        residue_indices = np.array(incoming['residue_indices'])
        #---remove waters that are near not-waters
        if gap > 0:
            #---start from every water and knock out the ones that are too close
            surviving_water = np.array(is_water)
            if len(water_inds) > 0 and len(not_water_inds) > 0:
                #---order matters: the tree holds the not-waters and is queried with the waters
                #---so that close_dists has one entry per water atom
                #---atoms with no neighbor inside the bound come back with an infinite distance
                close_dists, _ = scipy.spatial.KDTree(
                    points[not_water_inds]).query(
                        points[water_inds], distance_upper_bound=gap / 10.0)
                #---residue numbers of waters with at least one atom inside the gap
                excludes = residue_indices[water_inds][
                    close_dists <= gap / 10.0]
                #---drop every water atom that shares a residue number with a close water
                #---NOTE(review): residue numbers that repeat in the file are treated as one residue
                too_close = np.logical_and(
                    is_water, np.isin(residue_indices, excludes))
                surviving_water[too_close] = False
        else:
            surviving_water = np.ones(len(residue_indices)).astype(bool)
        #---we must remove waters that lie outside the box if there is a boxcut
        insiders = np.ones(len(points)).astype(bool)
        if boxcut:
            #---get points that are outside of the box along any dimension
            outsiders = np.any([
                np.any((points[:, ii] < 0, points[:, ii] > i), axis=0)
                for ii, i in enumerate(boxvecs)
            ],
                               axis=0)
            #---get residue numbers for the outsiders and flag every atom in those residues
            outsiders_res = residue_indices[outsiders]
            insiders[np.isin(residue_indices, outsiders_res)] = False
        #---not-waters are always kept while waters must pass both filters
        surviving_indices = np.logical_or(
            is_not_water, np.logical_and(surviving_water, insiders))
        lines = incoming['lines']
        #---keep the first two lines and the last line and filter the atom records between them
        lines = lines[:2] + list(
            np.array(incoming['lines'][2:-1])[surviving_indices]) + lines[-1:]
        xyzs = list(points[surviving_indices])
        write_gro(lines=lines,
                  xyzs=xyzs,
                  output_file=wordspace.step + '%s.gro' % gro)
    else:
        #---NOTE(review): the source name is built from gro rather than structure,
        #---which only matches when the default names are used; confirm the intent
        filecopy(wordspace['step'] + '%s-dense.gro' % gro,
                 wordspace['step'] + '%s.gro' % gro)