Beispiel #1
0
def build_cgmd_protein():

	"""
	Use martinize to generate a coarse-grained protein.

	Reads these keys from the wordspace: 'step' (the directory in which all commands run) and 
	'martinize_path' (location of the martinize script, with "~" expanded). The optional keys 
	'dssp', 'martinize_ff' and 'martinize_flags' are appended to the martinize command line 
	when they are present.

	The function runs martinize on protein-start.pdb, converts the resulting PDB to a GRO file 
	with editconf, and finally rewrites the position restraint columns in Protein.itp.

	Raises AssertionError if the martinize script cannot be found or if martinize did not write 
	the expected PDB file. These checks are explicit raises (not assert statements) so that they
	are still performed when python runs with -O.
	"""

	name = 'protein'
	cwd = wordspace['step']
	martinize_fn = os.path.expanduser(wordspace['martinize_path'])
	#---this function is run from the step but martinize_path is relative to root
	if not os.path.isfile(martinize_fn):
		raise AssertionError('[ERROR] cannot find martinize at %s'%martinize_fn)
	cmd = 'python '+martinize_fn+' -v -p backbone '
	cmd += ' -f protein-start.pdb -o %s.top -x %s.pdb'%(name,name)
	if 'dssp' in wordspace: cmd += ' -dssp %s'%os.path.abspath(os.path.expanduser(wordspace['dssp']))
	if 'martinize_ff' in wordspace: cmd += ' -ff %s'%wordspace['martinize_ff']
	if 'martinize_flags' in wordspace: cmd += ' '+wordspace['martinize_flags']
	bash(cmd,cwd=cwd,log='martinize')
	#---confirm that martinize wrote the structure before we try to convert it
	structure_fn = cwd+'%s.pdb'%name
	if not os.path.isfile(structure_fn):
		raise AssertionError('[ERROR] martinize did not write %s'%structure_fn)
	gmx_run(gmxpaths['editconf']+' -f %s.pdb -o %s.gro'%(name,name),log='editconf-convert-pdb')
	#---only allow Z-restraints because this is probably for a bilayer
	#---the sed command replaces the X and Y force constants with zero and keeps POSRES_FC for Z
	bash("sed -i 's/POSRES_FC    POSRES_FC    POSRES_FC/0 0 POSRES_FC/g' Protein.itp",
		cwd=cwd)
Beispiel #2
0
def get_last_frame(tpr=False,cpt=False,top=False,ndx=False,itp=False):

	"""
	Get the last frame of any step in this simulation.
	This function is not narrated because the watch file is typically not ready until the new step 
	directory is created at which point you cannot use detect_last to get the last frame easily.

	The wordspace must already hold 'last_step' and 'last_part' (see the error message below). 
	If md.partNNNN.gro exists in the last step it is copied to system-input.gro in the current 
	step. Otherwise the final frame is extracted from md.partNNNN.xtc with trjconv, using the 
	two integers on the "Step" line of the gmxcheck log to compute the time of the last frame.

	Arguments
	tpr, cpt, top, ndx : when true, also copy the corresponding file from the last step.
	itp : when true, also copy every entry in wordspace['itp'] and wordspace['sources'].

	Files or directories which already exist in the current step are never overwritten. 
	Raises Exception when the wordspace is incomplete, when the trajectory or the "Step" line 
	cannot be found, when trjconv fails, or when a required upstream file is missing.
	"""

	if 'last_step' not in wordspace or 'last_part' not in wordspace:
		raise Exception('[ERROR] use detect_last to add last_step,last_part to the wordspace')
	last_step,part_num = wordspace['last_step'],wordspace['last_part']
	last_frame_exists = last_step+'md.part%04d.gro'%part_num
	if os.path.isfile(last_frame_exists): 
		shutil.copyfile(last_frame_exists,wordspace['step']+'system-input.gro')
	else:
		xtc = os.path.join(os.getcwd(),last_step+'md.part%04d.xtc'%part_num)
		if not os.path.isfile(xtc): raise Exception('cannot locate %s'%xtc)
		logfile = 'gmxcheck-%s-part%04d'%(last_step.rstrip('/'),part_num)
		gmx_run(' '.join([gmxpaths['gmxcheck'],'-f '+xtc]),log=logfile)
		log_fn = wordspace['step']+'log-'+logfile
		#---gmxcheck uses carriage returns for its progress counter so we treat them as newlines
		with open(log_fn) as fp: lines = re.sub('\r','\n',fp.read()).split('\n')
		last_step_regex = r'^Step\s+([0-9]+)\s*([0-9]+)'
		step_matches = [m for m in (re.match(last_step_regex,l) for l in lines) if m]
		if not step_matches:
			raise Exception('[ERROR] cannot find the "Step" line in %s'%log_fn)
		nframes,timestep = [int(j) for j in step_matches[0].groups()]
		#---! last viable time may not be available so this needs better error-checking
		last_time = float(int((float(nframes)-1)*timestep))
		last_time = round(last_time/10)*10
		#---interesting that trjconv uses fewer digits than the gro so this is not a perfect match
		#---note that we select group zero which is always the entire system
		#---note that we assume a like-named TPR file is available
		#---we use splitext because rstrip removes a set of characters and not a suffix
		tpr_base = os.path.splitext(xtc)[0]
		try:
			gmx_run(gmxpaths['trjconv']+' -f %s -o %s -s %s.tpr -b %f -e %f'%(
				xtc,'system-input.gro',tpr_base,last_time,last_time),
				log='trjconv-last-frame',inpipe='0\n')
		#---we do not use a bare except here so that a keyboard interrupt is not disguised
		except Exception as e:
			raise Exception(''.join(['\n[ERROR] %s'%i for i in [
				'trjconv in get_last_frame failed',
				'if you are running a restart with an alternate version of gromacs,',
				'you should just get the last frame manually with the original version.',
				'original error: %s'%e]]))
	#---list of files we must retrieve
	upstream_files = {
		'tpr':{'from':last_step+'md.part%04d.tpr'%part_num,'to':'system-input.tpr','required':True},
		'cpt':{'from':last_step+'md.part%04d.cpt'%part_num,'to':'system-input.cpt','required':True},
		'top':{'from':last_step+'system.top','to':'system.top','required':True},
		'ndx':{'from':last_step+'system-groups.ndx','to':'system-groups.ndx','required':False},
		}
	#---drop any file which the caller did not request
	for key,requested in [('tpr',tpr),('cpt',cpt),('top',top),('ndx',ndx)]:
		if not requested: upstream_files.pop(key)
	if itp:
		#---the itp flag means we need to acquire the force field and itp files from the previous run
		#---note that we are skipping the ff_includes here because they should be in a sources folder
		#---note that it was necessary to manually add ff_includes for an older protein run 
		for listing in ['itp','sources']:
			if wordspace[listing]:
				for fn in wordspace[listing]: 
					upstream_files[fn] = {'from':last_step+'/'+fn,'to':fn,'required':True}
	#---! hardcoded force field options here but consider making this more general
	#---! why is this hacked below? with "or 1" (removed for testing)
	if wordspace['force_field'] in ['charmm27']:
		#---remove items which are always available in the GROMACS share folder
		for key in ['ions','tip3p','forcefield']: 
			if key in upstream_files: upstream_files.pop(key)
	#---copy files
	for key,val in upstream_files.items():
		source,dest = val['from'],wordspace['step']+val['to']
		if not os.path.isfile(source) and not os.path.isdir(source):
			#---optional files are skipped silently and we report the missing source path otherwise
			if val['required']: raise Exception('cannot find %s'%source)
		elif not os.path.isfile(dest) and not os.path.isdir(dest): 
			if os.path.isfile(source): shutil.copyfile(source,dest)
			else: shutil.copytree(source,dest)
Beispiel #3
0
def trim_waters(structure='solvate-dense',gro='solvate',
	gap=3,boxvecs=None,method='aamd',boxcut=True):

	"""
	trim_waters(structure='solvate-dense',gro='solvate',gap=3,boxvecs=None)
	Remove waters within a certain number of Angstroms of the protein.
	#### water and all (water and (same residue as water within 10 of not water))
	note that we provided the solvate.gro as a default so this can be used with any output gro file

	Arguments
	structure : name (without ".gro") of the incoming structure.
	gro : name (without ".gro") of the structure to write.
	gap : waters with any atom this close to a not-water atom are removed (whole residues). 
		The value is divided by ten before it is compared to the coordinates from read_gro.
	boxvecs : three box lengths, required when boxcut is set. They are compared directly to the 
		coordinates from read_gro and multiplied by ten for VMD (presumably nm, to be confirmed).
	method : either 'aamd' or 'cgmd'. This only selects the water selection string for VMD.
	boxcut : also remove waters which have any atom outside of the box.

	The wordspace flag 'use_vmd' selects VMD for the trimming, otherwise we use numpy and scipy.
	When there is nothing to trim (no gap and no boxcut) the "-dense" file is copied instead.
	Note that waters are matched by residue index so redundant residue indices are not resolved.
	Raises TypeError if boxcut is requested without boxvecs.
	"""

	#---both trimming methods need the box vectors for the boxcut so we fail early and clearly
	if boxcut and boxvecs is None:
		raise TypeError('[ERROR] trim_waters requires boxvecs when boxcut is set')
	use_vmd = wordspace.get('use_vmd',False)
	if (gap != 0.0 or boxcut) and use_vmd:
		if method == 'aamd': watersel = "water"
		elif method == 'cgmd': watersel = "resname %s"%wordspace.sol
		else: raise Exception("\n[ERROR] unclear method %s"%method)
		#---! gap should be conditional and excluded if zero
		vmdtrim = [
			'package require pbctools',
			'mol new %s.gro'%structure,
			'set sel [atomselect top \"(all not ('+\
			'%s and (same residue as %s and within '%(watersel,watersel)+str(gap)+\
			' of not %s)))'%watersel]
		#---box trimming is typical for e.g. atomstic protein simulations but discards anything outside
		if boxcut:
			vmdtrim += [' and '+\
			'same residue as (x>=0 and x<='+str(10*boxvecs[0])+\
			' and y>=0 and y<= '+str(10*boxvecs[1])+\
			' and z>=0 and z<= '+str(10*boxvecs[2])+')']
		vmdtrim += ['"]','$sel writepdb %s-vmd.pdb'%gro,'exit',]
		with open(wordspace['step']+'script-vmd-trim.tcl','w') as fp:
			for line in vmdtrim: fp.write(line+'\n')
		#---previously used os.environ['VMDNOCUDA'] = "1" but this was causing segfaults on green
		#---the log is opened in a with-block so that the handle is closed when VMD is done
		with open(wordspace['step']+'log-script-vmd-trim','w') as vmdlog:
			p = subprocess.Popen('VMDNOCUDA=1 '+gmxpaths['vmd']+' -dispdev text -e script-vmd-trim.tcl',
				stdout=vmdlog,stderr=vmdlog,cwd=wordspace['step'],shell=True,executable='/bin/bash')
			p.communicate()
		with open(wordspace['bash_log'],'a') as fp:
			fp.write(gmxpaths['vmd']+' -dispdev text -e script-vmd-trim.tcl &> log-script-vmd-trim\n')
		gmx_run(gmxpaths['editconf']+' -f %s-vmd.pdb -o %s.gro -resnr 1'%(gro,gro),
			log='editconf-convert-vmd')
	#---scipy is more reliable than VMD
	elif gap != 0.0 or boxcut:
		import scipy.spatial
		import numpy as np
		#---if "sol" is not in the wordspace we assume this is atomistic and use the standard "SOL"
		watersel = wordspace.get('sol','SOL')
		incoming = read_gro(structure+'.gro')
		residue_names = np.array(incoming['residue_names'])
		residue_indices = np.array(incoming['residue_indices'])
		points = np.array(incoming['points'])
		is_water = residue_names==watersel
		is_not_water = residue_names!=watersel
		if gap>0:
			#---remove waters that are near not-waters
			cutoff = gap/10.0
			water_inds = np.where(is_water)[0]
			not_water_inds = np.where(is_not_water)[0]
			#---copy the array that marks the waters
			surviving_water = np.array(is_water)
			#---nothing can overlap if either group is empty (and KDTree needs at least one point)
			if len(water_inds)>0 and len(not_water_inds)>0:
				#---note that order matters: we wish to find waters too close to not_waters
				#---hence we build the tree from the not-waters and query once per water atom
				#---so that close_dists lines up with water_inds
				#---waters with no neighbor inside the cutoff receive an infinite distance
				close_dists,neighbors = scipy.spatial.KDTree(points[not_water_inds]).query(
					points[water_inds],distance_upper_bound=cutoff)
				#---use the distances to find the residue indices for waters that are too close
				excludes = residue_indices[water_inds][close_dists<=cutoff]
				#---remove every water atom that belongs to a residue on the exclude list
				surviving_water[np.all((is_water,np.isin(residue_indices,excludes)),axis=0)] = False
		else: 
			surviving_water = np.ones(len(residue_indices)).astype(bool)
		#---we must remove waters that lie outside the box if there is a boxcut
		insiders = np.ones(len(points)).astype(bool)
		if boxcut:
			#---get points that are outside of the box
			outsiders = np.any([np.any((points[:,ii]<0,points[:,ii]>i),axis=0) 
				for ii,i in enumerate(boxvecs)],axis=0)
			#---get residue numbers for the outsiders
			outsiders_res = residue_indices[np.where(outsiders)[0]]
			#---note that this is consonant with the close-water exclude step above
			insiders[np.isin(residue_indices,outsiders_res)] = False
		#---we always keep the not-waters and only keep waters that pass both of the tests
		surviving_indices = np.any((is_not_water,np.all((surviving_water,insiders),axis=0)),axis=0)
		lines = incoming['lines']
		#---the first two lines and the last line are kept and the rest are filtered per atom
		lines = lines[:2]+list(np.array(incoming['lines'][2:-1])[surviving_indices])+lines[-1:]
		xyzs = list(points[surviving_indices])
		write_gro(lines=lines,xyzs=xyzs,output_file=wordspace['step']+'%s.gro'%gro)
	#---NOTE(review): this copies "<gro>-dense.gro" and not the structure argument. please confirm
	else: filecopy(wordspace['step']+'%s-dense.gro'%gro,wordspace['step']+'%s.gro'%gro)
Beispiel #4
0
def get_last_frame(tpr=False, cpt=False, top=False, ndx=False, itp=False):
    """
    Get the last frame of any step in this simulation.
    This function is not narrated because the watch file is typically not ready until the new step
    directory is created at which point you cannot use detect_last to get the last frame easily.

    The wordspace must already hold 'last_step' and 'last_part' (see the error message below).
    If md.partNNNN.gro exists in the last step it is copied to system-input.gro in the current
    step. Otherwise the final frame is extracted from md.partNNNN.xtc with trjconv, using the
    two integers on the "Step" line of the gmxcheck log to compute the time of the last frame.

    Arguments
    tpr, cpt, top, ndx : when true, also copy the corresponding file from the last step.
    itp : when true, also copy every entry in wordspace['itp'] and wordspace['sources'].

    Files or directories which already exist in the current step are never overwritten.
    Raises Exception when the wordspace is incomplete, when the trajectory or the "Step" line
    cannot be found, when trjconv fails, or when a required upstream file is missing.
    """

    if 'last_step' not in wordspace or 'last_part' not in wordspace:
        raise Exception(
            '[ERROR] use detect_last to add last_step,last_part to the wordspace'
        )
    last_step, part_num = wordspace['last_step'], wordspace['last_part']
    last_frame_exists = last_step + 'md.part%04d.gro' % part_num
    if os.path.isfile(last_frame_exists):
        shutil.copyfile(last_frame_exists,
                        wordspace['step'] + 'system-input.gro')
    else:
        xtc = os.path.join(os.getcwd(),
                           last_step + 'md.part%04d.xtc' % part_num)
        if not os.path.isfile(xtc):
            raise Exception('cannot locate %s' % xtc)
        logfile = 'gmxcheck-%s-part%04d' % (last_step.rstrip('/'), part_num)
        gmx_run(' '.join([gmxpaths['gmxcheck'], '-f ' + xtc]), log=logfile)
        log_fn = wordspace['step'] + 'log-' + logfile
        #---gmxcheck uses carriage returns for its progress counter so we treat them as newlines
        with open(log_fn) as fp:
            lines = re.sub('\r', '\n', fp.read()).split('\n')
        last_step_regex = r'^Step\s+([0-9]+)\s*([0-9]+)'
        step_matches = [
            m for m in (re.match(last_step_regex, l) for l in lines) if m
        ]
        if not step_matches:
            raise Exception('[ERROR] cannot find the "Step" line in %s' %
                            log_fn)
        nframes, timestep = [int(j) for j in step_matches[0].groups()]
        #---! last viable time may not be available so this needs better error-checking
        last_time = float(int((float(nframes) - 1) * timestep))
        last_time = round(last_time / 10) * 10
        #---interesting that trjconv uses fewer digits than the gro so this is not a perfect match
        #---note that we select group zero which is always the entire system
        #---note that we assume a like-named TPR file is available
        #---we use splitext because rstrip removes a set of characters and not a suffix
        tpr_base = os.path.splitext(xtc)[0]
        try:
            gmx_run(gmxpaths['trjconv'] +
                    ' -f %s -o %s -s %s.tpr -b %f -e %f' %
                    (xtc, 'system-input.gro', tpr_base, last_time, last_time),
                    log='trjconv-last-frame',
                    inpipe='0\n')
        #---we do not use a bare except here so that a keyboard interrupt is not disguised
        except Exception as e:
            raise Exception(''.join([
                '\n[ERROR] %s' % i for i in [
                    'trjconv in get_last_frame failed',
                    'if you are running a restart with an alternate version of gromacs,',
                    'you should just get the last frame manually with the original version.',
                    'original error: %s' % e,
                ]
            ]))
    #---list of files we must retrieve
    upstream_files = {
        'tpr': {
            'from': last_step + 'md.part%04d.tpr' % part_num,
            'to': 'system-input.tpr',
            'required': True
        },
        'cpt': {
            'from': last_step + 'md.part%04d.cpt' % part_num,
            'to': 'system-input.cpt',
            'required': True
        },
        'top': {
            'from': last_step + 'system.top',
            'to': 'system.top',
            'required': True
        },
        'ndx': {
            'from': last_step + 'system-groups.ndx',
            'to': 'system-groups.ndx',
            'required': False
        },
    }
    #---drop any file which the caller did not request
    for key, requested in [('tpr', tpr), ('cpt', cpt), ('top', top),
                           ('ndx', ndx)]:
        if not requested:
            upstream_files.pop(key)
    if itp:
        #---the itp flag means we need to acquire the force field and itp files from the previous run
        #---note that we are skipping the ff_includes here because they should be in a sources folder
        #---note that it was necessary to manually add ff_includes for an older protein run
        for listing in ['itp', 'sources']:
            if wordspace[listing]:
                for fn in wordspace[listing]:
                    upstream_files[fn] = {
                        'from': last_step + '/' + fn,
                        'to': fn,
                        'required': True
                    }
    #---! hardcoded force field options here but consider making this more general
    #---! why is this hacked below? with "or 1" (removed for testing)
    if wordspace['force_field'] in ['charmm27']:
        #---remove items which are always available in the GROMACS share folder
        for key in ['ions', 'tip3p', 'forcefield']:
            if key in upstream_files:
                upstream_files.pop(key)
    #---copy files
    for key, val in upstream_files.items():
        source, dest = val['from'], wordspace['step'] + val['to']
        if not os.path.isfile(source) and not os.path.isdir(source):
            #---optional files are skipped silently and we report the missing source path otherwise
            if val['required']:
                raise Exception('cannot find %s' % source)
        elif not os.path.isfile(dest) and not os.path.isdir(dest):
            if os.path.isfile(source):
                shutil.copyfile(source, dest)
            else:
                shutil.copytree(source, dest)
Beispiel #5
0
def trim_waters(structure='solvate-dense',
                gro='solvate',
                gap=3,
                boxvecs=None,
                method='aamd',
                boxcut=True):
    """
    trim_waters(structure='solvate-dense',gro='solvate',gap=3,boxvecs=None)
    Remove waters within a certain number of Angstroms of the protein.
    #### water and all (water and (same residue as water within 10 of not water))
    note that we provided the solvate.gro as a default so this can be used with any output gro file

    Arguments
    structure : name (without ".gro") of the incoming structure.
    gro : name (without ".gro") of the structure to write.
    gap : waters with any atom this close to a not-water atom are removed (whole residues).
        The value is divided by ten before it is compared to the coordinates from read_gro.
    boxvecs : three box lengths, required when boxcut is set. They are compared directly to the
        coordinates from read_gro and multiplied by ten for VMD (presumably nm, to be confirmed).
    method : either 'aamd' or 'cgmd'. This only selects the water selection string for VMD.
    boxcut : also remove waters which have any atom outside of the box.

    The wordspace flag 'use_vmd' selects VMD for the trimming, otherwise we use numpy and scipy.
    When there is nothing to trim (no gap and no boxcut) the "-dense" file is copied instead.
    Note that waters are matched by residue index so redundant residue indices are not resolved.
    Raises TypeError if boxcut is requested without boxvecs.
    """

    #---both trimming methods need the box vectors for the boxcut so we fail early and clearly
    if boxcut and boxvecs is None:
        raise TypeError(
            '[ERROR] trim_waters requires boxvecs when boxcut is set')
    use_vmd = wordspace.get('use_vmd', False)
    if (gap != 0.0 or boxcut) and use_vmd:
        if method == 'aamd':
            watersel = "water"
        elif method == 'cgmd':
            watersel = "resname %s" % wordspace.sol
        else:
            raise Exception("\n[ERROR] unclear method %s" % method)
        #---! gap should be conditional and excluded if zero
        vmdtrim = [
         'package require pbctools',
         'mol new %s.gro'%structure,
         'set sel [atomselect top \"(all not ('+\
         '%s and (same residue as %s and within '%(watersel,watersel)+str(gap)+\
         ' of not %s)))'%watersel]
        #---box trimming is typical for e.g. atomstic protein simulations but discards anything outside
        if boxcut:
            vmdtrim += [' and '+\
            'same residue as (x>=0 and x<='+str(10*boxvecs[0])+\
            ' and y>=0 and y<= '+str(10*boxvecs[1])+\
            ' and z>=0 and z<= '+str(10*boxvecs[2])+')']
        vmdtrim += [
            '"]',
            '$sel writepdb %s-vmd.pdb' % gro,
            'exit',
        ]
        with open(wordspace['step'] + 'script-vmd-trim.tcl', 'w') as fp:
            for line in vmdtrim:
                fp.write(line + '\n')
        #---previously used os.environ['VMDNOCUDA'] = "1" but this was causing segfaults on green
        #---the log is opened in a with-block so that the handle is closed when VMD is done
        with open(wordspace['step'] + 'log-script-vmd-trim', 'w') as vmdlog:
            p = subprocess.Popen('VMDNOCUDA=1 ' + gmxpaths['vmd'] +
                                 ' -dispdev text -e script-vmd-trim.tcl',
                                 stdout=vmdlog,
                                 stderr=vmdlog,
                                 cwd=wordspace['step'],
                                 shell=True,
                                 executable='/bin/bash')
            p.communicate()
        with open(wordspace['bash_log'], 'a') as fp:
            fp.write(
                gmxpaths['vmd'] +
                ' -dispdev text -e script-vmd-trim.tcl &> log-script-vmd-trim\n'
            )
        gmx_run(gmxpaths['editconf'] + ' -f %s-vmd.pdb -o %s.gro -resnr 1' %
                (gro, gro),
                log='editconf-convert-vmd')
    #---scipy is more reliable than VMD
    elif gap != 0.0 or boxcut:
        import scipy.spatial
        import numpy as np
        #---if "sol" is not in the wordspace we assume this is atomistic and use the standard "SOL"
        watersel = wordspace.get('sol', 'SOL')
        incoming = read_gro(structure + '.gro')
        residue_names = np.array(incoming['residue_names'])
        residue_indices = np.array(incoming['residue_indices'])
        points = np.array(incoming['points'])
        is_water = residue_names == watersel
        is_not_water = residue_names != watersel
        if gap > 0:
            #---remove waters that are near not-waters
            cutoff = gap / 10.0
            water_inds = np.where(is_water)[0]
            not_water_inds = np.where(is_not_water)[0]
            #---copy the array that marks the waters
            surviving_water = np.array(is_water)
            #---nothing can overlap if either group is empty (and KDTree needs at least one point)
            if len(water_inds) > 0 and len(not_water_inds) > 0:
                #---note that order matters: we wish to find waters too close to not_waters
                #---hence we build the tree from the not-waters and query once per water atom
                #---so that close_dists lines up with water_inds
                #---waters with no neighbor inside the cutoff receive an infinite distance
                close_dists, neighbors = scipy.spatial.KDTree(
                    points[not_water_inds]).query(
                        points[water_inds], distance_upper_bound=cutoff)
                #---use the distances to find the residue indices for waters that are too close
                excludes = residue_indices[water_inds][close_dists <= cutoff]
                #---remove every water atom that belongs to a residue on the exclude list
                surviving_water[np.all(
                    (is_water, np.isin(residue_indices, excludes)),
                    axis=0)] = False
        else:
            surviving_water = np.ones(len(residue_indices)).astype(bool)
        #---we must remove waters that lie outside the box if there is a boxcut
        insiders = np.ones(len(points)).astype(bool)
        if boxcut:
            #---get points that are outside of the box
            outsiders = np.any([
                np.any((points[:, ii] < 0, points[:, ii] > i), axis=0)
                for ii, i in enumerate(boxvecs)
            ],
                               axis=0)
            #---get residue numbers for the outsiders
            outsiders_res = residue_indices[np.where(outsiders)[0]]
            #---note that this is consonant with the close-water exclude step above
            insiders[np.isin(residue_indices, outsiders_res)] = False
        #---we always keep the not-waters and only keep waters that pass both of the tests
        surviving_indices = np.any(
            (is_not_water, np.all((surviving_water, insiders), axis=0)),
            axis=0)
        lines = incoming['lines']
        #---the first two lines and the last line are kept and the rest are filtered per atom
        lines = lines[:2] + list(
            np.array(incoming['lines'][2:-1])[surviving_indices]) + lines[-1:]
        xyzs = list(points[surviving_indices])
        write_gro(lines=lines,
                  xyzs=xyzs,
                  output_file=wordspace['step'] + '%s.gro' % gro)
    #---NOTE(review): this copies "<gro>-dense.gro" and not the structure argument. please confirm
    else:
        filecopy(wordspace['step'] + '%s-dense.gro' % gro,
                 wordspace['step'] + '%s.gro' % gro)