Example #1
	def create_group(self,**kwargs):
	
		"""
		Create a group.
		"""

		sn = kwargs['sn']
		name = kwargs['group']
		select = kwargs['select']
		cols = 100 if 'cols' not in kwargs else kwargs['cols']
		#---naming convention holds that the group names follow the prefix and we suffix with ndx
		simkey = self.prefixer(sn)+'.'+name
		fn = '%s.ndx'%simkey
		#---see if we need to make this group
		if os.path.isfile(self.postdir+fn) and name in self.groups[sn]: return
		elif os.path.isfile(self.postdir+fn):
			if self.confirm_file(self.postdir+fn):
				self.groups[sn][name] = {'fn':fn,'select':select}
			return
		status('creating group %s'%simkey,tag='status')
		#---read the structure
		uni = gmxread(self.get_last_start_structure(sn))
		sel = mdasel(uni,select)
		#---write NDX 
		import numpy as np
		iii = sel.indices+1
		rows = [iii[np.arange(cols*i,cols*(i+1) if cols*(i+1)<len(iii) else len(iii))]
			for i in range(0,len(iii)//cols+1)]
		with open(self.postdir+fn,'w') as fp:
			fp.write('[ %s ]\n'%name)
			for line in rows:
				fp.write(' '.join(line.astype(str))+'\n')
		self.groups[sn][name] = {'fn':fn,'select':select}
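The group writer above chunks the one-based atom indices into rows of at most `cols` entries under a single `[ name ]` heading, which is the GROMACS .ndx convention. A minimal standalone sketch of the same idea, assuming plain numpy and no workspace object (the helper name write_ndx_group is hypothetical):

import numpy as np

def write_ndx_group(fn, name, indices, cols=100):
	"""Write zero-based atom indices as a one-based GROMACS index (.ndx) group."""
	iii = np.array(indices) + 1
	with open(fn, 'w') as fp:
		fp.write('[ %s ]\n' % name)
		# simple slicing gives rows of exactly cols entries (except possibly the last)
		for start in range(0, len(iii), cols):
			fp.write(' '.join(iii[start:start + cols].astype(str)) + '\n')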
Example #2
	def slice(self,sn,**kwargs):

		"""
		Interface to the slices dictionary. Handles all necessary inferences.
		Returns a subset of the self.slices dictionary indexed by group names.
		MORE DOCUMENTATION.
		"""

		#---default spotname
		self.cursor = kwargs.get('spot',self.cursor)
		part_name = kwargs.get('part_name',self.cursor[1])
		#---search for the simulation in all spots
		keys_to_sn = [key for key in self.slices.keys() if key[1]==sn and key[0][1]==part_name]
		if len(keys_to_sn)>1: raise Exception('found simulation %s in multiple spots!'%sn)
		elif not keys_to_sn: 
			raise Exception('failed to find slice key for sn "%s" and part "%s". '%(sn,part_name)+
				'this might happen if you are missing that simulation or the "spot" that holds it. '+
				'the cursor is "%s" and the spotname is "%s"'%(self.cursor,self.c))
		unique_key = keys_to_sn[0]
		if unique_key[0] != self.cursor:
			self.cursor = unique_key[0]
			status('moving cursor to %s,%s'%self.cursor,tag='status')
			#---! needs concerted motion of cursor/c
			self.c = self.cursor[0]
		if unique_key not in self.slices: 
			status('could not find slices for %s (is it in the specs file?)'%str(unique_key))
		return self.slices[unique_key]
Example #3
def lipid_areas2d(**kwargs):
    """
	Compute bilayer midplane structures for studying undulations.
	"""
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    dat = kwargs['upstream']['lipid_mesh']
    i2s = lambda mn, fr, key: '%d.%d.%s' % (mn, fr, key)
    nmols = [int(dat[i2s(mn, 0, 'nmol')]) for mn in range(2)]
    nframes = int(dat['nframes'])
    #---! could not run in parallel?
    start = time.time()
    areas = [[], []]
    for mn in range(2):
        for fr in range(nframes):
            status('voronoi areas monolayer %s' % mn,
                   i=fr,
                   looplen=nframes,
                   start=start,
                   tag='compute')
            areas[mn].append(
                area_voronoi2d(dat[i2s(mn, fr, 'points')], nmols[mn]))
    #---pack
    attrs, result = {}, {}
    result['areas0'] = np.array(areas[0])
    result['areas1'] = np.array(areas[1])
    result['nframes'] = np.array(nframes)
    result['vecs'] = dat['vecs']
    result['monolayer_indices'] = dat['monolayer_indices']
    result['resnames'] = dat['resnames']
    return result, attrs
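The per-frame work is delegated to `area_voronoi2d`, which is not shown here; it presumably tessellates the projected lipid positions and returns one area per lipid. A rough sketch of that idea (the name voronoi_areas_2d is a hypothetical stand-in), assuming scipy is available and ignoring the periodic images that the real routine would need to add as ghost points:

import numpy as np
from scipy.spatial import Voronoi

def voronoi_areas_2d(points):
	"""Areas of the finite 2D Voronoi cells; unbounded cells get nan (no PBC handling)."""
	vor = Voronoi(np.array(points)[:, :2])
	areas = np.full(len(points), np.nan)
	for pnum, region_index in enumerate(vor.point_region):
		region = vor.regions[region_index]
		if len(region) == 0 or -1 in region: continue
		poly = vor.vertices[region]
		x, y = poly[:, 0], poly[:, 1]
		# shoelace formula for the area of the cell polygon
		areas[pnum] = 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
	return areas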
Example #4
def compute_rmsd(coords, align_selection=None, rmsd_selection=None):
    """
	Return RMSDs and rotations for a single trajectory.
	"""

    if align_selection is None: alignsel = slice(None, None)
    else: alignsel = array(align_selection)
    if rmsd_selection is None: rmsdsel = slice(None, None)
    else: rmsdsel = array(rmsd_selection)
    #---simple RMSD code
    rmsds = []
    r0 = coords[0]
    r0 -= mean(r0, axis=0)
    nframes = len(coords)
    for fr in range(0, nframes):
        status('RMSD', i=fr, looplen=nframes)
        r1 = coords[fr]
        r1 -= mean(r1, axis=0)
        #---computation of RMSD validated against VMD but no reflection
        U, s, Vt = linalg.svd(dot(r0[alignsel].T, r1[alignsel]))
        signer = identity(3)
        signer[2, 2] = sign(linalg.det(dot(Vt.T, U)))
        RM = dot(dot(U, signer), Vt)
        rmsds.append(
            sqrt(mean(sum((r0[rmsdsel].T - dot(RM, r1[rmsdsel].T))**2,
                          axis=0))))
    return rmsds
Example #5
	def manual_prepare_compute(self):
		"""
		Populate the hypothesis and field databases and check their integrity before computing.
		"""
		#---load the database
		start = time.time()
		session = self.sessions['hypothesis']
		for hh,hypo in enumerate(self.hypotheses):
			status('populating hypos',tag='load',i=hh,looplen=len(self.hypotheses),start=start)
			#---reduce step before checking database
			hypo_full = self.Hypothesis(**hypo)
			matches = session.query(self.Hypothesis).filter_by(**hypo_full.base()).all()
			if not any(matches): session.add(hypo_full)
		session.commit()
		session = self.sessions['field']
		for hh,hypo in enumerate(self.hypotheses):
			status('populating fields',tag='load',i=hh,looplen=len(self.hypotheses),start=start)
			#---reduce step before checking database
			hypo_full = self.Field(**hypo)
			matches = session.query(self.Field).filter_by(**hypo_full.base()).all()
			if not any(matches): 
				session.add(hypo_full)
		session.commit()
		#---integrity checks on database rows
		hypotheses_reduced = [i.dict() for i in self.sessions['hypothesis'].query(self.Hypothesis).all()]
		fields_reduced = [i.dict() for i in self.sessions['field'].query(self.Field).all()]
		assert not [i for i in hypotheses_reduced if i['mapping']=='protein' and i['curvature']==0.0]
		assert not [i for i in hypotheses_reduced if i['curvature']==0.0 
			and not (i['sigma_a']==1.0 and i['isotropy']==1.0 and i['sigma_b']==1.0)]
Example #6
	def slice_timeseries(self,grofile,trajfile,**kwargs):

		"""
		Get the time series from a trajectory slice.
		The workspace holds very little data that cannot be parsed from specs files.
		However timeseries data for newly-created slices or perhaps even original sources can be large and
		somewhat costly to generate for an entire data set. For that reason we dump these to disk. 
		For now we write the file based on the incoming trajfile name which should refer to new slices in
		the post directory. In the future we may extend this to sourced trajectories in a "spot".
		"""

		timefile = os.path.basename(re.sub('\.(xtc|trr)$','.clock',trajfile))
		diskwrite = kwargs.get('diskwrite',self.write_timeseries_to_disk)
		timefile_exists = os.path.isfile(os.path.join(self.postdir,timefile))
		if timefile_exists and not self.autoreload and diskwrite: 
			status('removing clock file because autoreload=False and diskwrite=True',tag='warning')
			os.remove(timefile)
		if timefile_exists and self.autoreload:
			#---load the clockfile instead of parsing the XTC file
			dat = load(timefile,path=self.postdir)
			timeseries = dat['timeseries']
		else:
			uni = gmxread(*[os.path.abspath(i) for i in [grofile,trajfile]])
			timeseries = [uni.trajectory[fr].time for fr in range(len(uni.trajectory))]
			if diskwrite: 
				store({'timeseries':timeseries},timefile,self.postdir,
					attrs=None,print_types=False,verbose=True)
		return timeseries
Example #7
def import_nanogel_positions(**kwargs):
    """
	Import nanogel data and send it to a calculation that mimics `protein_abstractor` for Samaneh's data.
	"""
    sn = kwargs.pop('sn', None)
    calc = kwargs.pop('calc', None)
    work = kwargs.pop('work', None)
    if kwargs: raise Exception('unprocessed kwargs %s' % kwargs)
    #---location data can be found in the slices dictionary
    #---! note that the slice name is hard-coded here: "current"
    location = work.slices[sn]['readymade_meso_v1']['current']
    with open(
            os.path.join(location['path'], location['directory'],
                         location['nanogel_dat'])) as fp:
        text = fp.read()
    #---nanogel is saved with the step number not the frame number
    step_to_frame = lambda x: x // 1000000
    regex_frame = r'(\d+)\n(.*?)(?=\n\d+\n|\Z)'
    frames = re.findall(regex_frame, text, flags=re.M + re.DOTALL)
    framenos, points = [], []
    for fnum, frame in enumerate(frames):
        status('reading nanogel frame',
               i=fnum,
               looplen=len(frames),
               tag='load')
        framenos.append(step_to_frame(int(frame[0])))
        ixyz = np.array([[float(j) for j in i.split()]
                         for i in frame[1].splitlines()])
        if not np.all(ixyz[:, 0].astype(int) == np.arange(1, len(ixyz) + 1)):
            raise Exception('indexing problem in the nanogel')
        points.append(ixyz[:, 1:])
    return {'framenos': framenos, 'points': np.array(points)}
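The regular expression above splits the concatenated nanogel dump into (step number, coordinate block) pairs, and the step number becomes a frame number by dividing by one million. A small self-check on synthetic text with the assumed layout (a bare step line followed by indexed coordinates):

import re
import numpy as np

text = ('1000000\n1 0.00 0.10 0.20\n2 1.00 1.10 1.20\n'
	'2000000\n1 0.05 0.15 0.25\n2 1.05 1.15 1.25')
regex_frame = r'(\d+)\n(.*?)(?=\n\d+\n|\Z)'
frames = re.findall(regex_frame, text, flags=re.M + re.DOTALL)
assert [int(f[0]) // 1000000 for f in frames] == [1, 2]
ixyz = np.array([[float(j) for j in i.split()] for i in frames[0][1].splitlines()])
# first column is the one-based particle index, the rest are coordinates
assert np.all(ixyz[:, 0].astype(int) == np.arange(1, len(ixyz) + 1))
assert ixyz[:, 1:].shape == (2, 3)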
Example #8
def compute_rmsd(coords,align_selection=None,rmsd_selection=None):

	"""
	Return RMSDs and rotations for a single trajectory.
	"""

	if align_selection is None: alignsel = slice(None,None)
	else: alignsel = array(align_selection)
	if rmsd_selection is None: rmsdsel = slice(None,None)
	else: rmsdsel = array(rmsd_selection)
	#---simple RMSD code
	rmsds = []
	r0 = coords[0]
	r0 -= mean(r0,axis=0)
	nframes = len(coords)
	for fr in range(0,nframes):
		status('RMSD',i=fr,looplen=nframes)
		r1 = coords[fr]
		r1 -= mean(r1,axis=0)
		#---computation of RMSD validated against VMD but no reflection
		U,s,Vt = linalg.svd(dot(r0[alignsel].T,r1[alignsel]))
		signer = identity(3)
		signer[2,2] = sign(linalg.det(dot(Vt.T,U)))
		RM = dot(dot(U,signer),Vt)
		rmsds.append(sqrt(mean(sum((r0[rmsdsel].T-dot(RM,r1[rmsdsel].T))**2,axis=0))))
	return rmsds
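The SVD block above is a Kabsch-style superposition: the sign correction on the last singular vector excludes improper rotations (reflections). A quick numpy-only sanity check, not part of the original code, rotates a centered point cloud by a known rotation and confirms that the recovered rotation brings the RMSD to numerical zero:

import numpy as np

rng = np.random.RandomState(0)
pts = rng.rand(50, 3)
pts -= pts.mean(axis=0)
theta = 0.3
rotation = np.array([
	[np.cos(theta), -np.sin(theta), 0.],
	[np.sin(theta), np.cos(theta), 0.],
	[0., 0., 1.]])
rotated = np.dot(pts, rotation.T)
# same recipe as above: optimal rotation via SVD with a reflection guard
U, s, Vt = np.linalg.svd(np.dot(pts.T, rotated))
signer = np.identity(3)
signer[2, 2] = np.sign(np.linalg.det(np.dot(Vt.T, U)))
RM = np.dot(np.dot(U, signer), Vt)
rmsd = np.sqrt(np.mean(np.sum((pts.T - np.dot(RM, rotated.T))**2, axis=0)))
assert rmsd < 1e-8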
Example #9
 def __init__(self, atoms_separator, vecs, **kwargs):
     #---we require the separator and vectors
     self.atoms_separator = atoms_separator
     #---nframes is the number of separate frames to use to attempt the separator
     self.vecs, self.nframes = vecs, len(vecs)
     #---new flags
     self.cluster = kwargs.pop('cluster', False)
     self.scan_mode = kwargs.pop('scan_mode', False)
     #---legacy flags
     self.monolayer_cutoff = kwargs.pop('monolayer_cutoff', None)
     self.monolayer_cutoff_retry = kwargs.pop('monolayer_cutoff_retry',
                                              True)
     self.topologize_tolerance = kwargs.pop('topologize_tolerance', None)
     self.cutoff_shrink_increment = kwargs.pop('cutoff_shrink_increment',
                                               None)
     self.cutoff_min = kwargs.pop('cutoff_min', None)
     self.random_tries = kwargs.pop('random_tries', None)
     self.cluster_neighbors = kwargs.pop('cluster_neighbors', None)
     if self.cluster_neighbors is None: self.cluster_neighbors = 4
     if kwargs: raise Exception('unprocessed kwargs: %s' % kwargs)
     #---check for scikit-learn
     if self.cluster:
         try:
             import sklearn
         except ImportError:
             status(
                 'cannot import scikit-learn so we will use legacy leaflet finder',
                 tag='warning')
             self.cluster = False
     #---the persistent function tries to distinguish leaflets according to the mode
     self.persistent()
Example #10
def identify_lipid_leaflets(pts,
                            vec,
                            monolayer_cutoff=2.0,
                            monolayer_cutoff_retry=True,
                            max_count_asymmetry=0.05,
                            pbc_rewrap=True,
                            topologize_tolerance=None):
    """
	Identify leaflets in a bilayer by consensus.
	Note that the time limit on the topologize call was increased from 10 to 30 for large systems.
	"""
    #---time limit on the tolerance checker
    try:
        with time_limit(30):
            wrapper = topologize(
                pts, vec,
                **({
                    'tol': topologize_tolerance
                } if topologize_tolerance else {}))
    except TimeoutException as msg:
        status(
            'topologize failed to join the bilayer. '
            'if it is broken over PBCs e.g. a saddle, this is a serious error which may go undetected. '
            'make sure you always inspect the topology later.',
            tag='error')
        wrapper = np.zeros((len(pts), 3))
Example #11
def curvature_coupling_loader_membrane(data,**kwargs):
	"""
	Receive the undulation data and prepare the meshes for the curvature coupling calculation.
	"""
	#---point heights into "memory"
	status('populating memory',tag='load')
	midplane_method = kwargs.pop('midplane_method','flat')
	if kwargs: raise Exception('unprocessed kwargs: %s'%kwargs)
	memory = {}
	for sn in data['undulations'].keys():
		if (sn,'hqs') not in memory:
			dat = data['undulations'][sn]['data']
			vecs = dat['vecs']
			mesh = dat['mesh']
			midplane = mesh.mean(axis=0)
			#---assume the average structure is a flat bilayer at the vertical center of the bilayer
			if midplane_method=='flat':
				zmeans = midplane.reshape((midplane.shape[0],-1)).mean(axis=1)
				midplane = np.array([i-zmeans[ii] for ii,i in enumerate(midplane)])
			#---assume the average structure is the average height profile of the bilayer
			elif midplane_method=='average':
				zmean = midplane.mean(axis=0)
				midplane -= zmean
			else: raise Exception('invalid midplane method %s'%midplane_method)
			hqs = fft_field(midplane)
			memory[(sn,'hqs')] = hqs
			memory[(sn,'vecs')] = vecs
	return memory
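The two midplane methods differ only in what is subtracted before the Fourier transform: 'flat' removes each frame's own mean height, while 'average' removes the time-averaged height profile. A small demonstration on a synthetic height grid, assuming nothing beyond numpy, makes the distinction concrete:

import numpy as np

nframes, nx, ny = 4, 8, 8
rng = np.random.RandomState(1)
midplane = rng.rand(nframes, nx, ny)
# 'flat': every frame is recentered so its own mean height is zero
zmeans = midplane.reshape((midplane.shape[0], -1)).mean(axis=1)
flat = np.array([frame - zmeans[ii] for ii, frame in enumerate(midplane)])
assert np.allclose(flat.reshape((nframes, -1)).mean(axis=1), 0.)
# 'average': heights are measured relative to the time-averaged profile
average = midplane - midplane.mean(axis=0)
assert np.allclose(average.mean(axis=0), 0.)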
Example #12
def measure_normal_deviation_from_wavy_surface(heights,vecs,curvilinear=False):
	"""
	Given heights on a regular grid, compute the average surface and then compute the 
	"""
	global surf,surfs,mesh
	do_inflate = False
	inflate_factor = 10
	surfs = heights
	#---average surface
	surf_average_base = surfs.mean(axis=0)
	if do_inflate: surf_average = inflate_lateral(surf_average_base,inflate_factor)
	else: surf_average = surf_average_base
	#---height of the average surface
	pivot = surf_average.mean()
	#---standardized box vectors for all calculations (see notes above)
	mvec_base = vecs.mean(axis=0)
	#---get height fluctuations to set the half box height
	maxflux = surfs.ptp()*1.1/2.
	#---new standard box vectors have the correct height and inflated XY dimensions
	inflate_factors = np.array(surf_average.shape).astype(float)/np.array(surf_average_base.shape)
	#---use globals for parallel
	if do_inflate: mvec = np.array([mvec_base[0]*inflate_factors[0],mvec_base[1]*inflate_factors[1],maxflux*2.])
	else: mvec = np.array([mvec_base[0],mvec_base[1],maxflux*2.])
	#---compute a reference surface in absolute points
	#---we use vertical center so that all heights are shifted center of the new box given by twice maxflux
	surf = boxstuff(height_recenter(literalize(surf_average,mvec),pivot=pivot,maxflux=maxflux),mvec)
	#---make the reference mesh (slow step)
	status('making mesh (curvilinear=%s)'%curvilinear,tag='compute')
	mesh = makemesh(surf,mvec,curvilinear=curvilinear)
	status('mesh is ready',tag='compute')
	looper = [dict(fr=fr,pivot=pivot,mvec=mvec,maxflux=maxflux) for fr in range(len(surfs))]
	incoming = basic_compute_loop(average_normal_projections,looper=looper,run_parallel=True)
	#---we must reshape and concatenate the points
	return np.reshape(incoming,(-1,)+surf_average.shape)
Example #13
def framelooper(total,start=None,text='frame'):
	"""
	When performing parallel calculations with joblib we pass a generator to count the number of 
	tasks and report the time.
	"""
	for fr in range(total):
		status(text,i=fr,looplen=total,tag='parallel',start=start)
		yield fr
Example #14
			def callback(args):
				"""Watch the optimization."""
				global Nfeval
				name_groups = ['kappa','gamma','vibe']+['curve(%d)'%i for i in range(ndrops)]
				text = ' step = %d '%Nfeval+' '.join([name+' = '+dotplace(val)
					for name,val in list(zip(name_groups,args))+[('error',objective(args))]])
				status('searching! '+text,tag='optimize')
				Nfeval += 1
Example #15
def salt_bridge_filter():
    global data_contacts, bonds, obs, valid_salt_bridges
    for sn in sns:
        #---filter the bonds and observations from contact maps
        bonds_all = data_contacts[sn]['data']['bonds']
        obs_all = data_contacts[sn]['data']['observations']
        nframes = len(obs_all)
        salt_bridge_inds = []
        #---loop over frames in the simulation
        for fr in range(nframes):
            status('filtering salt bridges from contact data',
                   i=fr,
                   looplen=nframes,
                   tag='compute')
            #---find observed bonds for that frame
            bonds_inds = np.where(obs_all[fr] == 1.0)[0]
            frame = bonds_all[bonds_inds]
            hits_over_salt_bridges = []
            for definition in valid_salt_bridges:
                matches_resname = frame[:, 0] == definition['resname']
                matches_atom = np.in1d(frame[:, 2], definition['atoms'])
                matches_lipid_oxygen = np.array([i[0]
                                                 for i in frame[:, 5]]) == 'O'
                matches = np.all(
                    (matches_resname, matches_atom, matches_lipid_oxygen),
                    axis=0)
                hits_over_salt_bridges.append(matches)
            frame_matches = np.where(np.any(hits_over_salt_bridges, axis=0))
            #---save the observed salt bridges by index number for the master bond list
            salt_bridge_inds.append(bonds_inds[frame_matches])
        #---get unique indices for the observed salt bridges
        salt_inds = np.unique(np.concatenate(salt_bridge_inds))
        #---set global bonds and obs so they only contain salt bridges and then run the bond_counter
        bonds = bonds_all[salt_inds]
        obs = obs_all[:, salt_inds]
        status('salt nbonds for %s is %d' % (sn, len(salt_inds)))
        #---! get resids for the protein and lipid_resnames from contact maps
        lipid_resnames = np.unique(data_contacts[sn]['data']['bonds']
                                   [:, rowspec.index('target_resname')])
        resids = data_contacts[sn]['data']['subject_residues_resids']
        resname_combos = [(r, np.array([r])) for r in lipid_resnames
                          ] + [('all lipids', np.array(lipid_resnames))]
        #---compute loop
        looper = [{
            'resid': resid,
            'resname_set': resname_set
        } for resid in resids for resname_name, resname_set in resname_combos]
        compute_function = bond_counter
        incoming = basic_compute_loop(compute_function,
                                      looper,
                                      run_parallel=True)
        #---tacking on compacted data to mimic the form of the contact maps
        data_contacts[sn]['data']['salt_compacted'] = np.array(incoming)
        if False:
            data_contacts[sn]['data']['pairs_resid_resname'] = np.array([
                (resid, resname_name) for resid in resids
                for resname_name, resname_set in resname_combos
            ]).astype(str)
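The salt-bridge test above keeps a bond only when the subject residue name and atom match one of the valid definitions and the partner lipid atom name begins with "O". A toy example with a hypothetical two-row bonds array, assuming the same column order as the contact-map rowspec (subject resname at column 0, subject atom at column 2, target atom at column 5), isolates that boolean logic:

import numpy as np

frame = np.array([
	['ARG', '12', 'NH1', 'POPS', '301', 'OD1'],
	['LEU', '15', 'CD1', 'POPC', '299', 'C21'],])
valid_salt_bridges = [{'resname': 'ARG', 'atoms': ['NH1', 'NH2', 'NE']}]
hits_over_salt_bridges = []
for definition in valid_salt_bridges:
	matches_resname = frame[:, 0] == definition['resname']
	matches_atom = np.in1d(frame[:, 2], definition['atoms'])
	matches_lipid_oxygen = np.array([i[0] for i in frame[:, 5]]) == 'O'
	hits_over_salt_bridges.append(np.all(
		(matches_resname, matches_atom, matches_lipid_oxygen), axis=0))
# only the first row is a salt bridge by these criteria
assert np.where(np.any(hits_over_salt_bridges, axis=0))[0].tolist() == [0]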
Example #16
	def callback(args):
		"""
		Watch the optimization.
		"""
		global Nfeval,name_groups,objective
		text = ' step = %d '%Nfeval+' '.join([name+' = '+dotplace(val)
			for name,val in list(zip(name_groups,args))+[('error',objective(args))]])
		status('searching! '+text,tag='optimize')
		Nfeval += 1
Example #17
 def wrapper(*args, **kwargs):
     #---you cannot call status here. have the function announce itself
     #---...actually this comes through in the jupyter notebook. removed for clarity
     status('running autoload args=%s, kwargs=%s' % (args, kwargs),
            tag='load')
     # we are using the Observer to get persistent locals from the function
     # ... note that we are calling Observer manually here because it is a decorator
     obs = Observer(function)
     obs.__call__(*args, **kwargs)
     # save locals for later loading into globals in replot
     plotrun.residue = obs._locals
Example #18
    def persistent(self):
        """
		Try to find the leaflets by using multiple frames and multiple cutoffs.
		"""
        if self.monolayer_cutoff is None: self.monolayer_cutoff = 2.0
        #---determine the mode and retry settings
        if self.cutoff_shrink_increment is None:
            self.cutoff_shrink_increment = 0.01
        #---previously we reduced the cutoff to zero before trying a different frame
        if self.cutoff_min is None: self.cutoff_min = 0.8
        #---legacy mode
        if not self.cluster:
            #---try multiple times
            if self.monolayer_cutoff_retry:
                #---legacy retry mode starts high and reduces the cutoff at each step
                #---! we could implement a method that tries cutoffs above/below the start point
                cutoffs = np.arange(
                    self.cutoff_min,
                    self.monolayer_cutoff + self.cutoff_shrink_increment,
                    self.cutoff_shrink_increment)[::-1]
            #---only try one cutoff
            else:
                cutoffs = [self.monolayer_cutoff]
        #---cluster mode uses a default cutoff
        else:
            cutoffs = [None]
        monolayer_indices = None
        #---recall that the caller provides frames for testing
        for fr in range(self.nframes):
            #---loop over cutoffs if we have multiple cutoffs
            for cutoff in cutoffs:
                if not self.cluster:
                    try:
                        #---call the legacy leaflet finder (outside of this class)
                        monolayer_indices = identify_lipid_leaflets_legacy(
                            self.atoms_separator[fr],
                            self.vecs[fr],
                            monolayer_cutoff=cutoff)
                    except Exception:
                        status(
                            'failed to distinguish leaflets with cluster=%s and cutoff=%s'
                            % (self.cluster, cutoff),
                            tag='error')
                else:
                    monolayer_indices = self.identify_leaflets_cluster(
                        pts=self.atoms_separator[fr], vec=self.vecs[fr])
                #---break when successful
                if type(monolayer_indices) != bool:
                    self.monolayer_indices = monolayer_indices
                    return
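In legacy retry mode the cutoffs are scanned from the starting cutoff down to the minimum in fixed increments, so the largest cutoff is tried first. With the default settings used above the scan behaves as in this small numpy check (not part of the class):

import numpy as np

cutoff_min, monolayer_cutoff, increment = 0.8, 2.0, 0.01
cutoffs = np.arange(cutoff_min, monolayer_cutoff + increment, increment)[::-1]
# descending scan: begins near the requested cutoff (2.0) and ends at the minimum (0.8)
assert cutoffs[0] > cutoffs[-1]
assert abs(cutoffs[-1] - cutoff_min) < 1e-8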
Example #19
    def autoplot_decorator(function):
        #---the autoplot decorator nested here so we get the supervisor as a parameter
        #---add the function to the supervisor
        name = function.__name__
        # only announce the wrap when the script is not running as __main__; during direct execution the extra message is just confusing
        if plotrun.script_name != '__main__':
            status('wrapping the plot function named `%s`' % name)
        plotrun.register(name, function)

        def wrapper(*args, **kwargs):
            status('executing plot function `%s`' % name)
            return function(*args, **kwargs)

        return wrapper
Example #20
def protein_abstractor(grofile, trajfile, **kwargs):
    """
	PROTEIN ABSTRACTOR
	Compute the centroids of proteins in a simulation.
	"""

    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    parallel = kwargs.get('parallel', False)
    #---MDAnalysis uses Angstroms not nm
    lenscale = 10.

    #---get protein coms here
    uni = MDAnalysis.Universe(grofile, trajfile)
    #---! cgmd removed here sel = uni.select_atoms(work.vars['selectors']['protein_selection'])
    sel = uni.select_atoms('protein')
    nprots = work.meta.get(sn, {}).get('nprots', 1)
    beads_per_protein = len(sel.resids) // nprots
    nframes = len(uni.trajectory)
    inds = [
        arange(i * beads_per_protein, (i + 1) * beads_per_protein)
        for i in range(nprots)
    ]
    trajectory, trajectory_all, vecs = [], [], []
    start = time.time()
    for fr in range(nframes):
        status('collecting protein centroids',
               i=fr,
               looplen=nframes,
               start=start,
               tag='compute')
        uni.trajectory[fr]
        #---center of geometry not centroid because masses are all 72 in martini
        pts = sel.positions[array(inds).astype(int)] / lenscale
        pts_mean = pts.mean(axis=0)
        trajectory.append(pts_mean)
        trajectory_all.append(pts)
        vecs.append(sel.dimensions[:3])

    #---pack
    attrs, result = {}, {}
    result['resnames'] = array(sel.residues.resnames)
    result['names'] = array(sel.atoms.names)
    result['vecs'] = array(vecs) / lenscale
    result['nframes'] = array(nframes)
    result['points'] = array(trajectory)
    result['points_all'] = array(trajectory_all)
    return result, attrs
Example #21
def protein_rmsd(grofile, trajfile, **kwargs):
    """
	Compute the RMSD of a protein.
	"""

    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']

    #---prepare universe
    slice_name = kwargs['slice_name']
    group = kwargs['group']
    uni = MDAnalysis.Universe(grofile, trajfile)
    nframes = len(uni.trajectory)
    protein = uni.select_atoms('protein and name CA')

    #---reference frame
    uni.trajectory[0]
    r0 = protein.positions
    r0 -= mean(r0, axis=0)

    #---collect coordinates
    nframes = len(uni.trajectory)
    coords, times = [], []
    for fr in range(0, nframes):
        uni.trajectory[fr]
        r1 = protein.positions
        coords.append(r1)
        times.append(uni.trajectory.time)

    #---simple RMSD code
    rmsds = []
    for fr in range(nframes):
        status('RMSD', i=fr, looplen=nframes)
        r1 = coords[fr]
        r1 -= mean(r1, axis=0)
        #---computation of RMSD validated against VMD but no reflection
        U, s, Vt = linalg.svd(dot(r0.T, r1))
        signer = identity(3)
        signer[2, 2] = sign(linalg.det(dot(Vt.T, U)))
        RM = dot(dot(U, signer), Vt)
        rmsds.append(sqrt(mean(sum((r0.T - dot(RM, r1.T))**2, axis=0))))

    #---pack
    attrs, result = {}, {}
    result['rmsds'] = array(rmsds)
    result['timeseries'] = array(times)
    return result, attrs
Example #22
	def prepare_rootdir(self,dropname):
		"""This calculation is off-pathway so we make a folder in the post directory for it."""
		#---root directory
		self.rootdir = os.path.join(work.paths['post_data_spot'],dropname,'')
		if not os.path.isdir(self.rootdir): 
			os.mkdir(self.rootdir)
			#---make subdirectories for hypotheses and curvature fields
			for name,sub_dn in [('rootdir_cc','hypotheses'),('rootdir_cf','curvature_fields')]:
				os.mkdir(os.path.join(work.paths['post_data_spot'],dropname,sub_dn))
		#### else: raise Exception('refusing to write into preexisting directory: %s'%self.rootdir)
		else: status('data already exists',tag='note')
		for name,sub_dn in [('rootdir_cc','hypotheses'),('rootdir_cf','curvature_fields')]:
			self.__dict__[name] = os.path.join(work.paths['post_data_spot'],dropname,sub_dn)
		#---name the data files
		self.namer_cf = lambda pk : os.path.join(self.rootdir_cf,'curvature_field.%d.dat'%pk)
		self.namer_cc = lambda pk : os.path.join(self.rootdir_cc,'hypothesis.%d.dat'%pk)
Example #23
	def __init__(self,**kwargs):
		self.root = 'calcs'
		self.ledger_fn = kwargs.pop('ledger','audit.yaml')
		self.debug = kwargs.pop('debug',False)
		if kwargs: raise Exception('unprocessed kwargs %s'%kwargs)
		status('welcome to the auditor')
		self.ledger = os.path.join(self.root,self.ledger_fn)
		if not os.path.isfile(self.ledger): raise Exception('cannot find %s'%self.ledger)
		else: 
			with open(self.ledger) as fp: self.raw = yaml.safe_load(fp.read())
		# print everything
		asciitree(self.raw)
		self.interpret()
		if self.debug:
			import ipdb
			ipdb.set_trace()
Example #24
	def manual_evaluate_hypotheses(self):
		"""
		Evaluate all pending hypotheses, batched by the curvature field they require.
		"""
		#---manual execution requires export of the data tables to the tools
		#---! prefer this to be systematic, but exporting is already offbeat
		cctools.namer_cf = self.namer_cf
		cctools.namer_cc = self.namer_cc
		cctools.Field = self.Field
		cctools.Hypothesis = self.Hypothesis
		cctools.memory = self.memory
		cctools.rootdir_cf = self.rootdir_cf
		cctools.rootdir_cc = self.rootdir_cc
		#---solve the hypotheses
		#---for memory efficiency we queue up hypotheses according to which curvature field they require
		#---note that we had a simpler, memory-hogging loop in a previous iteration of this code
		fns = [(i.id,self.namer_cc(i.id)) for i in self.sessions['hypothesis'].query(self.Hypothesis).all()]
		pending = [(pk,fn) for pk,fn in fns if not os.path.isfile(fn)]
		if pending:
			self.hypotheses = [self.sessions['hypothesis'].query(
				self.Hypothesis).filter_by(id=pk).one().dict() 
				for pk in list(zip(*pending))[0]]
			fields_required = [self.sessions['field'].query(self.Field).filter_by(**f.dict()).one() 
				for f in [self.Field(**h) for h in self.hypotheses]]
			field_ids_by_hypothesis = np.array([f.id for f in fields_required])
			unique_field_ids = np.unique(field_ids_by_hypothesis)
			#---compute the curvatures in batches
			for uu,ufid in enumerate(unique_field_ids):
				status('computing all hypotheses for field %d/%d'%(uu,len(unique_field_ids)),tag='compute')
				hypo_subset = [self.hypotheses[j] for j in np.where(field_ids_by_hypothesis==ufid)[0]]
				key_cf = ('curvature',ufid)
				self.memory[key_cf] = load(os.path.basename(self.namer_cf(ufid)),
					cwd=os.path.dirname(self.namer_cf(ufid)))
				#---queue for each part of the computation
				queue_hypothesis = mp.Queue()
				#---solve
				manyjob(single=False,
					function=manyjob_worker,
					queue=queue_hypothesis,
					session_classes=self.session_makers,
					objects=hypo_subset,
					kwargs={'preloaded_curvature':True})
				#---clear that hypothesis from memory
				del self.memory[key_cf]
			status('done all batches',tag='compute')
Example #25
    def autoplot(self, out=None):
        """Execute the replot sequence."""
        #---plot everything unless routine
        targets = (self.plot_names if self.routine is None else self.routine)
        #---for supervised execution we get locals from the exec on the script and pass them
        #---...out to globals here because the function call at the end of this function may need to
        #---...see them. this is unorthodox however these functions only run once
        if self.mode == 'supervised' and any(targets) and out is not None:
            globals().update(**out)
        for plot_name in targets:
            #! plotname is wrong here. sometimes it is "plot"
            status('executing plot function `%s`' % plot_name, tag='autoplot')
            if plot_name not in self.plot_functions:
                raise Exception(
                    'this script does not have a plot function named %s' %
                    plot_name)

            self.plot_functions.update(**self.residue)
            self.plot_functions[plot_name]()
Example #26
	def identify_leaflets_cluster(self,pts,vec,topologize_time_limit=30,max_count_asymmetry=0.05):
		"""
		Use scikit-learn clustering methods to separate leaflets.
		Note that this method can cluster a tortuous manifold and may work for complex morphologies.	
		"""
		import scipy
		import sklearn
		import sklearn.neighbors
		import sklearn.cluster
		nlipids = len(pts)
		#---time limit on the topologize function which joins broken bilayers e.g. a saddle that crosses PBCs
		try:
			with time_limit(topologize_time_limit): 
				wrapper = topologize(pts,vec,
					**({'tol':self.topologize_tolerance} if self.topologize_tolerance else {}))
		except TimeoutException as msg:
			status('topologize failed to join the bilayer. '
				'if it is broken over PBCs e.g. a saddle, this is a serious error which may go undetected. '
				'make sure you always inspect the topology later.',tag='error')
			wrapper = np.zeros((len(pts),3))
Example #27
def store(obj, name, path, attrs=None, print_types=False, verbose=True):
    """
	Use h5py to store a dictionary of data.
	"""
    import h5py
    #---! cannot do unicode in python 3. needs to be fixed
    if not isinstance(obj, dict):
        raise Exception('only dictionaries can be stored')
    if os.path.isfile(path + '/' + name):
        raise Exception('file already exists: ' + path + '/' + name)
    path = os.path.abspath(os.path.expanduser(path))
    if not os.path.isdir(path): os.mkdir(path)
    fobj = h5py.File(path + '/' + name, 'w')
    for key in obj.keys():
        if print_types:
            print('[WRITING] ' + key + ' type=' + str(type(obj[key])))
            print('[WRITING] ' + key + ' dtype=' + str(obj[key].dtype))
        #---python3 cannot do unicode so we double check the type
        #---! the following might be wonky
        if (type(obj[key]) == np.ndarray
                and re.match('^str|^unicode', obj[key].dtype.name)
                and 'U' in obj[key].dtype.str):
            obj[key] = obj[key].astype('S')
        try:
            dset = fobj.create_dataset(key, data=obj[key])
        except Exception:
            #---multidimensional scipy ndarray must be promoted to a proper numpy list
            try:
                dset = fobj.create_dataset(key, data=obj[key].tolist())
            except Exception:
                raise Exception(
                    "failed to write this object so it's probably not numpy" +
                    "\n" + key + ' type=' + str(type(obj[key])) + ' dtype=' +
                    str(obj[key].dtype))
    if attrs is not None:
        try:
            fobj.create_dataset('meta', data=np.string_(json.dumps(attrs)))
        except Exception as e:
            raise Exception('failed to serialize attributes: %s' % e)
    if verbose: status('[WRITING] ' + path + '/' + name)
    fobj.close()
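A short usage sketch for store(), with the function above in scope; the dictionary contents, attributes, and the './post' target are made up, and the file is assumed not to exist yet. The attributes round-trip through the json-encoded 'meta' dataset:

import json
import numpy as np
import h5py

demo = {'vecs': np.ones((10, 3)), 'nframes': np.array(10)}
store(demo, 'demo.dat', './post', attrs={'note': 'test'}, verbose=False)
with h5py.File('./post/demo.dat', 'r') as fp:
	vecs = np.array(fp['vecs'])
	meta = json.loads(fp['meta'][()])
assert vecs.shape == (10, 3) and meta['note'] == 'test'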
Example #28
def picturefind(savename, directory='./', meta=None, loud=True):
    """
	Find a picture in the plot repository.
	"""
    if loud: status('searching pictures', tag='store')
    regex = r'^.+\.v([0-9]+)\.png'
    fns = glob.glob(directory + '/' + savename + '.v*')
    #---collect (filename, version) pairs in a list so they can be reused below
    nums = [(fn, int(re.findall(regex, fn)[0])) for fn in fns
            if re.match(regex, fn)]
    matches = [
        fn for fn, num in nums if compare_dicts(
            meta, picturedat(os.path.basename(fn), directory=directory))
    ]
    if len(matches) > 1 and meta is not None:
        print('[ERROR] multiple matches found for %s' % savename)
        raise Exception('multiple matches found for %s' % savename)
    if matches == [] and meta is None:
        return dict([(os.path.basename(fn),
                      picturedat(os.path.basename(fn), directory=directory))
                     for fn, num in nums])
    return matches if not matches else matches[0]
Example #29
	def manual_populate_fields(self):
		"""
		Compute any curvature fields that are not already saved to disk.
		"""
		cctools.data = self.data
		#---compute pending fields according to populated rows
		fns = [(i.id,self.namer_cf(i.id)) for i in self.sessions['field'].query(self.Field).all()]
		pending = [(pk,fn) for pk,fn in fns if not os.path.isfile(fn)]
		if pending:
			#---loop over absent files
			start = time.time()
			for ii,(pk,fn) in enumerate(pending):
				status('computing curvature field',tag='compute',i=ii,looplen=len(pending),start=start)
				hypo = self.sessions['field'].query(self.Field).filter_by(id=pk).one().dict()
				sn = hypo['sn']
				dat = self.data['undulations'][sn]['data']
				vecs = dat['vecs']
				mn = np.shape(dat['mesh'])[2:]
				fields = construct_curvature_fields_trajectory(vecs=vecs,mn=mn,**hypo)
				store({'fields':np.array(fields['fields'])},os.path.basename(fn),self.rootdir_cf,
				attrs={key:val for key,val in list(fields.items())+list(hypo.items())
					if key!='fields'},verbose=False)
Example #30
def contacts_membrane_loader(data):
    """
	Receive the undulation data and prepare the meshes for the curvature coupling calculation.
	"""
    #---point heights into "memory"
    status('populating memory', tag='load')
    memory = {}
    for sn in data['undulations'].keys():
        if (sn, 'hqs') not in memory:
            dat = data['undulations'][sn]['data']
            vecs = dat['vecs']
            mesh = dat['mesh']
            midplane = mesh.mean(axis=0)
            zmeans = midplane.reshape((midplane.shape[0], -1)).mean(axis=1)
            midplane = np.array(
                [i - zmeans[ii] for ii, i in enumerate(midplane)])
            hqs = fft_field(midplane)
            memory[(sn, 'hqs')] = hqs
            memory[(sn, 'vecs')] = vecs

    return memory
Example #31
def plot(plotname=None,nox=False,workspace=None,specfile=None,plotlog=False,**kwargs):

	"""
	Run a plotting routine.
	"""

	from copy import deepcopy
	if plotname is None:
		from base.workspace import Workspace
		if workspace is None: workspace = unpacker(conf_paths)['workspace_spot']
		work = Workspace(workspace,previous=False)
		specs = work.load_specs()
		plotnames = specs['plots'].keys()
	else: plotnames = [plotname]
	#---for each desired plot type
	for pname in plotnames:
		fns = []
		for (dirpath, dirnames, filenames) in os.walk('./'): 
			fns.extend([dirpath+'/'+fn for fn in filenames])
		search = [x for x in fns if re.match(r'^\.\/[^omni].+\/plot-%s\.py$'%pname,x)]
		if len(search)!=1: status('unclear search for %s: %s'%(pname,str(search)))
		else: 
			if plotname is None:
				cmd = 'python '+search[0]+' nox quit=True '+' "%s"'%str(kwargs)+\
					(' &> %s'%plotlog if plotlog else '')
			else: 
				status('rerun the plot with:\n\nexecfile(\''+search[0]+'\')\n',tag='note')
				cmd = "python -i "+search[0]+(' nox' if nox else '')+' "%s"'%str(kwargs)
				#---! add log here? is the user or factory ever going to use this?
			status('calling: "%s"'%cmd,tag='status')
			os.system(cmd)
Example #32
	def verify(self,scrub=False):

		"""
		Check the post-processing filenames to make sure they are present.
		!!! Needs to be finished.
		"""

		status('passing through verify',tag='development')
		return

		#---! the following needs to be reincorporated into the workflow
		missing_files = []
		checks = []
		#---group files
		checks += [(('groups',sn,group),val[group]['fn']) 
			for sn,val in self.groups.items() for group in val]
		checks += [sl[name][key] for sn,sl in self.slices.items() 
			for name in sl for key in ['gro',self.trajectory_format] if key in sl[name]]
		for route,fn in checks:
			if not os.path.isfile(self.postdir+fn): missing_files.append([route,fn])
		if missing_files != [] and not scrub: 
			status('missing files: %s'%str(missing_files),tag='warning')
		elif missing_files != []:
			status('scrubbing deleted files from the workspace: %s'%str(missing_files),tag='warning')
			for route,fn in missing_files:
				del delve(self.__dict__,*route[:-1])[route[-1]]
		else: print('[STATUS] verified')
Example #33
	def __init__(self,spots):
		"""Parse simulation data on disk into a usable structure."""
		# default trajectory format is GROMACS XTC
		self.trajectory_format = 'xtc'
		# process the spots
		# for each "spot" in the yaml file, we construct a template for the data therein
		# the table of contents ("toc") holds one parsing for every part regex in every spot
		self.spots,self.toc = {},collections.OrderedDict()
		for name,details in spots.items():
			status('parsing data from spot "%s"'%name,tag='parse')
			rootdir = os.path.join(details['route_to_data'],details['spot_directory'])
			if not os.path.isdir(rootdir):
				raise Exception('\n[ERROR] cannot find root directory %s'%rootdir)
			for pnum,(part_name,part_regex) in enumerate(details['regexes']['part'].items()):
				status('parsing data type "%s"'%part_name,i=pnum,
					looplen=len(details['regexes']['part']),tag='parse')
				spot = (name,part_name)
				self.toc[spot] = {}
				self.spots[spot] = {
					'rootdir':os.path.join(rootdir,''),
					'top':details['regexes']['top'],
					'step':details['regexes']['step'],
					'part':part_regex,
					'namer':eval(details['namer']),
					'namer_text':details['namer'],}
				self.spots[spot]['divy_keys'] = self.divy_keys(spot)
		#---run the treeparser on each spot
		for snum,(spotname,spot) in enumerate(self.spots.items()):
			status('running the treeparser: %s,%s'%spotname,
				i=snum,looplen=len(self.spots),tag='parse',width=65)
			self.treeparser(spotname,**spot)
Example #34
def ion_binding_combinator(**kwargs):

	"""
	Compute bridges.
	"""

	sn = kwargs['sn']
	dat = kwargs['upstream']['ion_binding']
	resnames = dat['resnames']
	pas = dat['partners_atoms']
	lipid_distances = dat['lipid_distances']
	nframes = dat['nframes']
	zonecut = kwargs['calc']['specs']['zonecut']
	results,attrs = {},{}
	attrs['zonecut'] = zonecut
	#---zonecut is angstroms while lipid_distances is nm
	zonecut = zonecut/10.
	i2s2 = lambda *items: '.'.join([str(i) for i in items])
	#---previous method may have created disorder downstream
	if 0: lipids = unique(resnames[unique([tuple(i) for fr in range(nframes) for i in pas[fr]])])
	lipids = array(list(resnames[sort(unique(resnames,return_index=True)[1])]))
	for nn in range(3):
		combos = array([''.join(j) for j in itertools.product(
			''.join([str(i) for i in range(nn+2)]),repeat=len(lipids))
			if sum([int(k) for k in j])==nn+1])
		combonames = [tuple(v) for v in [concatenate([[lipids[ww]]*int(w)
			for ww,w in enumerate(l)]) for l in combos]]
		#---! problematic method excised below
		#---! cind = lambda a : where(combos==''.join([str(sum(array(a)==i)) for i in lipids]))[0][0]

		wcs = zeros((nframes,len(combos)))
		st = time.time()
		status('[COMPUTE] combinator '+sn)
		for fr in range(nframes):
			status('[COMPUTE] combinator nn='+str(nn+1),i=fr,looplen=nframes,start=st)
			parts = resnames[pas[fr,where(sum(lipid_distances[fr]<zonecut,axis=1)==nn+1)[0]]][:,:nn+1]
			#---! wcs[fr] = array([sum(array([cind(j) for j in parts])==i) for i in range(len(combos))])
			wcs[fr] = array([sum(array([where(combos==''.join([str(sum(array(j)==k))
				for k in lipids]))[0][0] for j in parts])==i) for i in range(len(combos))])
		results[i2s2(nn,'wcs')] = wcs
		results[i2s2(nn,'combos')] = combos
		results[i2s2(nn,'combonames')] = array(combonames)
	return results,attrs
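The itertools.product one-liner enumerates digit strings whose digits count how many of each lipid type participates, keeping only those whose digits sum to nn+1; combonames then expands each digit string into a tuple of residue names. A short worked example with two hypothetical lipid names (pure Python, no numpy needed) shows the pattern:

import itertools

lipids = ['POPC', 'DOPS']
for nn in range(2):
	combos = [''.join(j) for j in itertools.product(
		''.join([str(i) for i in range(nn+2)]), repeat=len(lipids))
		if sum([int(k) for k in j]) == nn+1]
	combonames = [tuple(name for ww, digit in enumerate(l)
		for name in [lipids[ww]]*int(digit)) for l in combos]
	print(nn, combos, combonames)
# nn=0: ['01', '10'] -> [('DOPS',), ('POPC',)]
# nn=1: ['02', '11', '20'] -> [('DOPS', 'DOPS'), ('POPC', 'DOPS'), ('POPC', 'POPC')]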
Example #35
	def save(self,quiet=False):

		"""
		Write the class to a pickle.
		Saving the workspace obviates the need to check timestamps and parse EDR files every time.
		Note: future development here will allow the workspace to be fully and quickly reconstituted from
		clock files saved to disk if the user sets the "timekeeper" option in paths.yaml.
		"""

		#---cannot save lambda functions in pickle
		detach = deepcopy(self.spots)
		for spot,details in self.spots.items(): 
			del details['namer']
			del details['divy_keys']
		if not quiet: status('saving',tag='work')
		#---ignore interrupts while writing the pickle
		wait = signal.signal(signal.SIGINT,signal.SIG_IGN)
		pickle.dump(self,open(self.filename,'wb'))
		signal.signal(signal.SIGINT,wait)
		if not quiet: status('done saving',tag='work')
		#---reattach the lambda functions after saving
		self.spots = detach
Example #36
	def treeparser(self,spot):

		"""
		This function parses simulation data which are organized into a "spot". 
		It writes the filenames to the table of contents (self.toc).
		"""

		spot_sub = self.spots[spot]
		rootdir = spot_sub['rootdir']
		#---start with all files under rootdir
		fns = [os.path.join(dirpath,fn) 
			for (dirpath, dirnames, filenames) 
			in os.walk(rootdir,followlinks=True) for fn in filenames]
		#---regex combinator is the only place where we enforce a naming convention via top,step,part
		#---note that we may wish to generalize this depending upon whether it is wise to have three parts
		regex = ('^%s\/'%re.escape(rootdir.rstrip('/'))+
			'\/'.join([spot_sub['top'],spot_sub['step'],spot_sub['part']])
			+'$')
		matches_raw = [i.groups() for fn in fns for i in [re.search(regex,fn)] if i]
		if not matches_raw: 
			status('no matches found for spot: "%s,%s"'%spot,tag='warning')
			return
		#---first we organize the top,step,part into tuples which serve as keys
		#---we organize the toc as a doubly-nested dictionary of trajectory parts
		#---the top two levels of the toc correspond to the top and step signifiers
		#---note that this procedure projects the top,step,part naming convention into the toc
		matches = [self.spots[spot]['divy_keys'](i) for i in matches_raw]
		self.toc[spot] = collections.OrderedDict()
		#---sort the tops into an ordered dictionary
		for top in sorted(set([i[0] for i in matches])):
			self.toc[spot][top] = collections.OrderedDict()
		#---collect unique steps for each top and load them with the parts
		for top in self.toc[spot]:
			#---sort the steps into an ordered dictionary
			for step in sorted(set([i[1] for i in matches if i[0]==top])):
				#---we sort the parts into an ordered dictionary
				#---this is the leaf of the toc tree and we use dictionaries
				parts = sorted([i[2] for i in matches if i[0]==top and i[1]==step])
				self.toc[spot][top][step] = collections.OrderedDict([(part,{}) for part in parts])
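The regex combinator concatenates the top, step, and part patterns into a single anchored expression, so one re.search both validates a path and extracts the (top, step, part) key in one pass. A toy example with hypothetical regexes written in the style of a spots file:

import re

rootdir = '/data/sims/'
top = '(simulation-v[0-9]+)'
step = '([a-z][0-9]+-[a-z]+)'
part = r'(md\.part[0-9]{4}\.xtc)'
regex = '^%s/' % re.escape(rootdir.rstrip('/')) + '/'.join([top, step, part]) + '$'
fn = '/data/sims/simulation-v001/s01-production/md.part0003.xtc'
match = re.search(regex, fn)
assert match and match.groups() == (
	'simulation-v001', 's01-production', 'md.part0003.xtc')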
Example #37
    def autoload_decorator(function):
        #---the autoload decorator nested here so we get the supervisor as a parameter
        #---add the function to the supervisor
        name = function.__name__
        # only announce the wrap when the script is not running as __main__; during direct execution the extra message is just confusing
        if plotrun.script_name != '__main__':
            status('wrapping the loader function named `%s`' % name)
        #! plotrun.register_loader(name,function)
        def wrapper(*args, **kwargs):
            #---you cannot call status here. have the function announce itself
            #---...actually this comes through in the jupyter notebook. removed for clarity
            status('running autoload args=%s, kwargs=%s' % (args, kwargs),
                   tag='load')
            # we are using the Observer to get persistent locals from the function
            # ... note that we are calling Observer manually here because it is a decorator
            obs = Observer(function)
            obs.__call__(*args, **kwargs)
            # save locals for later loading into globals in replot
            plotrun.residue = obs._locals

        plotrun.register_loader(name, wrapper)
        return wrapper
Example #38
	def drop_gaussians(self,**kwargs):
		"""
		Method for choosing the positions of Gaussians.
		"""
		pos_spec = kwargs.get('curvature_positions',{})
		method = pos_spec.get('method',None)
		extent = kwargs.get('extents',{}).get('extent',{})
		if not method: raise Exception('need a method for setting the curvature fields')
		elif method=='protein_subselection':
			self.data_prot,_ = plotload('protein_abstractor')
			for sn in work.sns():
				selections = pos_spec.get('selections',None)
				if not selections: raise Exception('need selections in protein_subselection')
				#---determine the centers of the protein according to the selections
				#---...noting that the protein_abstractor points are stored by the residue, not bead/atom 
				points = np.array([np.transpose(self.data_prot[sn]['data']['points'],(1,0,2))[s] 
					for s in selections])
				#points = np.transpose(self.data_prot[sn]['data']['points'],(1,0,2))[selections]
				points = points.mean(axis=1)[...,:2]
				ndrops = len(points)
				#---get data from the memory
				hqs = self.memory[(sn,'hqs')]
				self.nframes = len(hqs)
				mn = hqs.shape[1:]
				vecs = self.memory[(sn,'vecs')]
				vecs_mean = np.mean(vecs,axis=0)
				#---formulate the curvature request
				curvature_request = dict(curvature=1.0,mn=mn,sigma_a=extent,sigma_b=extent,theta=0.0)
				#---construct unity fields
				fields_unity = np.zeros((self.nframes,ndrops,mn[0],mn[1]))
				reindex,looper = zip(*[((fr,ndrop),
					dict(vecs=vecs[fr],centers=[points[ndrop][fr]/vecs[fr][:2]],**curvature_request)) 
					for fr in range(self.nframes) for ndrop in range(ndrops)])
				status('computing curvature fields for %s'%sn)
				incoming = basic_compute_loop(make_fields,looper=looper)
				#---! inelegant
				for ii,(fr,ndrop) in enumerate(reindex): fields_unity[fr][ndrop] = incoming[ii]
				self.memory[(sn,'fields_unity')] = fields_unity
Example #39
	def treeparser_edr(self):

		"""
		A special tree parser gets times from edr files.
		"""

		#---perform this operation on any spotnames with a part named "edr"
		spots_edr = [i for i in self.spots.keys() if i[1]=='edr']
		#---prepare a list of edr files to parse first
		targets = []
		for spot in spots_edr:
			for sn in self.toc[spot].keys():
				steps = self.toc[spot][sn].keys()
				for step in steps:
					parts = self.toc[spot][sn][step].keys()
					for part in parts:
						fn = self.keyfinder(spot)(sn,step,part)
						keys = (spot,sn,step,part)
						targets.append((fn,keys))
		for ii,(fn,keys) in enumerate(targets):
			status('scanning EDR files',i=ii,looplen=len(targets),tag='scan')
			times = edrcheck(fn)
			leaf = delve(self.toc,*keys)
			leaf['start'],leaf['stop'] = times
Example #40
	def create_slice(self,**kwargs):

		"""
		Create a slice of a trajectory.
		"""
	
		sn = kwargs['sn']
		start = kwargs['start']
		end = kwargs['end']
		skip = kwargs['skip']
		group = kwargs['group']
		slice_name = kwargs['slice_name']
		pbc = kwargs['pbc'] if 'pbc' in kwargs else None
		pbc_suffix = '' if not pbc else '.pbc%s'%pbc
		outkey = '%s.%d-%d-%d.%s%s'%(self.prefixer(sn),start,end,skip,group,pbc_suffix)
		grofile,trajfile = outkey+'.gro',outkey+'.'+self.trajectory_format
		#---make the slice only if necessary
		both_there = all([os.path.isfile(self.postdir+fn) for fn in [grofile,trajfile]])
		self.slice(sn,part_name=self.trajectory_format)
		if both_there and slice_name in self.slice(sn) and group in self.slice(sn)[slice_name]: return
		if not both_there or not all([self.confirm_file(self.postdir+fn) for fn in [grofile,trajfile]]):
			status('making slice: %s'%outkey,tag='status')
			#---slice is not there or not confirmed so we make a new one here
			sequence = self.get_timeseries(sn,strict=False)
			traj_toc = self.toc[self.cursor]
			#---assume the tpr part exists
			tpr_toc = self.toc[(self.c,'tpr')]
			try:
				#---! note that we force xtc below and this needs a solution ASAP!
				slice_trajectory(start,end,skip,sequence,outkey,self.postdir,
					tpr_keyfinder=self.keyfinder((self.c,'tpr')),
					traj_keyfinder=self.keyfinder((self.c,self.trajectory_format)),
					group_fn=self.groups[sn][group]['fn'],pbc=pbc)
			except KeyboardInterrupt: raise Exception('[ERROR] cancelled by user')
			except Exception as e:
				#---the following exception handler allows the code to continue to slice in case
				#---...of faulty data but it produces a large quantity of output including a full 
				#---...traceback to the original exception which also tells you which log files to read
				#---...to diagnose the error. tested on faulty data. note that the calculator continues
				#---...but every time you run "make compute" it will hit the error until you solve it
				exc_type, exc_obj, exc_tb = sys.exc_info()
				fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
				status('%s in %s at line %d'%(str(exc_type),fname,exc_tb.tb_lineno),tag='error')
				status('%s'%e,tag='error')
				import traceback
				status(re.sub('\n','\n[TRACEBACK] ',traceback.format_exc()),tag='traceback')
				status('failed to make slice: '+outkey,tag='error')
				if slice_name not in self.slice(sn): self.slice(sn)[slice_name] = {}
				self.slice(sn)[slice_name][group] = {'start':start,'end':end,'skip':skip,
					'group':group,'pbc':pbc,'verified':False,'filekey':outkey,
					'gro':grofile,self.trajectory_format:trajfile,'missing_frame_percent':100.}
				status('returning from this function but otherwise passing',tag='error')			
				return
		print('[STATUS] checking timestamps of slice: %s'%outkey)
		#---slice is made or preexisting and now we validate
		timeseries = self.slice_timeseries(self.postdir+grofile,self.postdir+trajfile)
		import numpy as np
		missing_frame_percent = 1.-len(np.arange(start,end+skip,skip))/float(len(timeseries))
		if len(timeseries)!=len(np.arange(start,end+skip,skip)): verified = False
		else:
			try: verified = all(np.array(timeseries).astype(float)==
				np.arange(start,end+skip,skip).astype(float))
			except Exception: verified = False
		if not verified: status('frame problems in %s'%outkey,tag='warning')
		if slice_name not in self.slice(sn): self.slice(sn)[slice_name] = {}
		self.slice(sn)[slice_name][group] = {'start':start,'end':end,'skip':skip,
			'group':group,'pbc':pbc,'verified':verified,'timeseries':timeseries,'filekey':outkey,
			'gro':grofile,self.trajectory_format:trajfile,'missing_frame_percent':missing_frame_percent}
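Verification hinges on comparing the frame times read back from the slice against the arithmetic sequence implied by start, end, and skip. A tiny numeric illustration, assuming times in picoseconds:

import numpy as np

start, end, skip = 0, 100, 10
expected = np.arange(start, end + skip, skip)
timeseries = list(expected.astype(float))
assert len(timeseries) == len(expected)
assert all(np.array(timeseries).astype(float) == expected.astype(float))
# a slice with a dropped frame fails the length check and is flagged as unverified
assert len(timeseries[:-1]) != len(expected)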
Example #41
	def action(self,calculation_name=None):
	
		"""
		Parse a specifications file to make changes to a workspace.
		This function interprets the specifications and acts on it. 
		It manages the irreducible units of an omnicalc operation and ensures
		that the correct data are sent to analysis functions in the right order.
		"""

		status('parsing specs file',tag='status')

		#---load the yaml specifications file
		specs = self.load_specs()
		#### status('done loading specs',tag='status')		
		
		#---read simulations from the slices dictionary
		sns = specs['slices'].keys()
		#---variables are passed directly to self.vars
		self.vars = deepcopy(specs['variables']) if 'variables' in specs else {}

		#---apply "+"-delimited internal references in the yaml file
		for path,sub in [(i,j[-1]) for i,j in catalog(specs) if type(j)==list
			and type(j[-1])==str and re.match('^\+',j[-1])]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]][point[path[-1]].index(sub)] = source
		for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match('^\+',j)]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]] = source
		
		#---loop over all simulations to create groups and slices
		self.save(quiet=True)
		for route in [('slices',i) for i in sns]:
			root,sn = delve(specs,*route),route[-1]
			#---create groups
			if 'groups' in root:
				for group,select in root['groups'].items():
					kwargs = {'group':group,'select':select,'sn':sn}
					self.create_group(**kwargs)
				root.pop('groups')
			#---slice the trajectory
			if 'slices' in root:
				for sl,details in root['slices'].items(): 
					#---! use a default group here?
					for group in details['groups']:
						kwargs = {'sn':sn,'start':details['start'],
							'end':details['end'],'skip':details['skip'],'slice_name':sl}
						kwargs['group'] = group
						if 'pbc' in details: kwargs['pbc'] = details['pbc']
						self.create_slice(**kwargs)
				root.pop('slices')
			if root != {}: raise Exception('[ERROR] unprocessed specifications %s'%str(root))
			else: del root
		#---we only save after writing all slices. if the slicer fails autoreload will find preexisting files
		self.save(quiet=True)
		checktime()

		#---meta is passed to self.meta
		if 'meta' in specs:
			for sn in specs['meta']:
				self.meta[sn] = specs['meta'][sn]

		#---collections are groups of simulations
		if 'collections' in specs: self.vars['collections'] = specs['collections']

		#---calculations are executed last and organized in this loop
		if 'calculations' in specs:
			status('starting calculations',tag='status')
			#---note that most variables including calc mirror the specs file
			self.calc = dict(specs['calculations'])
			#---infer the correct order for the calculation keys from their upstream dependencies
			upstream_catalog = [i for i,j in catalog(self.calc) if 'upstream' in i]
			#---if there are no specs required to get the upstream data object the user can either 
			#---...use none/None as a placeholder or use the name as the key as in "upstream: name"
			for uu,uc in enumerate(upstream_catalog):
				if uc[-1]=='upstream': upstream_catalog[uu] = upstream_catalog[uu]+[delve(self.calc,*uc)]
			depends = {t[0]:[t[ii+1] for ii,i in enumerate(t) if ii<len(t)-1 and t[ii]=='upstream'] 
				for t in upstream_catalog}
			calckeys = [i for i in self.calc if i not in depends]
			#---check that the calckeys has enough elements 
			list(set(calckeys+[i for j in depends.values() for i in j]))			
			#---! come back to this!
			while any(depends):
				ii,i = depends.popitem()
				if all([j in calckeys for j in i]) and i!=[]: calckeys.append(ii)
				else: depends[ii] = i
			#---if a specific calculation name is given then only perform that calculation
			if calculation_name is not None: calckeys = [calculation_name]
			for calcname in calckeys:
				details = specs['calculations'][calcname]
				status('checking calculation %s'%calcname,tag='status')
				new_calcs = self.interpret_specs(details)
				#---perform calculations
				for calc in new_calcs:
					#---find the script with the function
					fns = []
					for (dirpath, dirnames, filenames) in os.walk('./'): 
						fns.extend([dirpath+'/'+fn for fn in filenames])
					search = [x for x in fns if re.match(r'^\.\/[^ate].+\/%s\.py$'%calcname,x)]
					if len(search)==0: raise Exception('\n[ERROR] cannot find %s.py'%calcname)
					elif len(search)>1: raise Exception('\n[ERROR] redundant matches: %s'%str(search))
					else:
						sys.path.insert(0,os.path.dirname(search[0]))
						function = unpacker(search[0],calcname)
						status('computing %s'%calcname,tag='loop')
						computer(function,calc=calc,workspace=self)
						self.save()
					checktime()
		self.save()