def create_group(self,**kwargs):
    """
    Create a group.
    """
    sn = kwargs['sn']
    name = kwargs['group']
    select = kwargs['select']
    cols = 100 if 'cols' not in kwargs else kwargs['cols']
    #---naming convention holds that the group names follow the prefix and we suffix with ndx
    simkey = self.prefixer(sn)+'.'+name
    fn = '%s.ndx'%simkey
    #---see if we need to make this group
    if os.path.isfile(self.postdir+fn) and name in self.groups[sn]: return
    elif os.path.isfile(self.postdir+fn):
        if self.confirm_file(self.postdir+fn):
            self.groups[sn][name] = {'fn':fn,'select':select}
        return
    status('creating group %s'%simkey,tag='status')
    #---read the structure
    uni = gmxread(self.get_last_start_structure(sn))
    sel = mdasel(uni,select)
    #---write NDX
    import numpy as np
    iii = sel.indices+1
    rows = [iii[np.arange(cols*i,cols*(i+1) if cols*(i+1)<len(iii) else len(iii))]
        for i in range(0,len(iii)/cols+1)]
    with open(self.postdir+fn,'w') as fp:
        fp.write('[ %s ]\n'%name)
        for line in rows: fp.write(' '.join(line.astype(str))+'\n')
    self.groups[sn][name] = {'fn':fn,'select':select}
def slice(self,sn,**kwargs):
    """
    Interface to the slices dictionary. Handles all necessary inferences.
    Returns a subset of the self.slices dictionary indexed by group names.
    MORE DOCUMENTATION.
    """
    #---default spotname
    self.cursor = kwargs.get('spot',self.cursor)
    part_name = kwargs.get('part_name',self.cursor[1])
    #---search for the simulation in all spots
    keys_to_sn = [key for key in self.slices.keys() if key[1]==sn and key[0][1]==part_name]
    if len(keys_to_sn)>1: raise Exception('found simulation %s in multiple spots!'%sn)
    elif not keys_to_sn:
        raise Exception('failed to find slice key for sn "%s" and part "%s". '%(sn,part_name)+
            'this might happen if you are missing that simulation or the "spot" that holds it. '+
            'the cursor is "%s" and the spotname is "%s"'%(self.cursor,self.c))
    unique_key = keys_to_sn[0]
    if unique_key[0] != self.cursor:
        self.cursor = unique_key[0]
        status('moving cursor to %s,%s'%self.cursor,tag='status')
        #---! needs concerted motion of cursor/c
        self.c = self.cursor[0]
    if unique_key not in self.slices:
        status('could not find slices for %s (is it in the specs file?)'%str(unique_key))
    return self.slices[unique_key]
def lipid_areas2d(**kwargs):
    """
    Compute per-lipid areas in two dimensions via Voronoi tessellation of each monolayer in the lipid mesh.
    """
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    dat = kwargs['upstream']['lipid_mesh']
    i2s = lambda mn,fr,key: '%d.%d.%s'%(mn,fr,key)
    nmols = [int(dat[i2s(mn,0,'nmol')]) for mn in range(2)]
    nframes = int(dat['nframes'])
    #---! could not run in parallel?
    start = time.time()
    areas = [[],[]]
    for mn in range(2):
        for fr in range(nframes):
            status('voronoi areas monolayer %s'%mn,i=fr,looplen=nframes,start=start,tag='compute')
            areas[mn].append(area_voronoi2d(dat[i2s(mn,fr,'points')],nmols[mn]))
    #---pack
    attrs,result = {},{}
    result['areas0'] = np.array(areas[0])
    result['areas1'] = np.array(areas[1])
    result['nframes'] = np.array(nframes)
    result['vecs'] = dat['vecs']
    result['monolayer_indices'] = dat['monolayer_indices']
    result['resnames'] = dat['resnames']
    return result,attrs
def compute_rmsd(coords, align_selection=None, rmsd_selection=None):
    """
    Return RMSDs and rotations for a single trajectory.
    """
    if align_selection is None: alignsel = slice(None, None)
    else: alignsel = array(align_selection)
    if rmsd_selection is None: rmsdsel = slice(None, None)
    else: rmsdsel = array(rmsd_selection)
    #---simple RMSD code
    rmsds = []
    r0 = coords[0]
    r0 -= mean(r0, axis=0)
    nframes = len(coords)
    for fr in range(0, nframes):
        status('RMSD', i=fr, looplen=nframes)
        r1 = coords[fr]
        r1 -= mean(r1, axis=0)
        #---computation of RMSD validated against VMD but no reflection
        U, s, Vt = linalg.svd(dot(r0[alignsel].T, r1[alignsel]))
        signer = identity(3)
        signer[2, 2] = sign(linalg.det(dot(Vt.T, U)))
        RM = dot(dot(U, signer), Vt)
        rmsds.append(sqrt(mean(sum((r0[rmsdsel].T - dot(RM, r1[rmsdsel].T))**2, axis=0))))
    return rmsds
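# Usage sketch (not from the source): `compute_rmsd` expects an (nframes, natoms, 3) coordinate
# array and relies on the wildcard numpy imports and the `status` helper being in scope. The
# coordinates and index sets below are hypothetical stand-ins for a real trajectory.
import numpy as np
example_coords = np.random.rand(100, 50, 3)
rmsds_all = compute_rmsd(example_coords)
# align on a hypothetical "core" subset while measuring the RMSD over the remaining atoms
rmsds_tail = compute_rmsd(example_coords,
    align_selection=np.arange(10), rmsd_selection=np.arange(10, 50))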
def manual_prepare_compute(self):
    """
    Populate the hypothesis and field databases and run integrity checks on the resulting rows.
    """
    #---load the database
    start = time.time()
    session = self.sessions['hypothesis']
    for hh,hypo in enumerate(self.hypotheses):
        status('populating hypos',tag='load',i=hh,looplen=len(self.hypotheses),start=start)
        #---reduce step before checking database
        hypo_full = self.Hypothesis(**hypo)
        matches = session.query(self.Hypothesis).filter_by(**hypo_full.base()).all()
        if not any(matches): session.add(hypo_full)
    session.commit()
    session = self.sessions['field']
    for hh,hypo in enumerate(self.hypotheses):
        status('populating fields',tag='load',i=hh,looplen=len(self.hypotheses),start=start)
        #---reduce step before checking database
        hypo_full = self.Field(**hypo)
        matches = session.query(self.Field).filter_by(**hypo_full.base()).all()
        if not any(matches): session.add(hypo_full)
    session.commit()
    #---integrity checks on database rows
    hypotheses_reduced = [i.dict() for i in self.sessions['hypothesis'].query(self.Hypothesis).all()]
    fields_reduced = [i.dict() for i in self.sessions['field'].query(self.Field).all()]
    assert not [i for i in hypotheses_reduced if i['mapping']=='protein' and i['curvature']==0.0]
    assert not [i for i in hypotheses_reduced if i['curvature']==0.0 and not
        (i['sigma_a']==1.0 and i['isotropy']==1.0 and i['sigma_b']==1.0)]
def slice_timeseries(self,grofile,trajfile,**kwargs):
    """
    Get the time series from a trajectory slice. The workspace holds very little data that cannot
    be parsed from specs files. However, timeseries data for newly-created slices, or perhaps even
    original sources, can be large and somewhat costly to generate for an entire data set. For that
    reason we dump these to disk. For now we write the file based on the incoming trajfile name,
    which should refer to new slices in the post directory. In the future we may extend this to
    sourced trajectories in a "spot".
    """
    timefile = os.path.basename(re.sub('\.(xtc|trr)$','.clock',trajfile))
    diskwrite = kwargs.get('diskwrite',self.write_timeseries_to_disk)
    timefile_exists = os.path.isfile(os.path.join(self.postdir,timefile))
    if timefile_exists and not self.autoreload and diskwrite:
        status('removing clock file because autoreload=False and diskwrite=True',tag='warning')
        os.remove(os.path.join(self.postdir,timefile))
    if timefile_exists and self.autoreload:
        #---load the clockfile instead of parsing the XTC file
        dat = load(timefile,path=self.postdir)
        timeseries = dat['timeseries']
    else:
        uni = gmxread(*[os.path.abspath(i) for i in [grofile,trajfile]])
        timeseries = [uni.trajectory[fr].time for fr in range(len(uni.trajectory))]
        if diskwrite:
            store({'timeseries':timeseries},timefile,self.postdir,
                attrs=None,print_types=False,verbose=True)
    return timeseries
def import_nanogel_positions(**kwargs):
    """
    Import nanogel data and send it to a calculation that mimics `protein_abstractor` for Samaneh's data.
    """
    sn = kwargs.pop('sn',None)
    calc = kwargs.pop('calc',None)
    work = kwargs.pop('work',None)
    if kwargs: raise Exception('unprocessed kwargs %s'%kwargs)
    #---location data can be found in the slices dictionary
    #---! note that the slice name is hard-coded here: "current"
    location = work.slices[sn]['readymade_meso_v1']['current']
    with open(os.path.join(location['path'],location['directory'],location['nanogel_dat'])) as fp:
        text = fp.read()
    #---nanogel is saved with the step number not the frame number
    step_to_frame = lambda x: x/1000000
    regex_frame = '(\d+)\n(.*?)(?=\n\d+\n|\Z)'
    frames = re.findall(regex_frame,text,flags=re.M+re.DOTALL)
    framenos,points = [],[]
    for fnum,frame in enumerate(frames):
        status('reading nanogel frame',i=fnum,looplen=len(frames),tag='load')
        framenos.append(step_to_frame(int(frame[0])))
        ixyz = np.array([[float(j) for j in i.split()] for i in frame[1].splitlines()])
        if not np.all(ixyz[:,0].astype(int)==np.arange(1,len(ixyz)+1)):
            raise Exception('indexing problem in the nanogel')
        points.append(ixyz[:,1:])
    return {'framenos':framenos,'points':np.array(points)}
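# Illustration only (hypothetical data, not from the source): how the frame regex above splits a
# nanogel file into (step, coordinate-block) pairs before the leading bead-index column is dropped.
import re
import numpy as np
example_text = "1000000\n1 0.0 0.1 0.2\n2 0.3 0.4 0.5\n2000000\n1 0.6 0.7 0.8\n2 0.9 1.0 1.1"
example_frames = re.findall(r'(\d+)\n(.*?)(?=\n\d+\n|\Z)',example_text,flags=re.M|re.DOTALL)
# example_frames[0] == ('1000000', '1 0.0 0.1 0.2\n2 0.3 0.4 0.5')
example_points = [np.array([[float(j) for j in line.split()] for line in body.splitlines()])[:,1:]
    for step,body in example_frames]
# example_points[0] is a (2, 3) array of bead positions for the first frame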
def __init__(self, atoms_separator, vecs, **kwargs):
    #---we require the separator and vectors
    self.atoms_separator = atoms_separator
    #---nframes is the number of separate frames to use to attempt the separator
    self.vecs, self.nframes = vecs, len(vecs)
    #---new flags
    self.cluster = kwargs.pop('cluster', False)
    self.scan_mode = kwargs.pop('scan_mode', False)
    #---legacy flags
    self.monolayer_cutoff = kwargs.pop('monolayer_cutoff', None)
    self.monolayer_cutoff_retry = kwargs.pop('monolayer_cutoff_retry', True)
    self.topologize_tolerance = kwargs.pop('topologize_tolerance', None)
    self.cutoff_shrink_increment = kwargs.pop('cutoff_shrink_increment', None)
    self.cutoff_min = kwargs.pop('cutoff_min', None)
    self.random_tries = kwargs.pop('random_tries', None)
    self.cluster_neighbors = kwargs.pop('cluster_neighbors', None)
    if self.cluster_neighbors is None: self.cluster_neighbors = 4
    if kwargs: raise Exception('unprocessed kwargs: %s' % kwargs)
    #---check for scikit-learn
    if self.cluster:
        try: import sklearn
        except:
            status('cannot import scikit-learn so we will use legacy leaflet finder', tag='warning')
            self.cluster = False
    #---the persistent function tries to distinguish leaflets according to the mode
    self.persistent()
def identify_lipid_leaflets(pts, vec, monolayer_cutoff=2.0,
    monolayer_cutoff_retry=True, max_count_asymmetry=0.05, pbc_rewrap=True,
    topologize_tolerance=None):
    """
    Identify leaflets in a bilayer by consensus.
    Note that the time limit on the topologize call was increased from 10 to 30 for large systems.
    """
    #---time limit on the tolerance checker
    try:
        with time_limit(30):
            wrapper = topologize(pts, vec,
                **({'tol': topologize_tolerance} if topologize_tolerance else {}))
    except TimeoutException as msg:
        status('topologize failed to join the bilayer. '
            'if it is broken over PBCs e.g. a saddle, this is a serious error which may go undetected. '
            'make sure you always inspect the topology later.', tag='error')
        wrapper = np.zeros((len(pts), 3))
def curvature_coupling_loader_membrane(data,**kwargs):
    """
    Receive the undulation data and prepare the meshes for the curvature coupling calculation.
    """
    #---point heights into "memory"
    status('populating memory',tag='load')
    midplane_method = kwargs.pop('midplane_method','flat')
    if kwargs: raise Exception('unprocessed kwargs: %s'%kwargs)
    memory = {}
    for sn in data['undulations'].keys():
        if (sn,'hqs') not in memory:
            dat = data['undulations'][sn]['data']
            vecs = dat['vecs']
            mesh = dat['mesh']
            midplane = mesh.mean(axis=0)
            #---assume the average structure is a flat bilayer at the vertical center of the bilayer
            if midplane_method=='flat':
                zmeans = midplane.reshape((midplane.shape[0],-1)).mean(axis=1)
                midplane = np.array([i-zmeans[ii] for ii,i in enumerate(midplane)])
            #---assume the average structure is the average height profile of the bilayer
            elif midplane_method=='average':
                zmean = midplane.mean(axis=0)
                midplane -= zmean
            else: raise Exception('invalid midplane method %s'%midplane_method)
            hqs = fft_field(midplane)
            memory[(sn,'hqs')] = hqs
            memory[(sn,'vecs')] = vecs
    return memory
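# A small numerical illustration (synthetic heights, not from the source) of the two midplane
# conventions above: 'flat' removes each frame's mean height, while 'average' removes the
# time-averaged height profile.
import numpy as np
example_midplane = np.random.rand(10, 8, 8)  # (frames, grid_x, grid_y) height field
example_flat = example_midplane - example_midplane.reshape((10, -1)).mean(axis=1)[:, None, None]
example_average = example_midplane - example_midplane.mean(axis=0)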
def measure_normal_deviation_from_wavy_surface(heights,vecs,curvilinear=False):
    """
    Given heights on a regular grid, compute the average surface and then compute the deviation
    of each frame from that average surface along its local normal vectors.
    """
    global surf,surfs,mesh
    do_inflate = False
    inflate_factor = 10
    surfs = heights
    #---average surface
    surf_average_base = surfs.mean(axis=0)
    if do_inflate: surf_average = inflate_lateral(surf_average_base,inflate_factor)
    else: surf_average = surf_average_base
    #---height of the average surface
    pivot = surf_average.mean()
    #---standardized box vectors for all calculations (see notes above)
    mvec_base = vecs.mean(axis=0)
    #---get height fluctuations to set the half box height
    maxflux = surfs.ptp()*1.1/2.
    #---new standard box vectors have the correct height and inflated XY dimensions
    inflate_factors = np.array(surf_average.shape).astype(float)/np.array(surf_average_base.shape)
    #---use globals for parallel
    if do_inflate:
        mvec = np.array([mvec_base[0]*inflate_factors[0],mvec_base[1]*inflate_factors[1],maxflux*2.])
    else: mvec = np.array([mvec_base[0],mvec_base[1],maxflux*2.])
    #---compute a reference surface in absolute points
    #---we use the vertical center so that all heights are shifted to the center of the new box, whose height is twice maxflux
    surf = boxstuff(height_recenter(literalize(surf_average,mvec),pivot=pivot,maxflux=maxflux),mvec)
    #---make the reference mesh (slow step)
    status('making mesh (curvilinear=%s)'%curvilinear,tag='compute')
    mesh = makemesh(surf,mvec,curvilinear=curvilinear)
    status('mesh is ready',tag='compute')
    looper = [dict(fr=fr,pivot=pivot,mvec=mvec,maxflux=maxflux) for fr in range(len(surfs))]
    incoming = basic_compute_loop(average_normal_projections,looper=looper,run_parallel=True)
    #---we must reshape and concatenate the points
    return np.reshape(incoming,(-1,)+surf_average.shape)
def framelooper(total,start=None,text='frame'):
    """
    When performing parallel calculations with joblib we pass a generator to count the number of
    tasks and report the time.
    """
    for fr in range(total):
        status(text,i=fr,looplen=total,tag='parallel',start=start)
        yield fr
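# Hedged usage sketch: a generator like framelooper is typically handed to joblib so the status
# ticker runs as tasks are dispatched. The worker function below is hypothetical, not from the source.
from joblib import Parallel, delayed

def example_worker(fr):
    return fr ** 2  # placeholder per-frame work

example_results = Parallel(n_jobs=4)(
    delayed(example_worker)(fr) for fr in framelooper(100, text='frame'))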
def callback(args):
    """Watch the optimization."""
    global Nfeval
    name_groups = ['kappa','gamma','vibe']+['curve(%d)'%i for i in range(ndrops)]
    text = ' step = %d '%Nfeval+' '.join([name+' = '+dotplace(val)
        for name,val in zip(name_groups,args)+[('error',objective(args))]])
    status('searching! '+text,tag='optimize')
    Nfeval += 1
def salt_bridge_filter():
    global data_contacts,bonds,obs,valid_salt_bridges
    for sn in sns:
        #---filter the bonds and observations from contact maps
        bonds_all = data_contacts[sn]['data']['bonds']
        obs_all = data_contacts[sn]['data']['observations']
        nframes = len(obs_all)
        salt_bridge_inds = []
        #---loop over frames in the simulation
        for fr in range(nframes):
            status('filtering salt bridges from contact data',i=fr,looplen=nframes,tag='compute')
            #---find observed bonds for that frame
            bonds_inds = np.where(obs_all[fr]==1.0)[0]
            frame = bonds_all[bonds_inds]
            hits_over_salt_bridges = []
            for definition in valid_salt_bridges:
                matches_resname = frame[:,0]==definition['resname']
                matches_atom = np.in1d(frame[:,2],definition['atoms'])
                matches_lipid_oxygen = np.array([i[0] for i in frame[:,5]])=='O'
                matches = np.all((matches_resname,matches_atom,matches_lipid_oxygen),axis=0)
                hits_over_salt_bridges.append(matches)
            frame_matches = np.where(np.any(hits_over_salt_bridges,axis=0))
            #---save the observed salt bridges by index number for the master bond list
            salt_bridge_inds.append(bonds_inds[frame_matches])
        #---get unique indices for the observed salt bridges
        salt_inds = np.unique(np.concatenate(salt_bridge_inds))
        #---set global bonds and obs so they only contain salt bridges and then run the bond_counter
        bonds = bonds_all[salt_inds]
        obs = obs_all[:,salt_inds]
        status('salt nbonds for %s is %d'%(sn,len(salt_inds)))
        #---! get resids for the protein and lipid_resnames from contact maps
        lipid_resnames = np.unique(
            data_contacts[sn]['data']['bonds'][:,rowspec.index('target_resname')])
        resids = data_contacts[sn]['data']['subject_residues_resids']
        resname_combos = [(r,np.array([r])) for r in lipid_resnames]+[
            ('all lipids',np.array(lipid_resnames))]
        #---compute loop
        looper = [{'resid':resid,'resname_set':resname_set}
            for resid in resids for resname_name,resname_set in resname_combos]
        compute_function = bond_counter
        incoming = basic_compute_loop(compute_function,looper,run_parallel=True)
        #---tacking on compacted data to mimic the form of the contact maps
        data_contacts[sn]['data']['salt_compacted'] = np.array(incoming)
        if False:
            data_contacts[sn]['data']['pairs_resid_resname'] = np.array([(resid,resname_name)
                for resid in resids for resname_name,resname_set in resname_combos]).astype(str)
def callback(args):
    """
    Watch the optimization.
    """
    global Nfeval,name_groups,objective
    text = ' step = %d '%Nfeval+' '.join([name+' = '+dotplace(val)
        for name,val in zip(name_groups,args)+[('error',objective(args))]])
    status('searching! '+text,tag='optimize')
    Nfeval += 1
def persistent(self):
    """
    Try to find the leaflets by using multiple frames and multiple cutoffs.
    """
    if self.monolayer_cutoff is None: self.monolayer_cutoff = 2.0
    #---determine the mode and retry settings
    if self.cutoff_shrink_increment is None: self.cutoff_shrink_increment = 0.01
    #---previously we reduced the cutoff to zero before trying a different frame
    if self.cutoff_min is None: self.cutoff_min = 0.8
    #---legacy mode
    if not self.cluster:
        #---try multiple times
        if self.monolayer_cutoff_retry:
            #---legacy retry mode starts high and reduces the cutoff at each step
            #---! we could implement a method that tries cutoffs above/below the start point
            cutoffs = np.arange(self.cutoff_min,
                self.monolayer_cutoff+self.cutoff_shrink_increment,
                self.cutoff_shrink_increment)[::-1]
        #---only try one cutoff
        else: cutoffs = [self.monolayer_cutoff]
    #---cluster mode uses a default cutoff
    else: cutoffs = [None]
    monolayer_indices = None
    #---recall that the caller provides frames for testing
    for fr in range(self.nframes):
        #---loop over cutoffs if we have multiple cutoffs
        for cutoff in cutoffs:
            if not self.cluster:
                try:
                    #---call the legacy leaflet finder (outside of this class)
                    monolayer_indices = identify_lipid_leaflets_legacy(
                        self.atoms_separator[fr],self.vecs[fr],monolayer_cutoff=cutoff)
                except:
                    status('failed to distinguish leaflets with cluster=%s and cutoff=%s'%(
                        self.cluster,cutoff),tag='error')
            else:
                monolayer_indices = self.identify_leaflets_cluster(
                    pts=self.atoms_separator[fr],vec=self.vecs[fr])
            #---break when successful
            if type(monolayer_indices)!=bool:
                self.monolayer_indices = monolayer_indices
                return
def autoplot_decorator(function):
    #---the autoplot decorator nested here so we get the supervisor as a parameter
    #---add the function to the supervisor
    name = function.__name__
    # only announce the wrap when looking otherwise confusing
    if plotrun.script_name != '__main__':
        status('wrapping the plot function named `%s`'%name)
    plotrun.register(name,function)
    def wrapper(*args,**kwargs):
        status('executing plot function `%s`'%name)
        return function(*args,**kwargs)
    return wrapper
def protein_abstractor(grofile,trajfile,**kwargs):
    """
    PROTEIN ABSTRACTOR
    Compute the centroids of proteins in a simulation.
    """
    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    parallel = kwargs.get('parallel',False)
    #---MDAnalysis uses Angstroms not nm
    lenscale = 10.
    #---get protein coms here
    uni = MDAnalysis.Universe(grofile,trajfile)
    #---! cgmd removed here
    sel = uni.select_atoms(work.vars['selectors']['protein_selection'])
    sel = uni.select_atoms('protein')
    nprots = work.meta.get(sn,{}).get('nprots',1)
    beads_per_protein = len(sel.resids)/nprots
    nframes = len(uni.trajectory)
    inds = [arange(i*beads_per_protein,(i+1)*beads_per_protein) for i in range(nprots)]
    trajectory,trajectory_all,vecs = [],[],[]
    start = time.time()
    for fr in range(nframes):
        status('collecting protein centroids',i=fr,looplen=nframes,start=start,tag='compute')
        uni.trajectory[fr]
        #---center of geometry not centroid because masses are all 72 in martini
        pts = sel.positions[array(inds).astype(int)]/lenscale
        pts_mean = pts.mean(axis=0)
        trajectory.append(pts_mean)
        trajectory_all.append(pts)
        vecs.append(sel.dimensions[:3])
    #---pack
    attrs,result = {},{}
    result['resnames'] = array(sel.residues.resnames)
    result['names'] = array(sel.atoms.names)
    result['vecs'] = array(vecs)/lenscale
    result['nframes'] = array(nframes)
    result['points'] = array(trajectory)
    result['points_all'] = array(trajectory_all)
    return result,attrs
def protein_rmsd(grofile,trajfile,**kwargs):
    """
    Compute the RMSD of a protein.
    """
    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    #---prepare universe
    slice_name = kwargs['slice_name']
    group = kwargs['group']
    uni = MDAnalysis.Universe(grofile,trajfile)
    nframes = len(uni.trajectory)
    protein = uni.select_atoms('protein and name CA')
    #---reference frame
    uni.trajectory[0]
    r0 = protein.positions
    r0 -= mean(r0,axis=0)
    #---collect coordinates
    nframes = len(uni.trajectory)
    coords,times = [],[]
    for fr in range(0,nframes):
        uni.trajectory[fr]
        r1 = protein.positions
        coords.append(r1)
        times.append(uni.trajectory.time)
    #---simple RMSD code
    rmsds = []
    for fr in range(nframes):
        status('RMSD',i=fr,looplen=nframes)
        r1 = coords[fr]
        r1 -= mean(r1,axis=0)
        #---computation of RMSD validated against VMD but no reflection
        U,s,Vt = linalg.svd(dot(r0.T,r1))
        signer = identity(3)
        signer[2,2] = sign(linalg.det(dot(Vt.T,U)))
        RM = dot(dot(U,signer),Vt)
        rmsds.append(sqrt(mean(sum((r0.T-dot(RM,r1.T))**2,axis=0))))
    #---pack
    attrs,result = {},{}
    result['rmsds'] = array(rmsds)
    result['timeseries'] = array(times)
    return result,attrs
def prepare_rootdir(self,dropname):
    """This calculation is off-pathway so we make a folder in the post directory for it."""
    #---root directory
    self.rootdir = os.path.join(work.paths['post_data_spot'],dropname,'')
    if not os.path.isdir(self.rootdir):
        os.mkdir(self.rootdir)
        #---make subdirectories for hypotheses and curvature fields
        for name,sub_dn in [('rootdir_cc','hypotheses'),('rootdir_cf','curvature_fields')]:
            os.mkdir(os.path.join(work.paths['post_data_spot'],dropname,sub_dn))
    #### else: raise Exception('refusing to write into preexisting directory: %s'%self.rootdir)
    else: status('data already exists',tag='note')
    for name,sub_dn in [('rootdir_cc','hypotheses'),('rootdir_cf','curvature_fields')]:
        self.__dict__[name] = os.path.join(work.paths['post_data_spot'],dropname,sub_dn)
    #---name the data files
    self.namer_cf = lambda pk: os.path.join(self.rootdir_cf,'curvature_field.%d.dat'%pk)
    self.namer_cc = lambda pk: os.path.join(self.rootdir_cc,'hypothesis.%d.dat'%pk)
def __init__(self,**kwargs):
    self.root = 'calcs'
    self.ledger_fn = kwargs.pop('ledger','audit.yaml')
    self.debug = kwargs.pop('debug',False)
    if kwargs: raise Exception('unprocessed kwargs %s'%kwargs)
    status('welcome to the auditor')
    self.ledger = os.path.join(self.root,self.ledger_fn)
    if not os.path.isfile(self.ledger): raise Exception('cannot find %s'%self.ledger)
    else:
        with open(self.ledger) as fp: self.raw = yaml.load(fp.read())
    # print everything
    asciitree(self.raw)
    self.interpret()
    if self.debug:
        import ipdb
        ipdb.set_trace()
def manual_evaluate_hypotheses(self):
    """
    Evaluate all pending hypotheses, batched by the curvature field that each one requires.
    """
    #---manual execution requires export of the data tables to the tools
    #---! prefer this to be systematic, but exporting is already offbeat
    cctools.namer_cf = self.namer_cf
    cctools.namer_cc = self.namer_cc
    cctools.Field = self.Field
    cctools.Hypothesis = self.Hypothesis
    cctools.memory = self.memory
    cctools.rootdir_cf = self.rootdir_cf
    cctools.rootdir_cc = self.rootdir_cc
    #---solve the hypotheses
    #---for memory efficiency we queue up hypotheses according to which curvature field they require
    #---note that we had a simpler, memory-hogging loop in a previous iteration of this code
    fns = [(i.id,self.namer_cc(i.id))
        for i in self.sessions['hypothesis'].query(self.Hypothesis).all()]
    pending = [(pk,fn) for pk,fn in fns if not os.path.isfile(fn)]
    if pending:
        self.hypotheses = [self.sessions['hypothesis'].query(
            self.Hypothesis).filter_by(id=pk).one().dict() for pk in zip(*pending)[0]]
        fields_required = [self.sessions['field'].query(self.Field).filter_by(**f.dict()).one()
            for f in [self.Field(**h) for h in self.hypotheses]]
        field_ids_by_hypothesis = np.array([f.id for f in fields_required])
        unique_field_ids = np.unique(field_ids_by_hypothesis)
        #---compute the curvatures in batches
        for uu,ufid in enumerate(unique_field_ids):
            status('computing all hypotheses for field %d/%d'%(uu,len(unique_field_ids)),tag='compute')
            hypo_subset = [self.hypotheses[j] for j in np.where(field_ids_by_hypothesis==ufid)[0]]
            key_cf = ('curvature',ufid)
            self.memory[key_cf] = load(os.path.basename(self.namer_cf(ufid)),
                cwd=os.path.dirname(self.namer_cf(ufid)))
            #---queue for each part of the computation
            queue_hypothesis = mp.Queue()
            #---solve
            manyjob(single=False,function=manyjob_worker,
                queue=queue_hypothesis,session_classes=self.session_makers,
                objects=hypo_subset,kwargs={'preloaded_curvature':True})
            #---clear that hypothesis from memory
            del self.memory[key_cf]
        status('done all batches',tag='compute')
def autoplot(self,out=None):
    """Execute the replot sequence."""
    #---plot everything unless routine
    targets = (self.plot_names if self.routine is None else self.routine)
    #---for supervised execution we get locals from the exec on the script and pass them
    #---...out to globals here because the function call at the end of this function may need to
    #---...see them. this is unorthodox however these functions only run once
    if self.mode=='supervised' and any(targets) and out is not None: globals().update(**out)
    for plot_name in targets:
        #! plotname is wrong here. sometimes it is "plot"
        status('executing plot function `%s`'%plot_name,tag='autoplot')
        if plot_name not in self.plot_functions:
            raise Exception('this script does not have a plot function named %s'%plot_name)
        self.plot_functions.update(**self.residue)
        self.plot_functions[plot_name]()
def identify_leaflets_cluster(self,pts,vec,topologize_time_limit=30,max_count_asymmetry=0.05):
    """
    Use scikit-learn clustering methods to separate leaflets.
    Note that this method can cluster a tortuous manifold and may work for complex morphologies.
    """
    import scipy
    import sklearn
    import sklearn.neighbors
    import sklearn.cluster
    nlipids = len(pts)
    #---time limit on the topologize function which joins broken bilayers e.g. a saddle that crosses PBCs
    try:
        with time_limit(topologize_time_limit):
            wrapper = topologize(pts,vec,
                **({'tol':self.topologize_tolerance} if self.topologize_tolerance else {}))
    except TimeoutException as msg:
        status('topologize failed to join the bilayer. '
            'if it is broken over PBCs e.g. a saddle, this is a serious error which may go undetected. '
            'make sure you always inspect the topology later.',tag='error')
        wrapper = np.zeros((len(pts),3))
def store(obj,name,path,attrs=None,print_types=False,verbose=True):
    """
    Use h5py to store a dictionary of data.
    """
    import h5py
    #---! cannot do unicode in python 3. needs fixed
    if type(obj) != dict: raise Exception('except: only dictionaries can be stored')
    if os.path.isfile(path+'/'+name): raise Exception('except: file already exists: '+path+'/'+name)
    path = os.path.abspath(os.path.expanduser(path))
    if not os.path.isdir(path): os.mkdir(path)
    fobj = h5py.File(path+'/'+name,'w')
    for key in obj.keys():
        if print_types:
            print('[WRITING] '+key+' type='+str(type(obj[key])))
            print('[WRITING] '+key+' dtype='+str(obj[key].dtype))
        #---python3 cannot do unicode so we double check the type
        #---! the following might be wonky
        if (type(obj[key])==np.ndarray and re.match('^str|^unicode',obj[key].dtype.name)
            and 'U' in obj[key].dtype.str):
            obj[key] = obj[key].astype('S')
        try: dset = fobj.create_dataset(key,data=obj[key])
        except:
            #---multidimensional scipy ndarray must be promoted to a proper numpy list
            try: dset = fobj.create_dataset(key,data=obj[key].tolist())
            except:
                raise Exception("failed to write this object so it's probably not numpy"+
                    "\n"+key+' type='+str(type(obj[key]))+' dtype='+str(obj[key].dtype))
    if attrs is not None:
        try: fobj.create_dataset('meta',data=np.string_(json.dumps(attrs)))
        except Exception as e: raise Exception('failed to serialize attributes: %s'%e)
    if verbose: status('[WRITING] '+path+'/'+name)
    fobj.close()
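# Minimal usage sketch (hypothetical filename and directory): store a dictionary of numpy arrays
# with JSON-serializable attributes, mirroring the result/attrs convention returned by the
# calculation functions above.
import numpy as np
example_result = {'rmsds': np.random.rand(100), 'timeseries': np.arange(100).astype(float)}
example_attrs = {'sn': 'simulation_v001', 'slice_name': 'current'}
store(example_result, 'protein_rmsd.example.dat', './post', attrs=example_attrs)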
def picturefind(savename,directory='./',meta=None,loud=True):
    """
    Find a picture in the plot repository.
    """
    if loud: status('searching pictures',tag='store')
    regex = '^.+\.v([0-9]+)\.png'
    fns = glob.glob(directory+'/'+savename+'.v*')
    nums = map(lambda y: (y,int(re.findall(regex,y)[0])),filter(lambda x: re.match(regex,x),fns))
    matches = [fn for fn,num in nums if compare_dicts(
        meta,picturedat(os.path.basename(fn),directory=directory))]
    if len(matches)>1 and meta!=None:
        print('[ERROR] multiple matches found for %s'%savename)
        raise Exception('???')
    if matches==[] and meta==None:
        return dict([(os.path.basename(fn),picturedat(os.path.basename(fn),directory=directory))
            for fn,num in nums])
    return matches if not matches else matches[0]
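# Illustration only: the versioning regex above pulls the integer version out of saved figure
# names such as the hypothetical file below.
import re
re.findall(r'^.+\.v([0-9]+)\.png','fig.undulations.v3.png')  # returns ['3']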
def manual_populate_fields(self):
    """
    Compute any curvature fields with database rows that have not yet been written to disk.
    """
    cctools.data = self.data
    #---compute pending fields according to populated rows
    fns = [(i.id,self.namer_cf(i.id)) for i in self.sessions['field'].query(self.Field).all()]
    pending = [(pk,fn) for pk,fn in fns if not os.path.isfile(fn)]
    if pending:
        #---loop over absent files
        start = time.time()
        for ii,(pk,fn) in enumerate(pending):
            status('computing curvature field',tag='compute',i=ii,looplen=len(pending),start=start)
            hypo = self.sessions['field'].query(self.Field).filter_by(id=pk).one().dict()
            sn = hypo['sn']
            dat = self.data['undulations'][sn]['data']
            vecs = dat['vecs']
            mn = np.shape(dat['mesh'])[2:]
            fields = construct_curvature_fields_trajectory(vecs=vecs,mn=mn,**hypo)
            store({'fields':np.array(fields['fields'])},os.path.basename(fn),self.rootdir_cf,
                attrs={key:val for key,val in fields.items()+hypo.items() if key!='fields'},
                verbose=False)
def contacts_membrane_loader(data):
    """
    Receive the undulation data and prepare the meshes for the curvature coupling calculation.
    """
    #---point heights into "memory"
    status('populating memory',tag='load')
    memory = {}
    for sn in data['undulations'].keys():
        if (sn,'hqs') not in memory:
            dat = data['undulations'][sn]['data']
            vecs = dat['vecs']
            mesh = dat['mesh']
            midplane = mesh.mean(axis=0)
            zmeans = midplane.reshape((midplane.shape[0],-1)).mean(axis=1)
            midplane = np.array([i-zmeans[ii] for ii,i in enumerate(midplane)])
            hqs = fft_field(midplane)
            memory[(sn,'hqs')] = hqs
            memory[(sn,'vecs')] = vecs
    return memory
def plot(plotname=None,nox=False,workspace=None,specfile=None,plotlog=False,**kwargs):
    """
    Run a plotting routine.
    """
    from copy import deepcopy
    if plotname is None:
        from base.workspace import Workspace
        if workspace is None: workspace = unpacker(conf_paths)['workspace_spot']
        work = Workspace(workspace,previous=False)
        specs = work.load_specs()
        plotnames = specs['plots'].keys()
    else: plotnames = [plotname]
    #---for each desired plot type
    for pname in plotnames:
        fns = []
        for (dirpath,dirnames,filenames) in os.walk('./'):
            fns.extend([dirpath+'/'+fn for fn in filenames])
        search = filter(lambda x: re.match('^\.\/[^omni].+\/plot-%s\.py$'%pname,x),fns)
        if len(search)!=1: status('unclear search for %s: %s'%(pname,str(search)))
        else:
            if plotname is None:
                cmd = 'python '+search[0]+' nox quit=True '+' "%s"'%str(kwargs)+\
                    (' &> %s'%plotlog if plotlog else '')
            else:
                status('rerun the plot with:\n\nexecfile(\''+search[0]+'\')\n',tag='note')
                cmd = "python -i "+search[0]+(' nox' if nox else '')+' "%s"'%str(kwargs)
            #---! add log here? is the user or factory ever going to use this?
            status('calling: "%s"'%cmd,tag='status')
            os.system(cmd)
def verify(self,scrub=False):
    """
    Check the post-processing filenames to make sure they are present.
    !!! Needs to be finished.
    """
    status('passing through verify',tag='development')
    return
    #---! the following needs to be reincorporated into the workflow
    missing_files = []
    checks = []
    #---group files
    checks += [(('groups',sn,group),val[group]['fn'])
        for sn,val in self.groups.items() for group in val]
    checks += [sl[name][key] for sn,sl in self.slices.items() for name in sl
        for key in ['gro',self.trajectory_format] if key in sl[name]]
    for route,fn in checks:
        if not os.path.isfile(self.postdir+fn): missing_files.append([route,fn])
    if missing_files != [] and not scrub:
        status('missing files: %s'%str(missing_files),tag='warning')
    elif missing_files != []:
        status('scrubbing deleted files from the workspace: %s'%str(missing_files),tag='warning')
        for route,fn in missing_files:
            del delve(self.__dict__,*route[:-1])[route[-1]]
    else: print '[STATUS] verified'
def __init__(self,spots):
    """Parse simulation data on disk into a usable structure."""
    # default trajectory format is GROMACS XTC
    self.trajectory_format = 'xtc'
    # process the spots
    # for each "spot" in the yaml file, we construct a template for the data therein
    # the table of contents ("toc") holds one parsing for every part regex in every spot
    self.spots,self.toc = {},collections.OrderedDict()
    for name,details in spots.items():
        status('parsing data from spot "%s"'%name,tag='parse')
        rootdir = os.path.join(details['route_to_data'],details['spot_directory'])
        if not os.path.isdir(rootdir):
            raise Exception('\n[ERROR] cannot find root directory %s'%rootdir)
        for pnum,(part_name,part_regex) in enumerate(details['regexes']['part'].items()):
            status('parsing data type "%s"'%part_name,i=pnum,
                looplen=len(details['regexes']['part']),tag='parse')
            spot = (name,part_name)
            self.toc[spot] = {}
            self.spots[spot] = {
                'rootdir':os.path.join(rootdir,''),
                'top':details['regexes']['top'],
                'step':details['regexes']['step'],
                'part':part_regex,
                'namer':eval(details['namer']),
                'namer_text':details['namer'],}
            self.spots[spot]['divy_keys'] = self.divy_keys(spot)
    #---run the treeparser on each spot
    for snum,(spotname,spot) in enumerate(self.spots.items()):
        status('running the treeparser: %s,%s'%spotname,
            i=snum,looplen=len(self.spots),tag='parse',width=65)
        self.treeparser(spotname,**spot)
def ion_binding_combinator(**kwargs):
    """
    Compute bridges: count the combinations of lipids coordinated by each bound ion.
    """
    sn = kwargs['sn']
    dat = kwargs['upstream']['ion_binding']
    resnames = dat['resnames']
    pas = dat['partners_atoms']
    lipid_distances = dat['lipid_distances']
    nframes = dat['nframes']
    zonecut = kwargs['calc']['specs']['zonecut']
    results,attrs = {},{}
    attrs['zonecut'] = zonecut
    #---zonecut is angstroms while lipid_distances is nm
    zonecut = zonecut/10.
    i2s2 = lambda *items: '.'.join([str(i) for i in items])
    #---previous method may have created disorder downstream
    if 0: lipids = unique(resnames[unique([tuple(i) for fr in range(nframes) for i in pas[fr]])])
    lipids = array(list(resnames[sort(unique(resnames,return_index=True)[1])]))
    for nn in range(3):
        combos = array([''.join(j) for j in
            itertools.product(''.join([str(i) for i in range(nn+2)]),repeat=len(lipids))
            if sum([int(k) for k in j])==nn+1])
        combonames = [tuple(v) for v in [concatenate([[lipids[ww]]*int(w)
            for ww,w in enumerate(l)]) for l in combos]]
        #---! problematic method excised below
        #---! cind = lambda a: where(combos==''.join([str(sum(array(a)==i)) for i in lipids]))[0][0]
        wcs = zeros((nframes,len(combos)))
        st = time.time()
        status('[COMPUTE] combinator '+sn)
        for fr in range(nframes):
            status('[COMPUTE] combinator nn='+str(nn+1),i=fr,looplen=nframes,start=st)
            parts = resnames[pas[fr,where(sum(lipid_distances[fr]<zonecut,axis=1)==nn+1)[0]]][:,:nn+1]
            #---! wcs[fr] = array([sum(array([cind(j) for j in parts])==i) for i in range(len(combos))])
            wcs[fr] = array([sum(array([where(combos==''.join([str(sum(array(j)==k))
                for k in lipids]))[0][0] for j in parts])==i) for i in range(len(combos))])
        results[i2s2(nn,'wcs')] = wcs
        results[i2s2(nn,'combos')] = combos
        results[i2s2(nn,'combonames')] = array(combonames)
    return results,attrs
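# An illustration (hypothetical lipid names, not from the source) of the combination labels built
# above: for nn+1 = 2 bound lipids drawn from two residue types, each label counts how many of
# each type participate in the bridge.
import itertools
example_lipids = ['POPC','POPS']
example_nn = 1
example_combos = [''.join(j) for j in itertools.product('012',repeat=len(example_lipids))
    if sum([int(k) for k in j])==example_nn+1]
# example_combos == ['02','11','20'], i.e. two POPS, one of each, or two POPC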
def save(self,quiet=False):
    """
    Write the class to a pickle. Saving the workspace obviates the need to check timestamps and
    parse EDR files every time.
    Note: future development here will allow the workspace to be fully and quickly reconstituted
    from clock files saved to disk if the user sets the "timekeeper" option in paths.yaml.
    """
    #---cannot save lambda functions in pickle
    detach = deepcopy(self.spots)
    for spot,details in self.spots.items():
        del details['namer']
        del details['divy_keys']
    if not quiet: status('saving',tag='work')
    #---ignore interrupts while writing the pickle
    wait = signal.signal(signal.SIGINT,signal.SIG_IGN)
    pickle.dump(self,open(self.filename,'wb'))
    signal.signal(signal.SIGINT,wait)
    if not quiet: status('done saving',tag='work')
    #---reattach the lambda functions after saving
    self.spots = detach
def treeparser(self,spot):
    """
    This function parses simulation data which are organized into a "spot".
    It writes the filenames to the table of contents (self.toc).
    """
    spot_sub = self.spots[spot]
    rootdir = spot_sub['rootdir']
    #---start with all files under rootdir
    fns = [os.path.join(dirpath,fn)
        for (dirpath,dirnames,filenames) in os.walk(rootdir,followlinks=True) for fn in filenames]
    #---regex combinator is the only place where we enforce a naming convention via top,step,part
    #---note that we may wish to generalize this depending upon whether it is wise to have three parts
    regex = ('^%s\/'%re.escape(rootdir.rstrip('/'))+
        '\/'.join([spot_sub['top'],spot_sub['step'],spot_sub['part']])+'$')
    matches_raw = [i.groups() for fn in fns for i in [re.search(regex,fn)] if i]
    if not matches_raw:
        status('no matches found for spot: "%s,%s"'%spot,tag='warning')
        return
    #---first we organize the top,step,part into tuples which serve as keys
    #---we organize the toc as a doubly-nested dictionary of trajectory parts
    #---the top two levels of the toc correspond to the top and step signifiers
    #---note that this procedure projects the top,step,part naming convention into the toc
    matches = [self.spots[spot]['divy_keys'](i) for i in matches_raw]
    self.toc[spot] = collections.OrderedDict()
    #---sort the tops into an ordered dictionary
    for top in sorted(set(zip(*matches)[0])):
        self.toc[spot][top] = collections.OrderedDict()
    #---collect unique steps for each top and load them with the parts
    for top in self.toc[spot]:
        #---sort the steps into an ordered dictionary
        for step in sorted(set([i[1] for i in matches if i[0]==top])):
            #---we sort the parts into an ordered dictionary
            #---this is the leaf of the toc tree and we use dictionaries
            parts = sorted([i[2] for i in matches if i[0]==top and i[1]==step])
            self.toc[spot][top][step] = collections.OrderedDict([(part,{}) for part in parts])
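# An illustrative check (hypothetical regexes and path, not from a real specs file) of how the
# top/step/part expressions are composed into a single path-matching pattern by the treeparser.
import re
example_rootdir = '/data/sims'
example_top = r'(sim-v[0-9]+)'
example_step = r'([a-z][0-9]+-[^\/]+)'
example_part = r'(md\.part[0-9]{4}\.xtc)'
example_regex = (r'^%s\/'%re.escape(example_rootdir.rstrip('/'))+
    r'\/'.join([example_top,example_step,example_part])+'$')
example_match = re.search(example_regex,'/data/sims/sim-v001/s01-production/md.part0003.xtc')
# example_match.groups() == ('sim-v001', 's01-production', 'md.part0003.xtc')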
def autoload_decorator(function):
    #---the autoload decorator nested here so we get the supervisor as a parameter
    #---add the function to the supervisor
    name = function.__name__
    # only announce the wrap when looking otherwise confusing
    if plotrun.script_name != '__main__':
        status('wrapping the loader function named `%s`'%name)
    #! plotrun.register_loader(name,function)
    def wrapper(*args,**kwargs):
        #---you cannot call status here. have the function announce itself
        #---...actually this comes through in the jupyter notebook. removed for clarity
        status('running autoload args=%s, kwargs=%s'%(args,kwargs),tag='load')
        # we are using the Observer to get persistent locals from the function
        # ... note that we are calling Observer manually here because it is a decorator
        obs = Observer(function)
        obs.__call__(*args,**kwargs)
        # save locals for later loading into globals in replot
        plotrun.residue = obs._locals
    plotrun.register_loader(name,wrapper)
    return wrapper
def drop_gaussians(self,**kwargs):
    """
    Method for choosing the positions of Gaussians.
    """
    pos_spec = kwargs.get('curvature_positions',{})
    method = pos_spec.get('method',None)
    extent = kwargs.get('extents',{}).get('extent',{})
    if not method: raise Exception('need a method for setting the curvature fields')
    elif method=='protein_subselection':
        self.data_prot,_ = plotload('protein_abstractor')
        for sn in work.sns():
            selections = pos_spec.get('selections',None)
            if not selections: raise Exception('need selections in protein_subselection')
            #---determine the centers of the protein according to the selections
            #---...noting that the protein_abstractor points are stored by the residue, not bead/atom
            points = np.array([np.transpose(self.data_prot[sn]['data']['points'],(1,0,2))[s]
                for s in selections])
            #points = np.transpose(self.data_prot[sn]['data']['points'],(1,0,2))[selections]
            points = points.mean(axis=1)[...,:2]
            ndrops = len(points)
            #---get data from the memory
            hqs = self.memory[(sn,'hqs')]
            self.nframes = len(hqs)
            mn = hqs.shape[1:]
            vecs = self.memory[(sn,'vecs')]
            vecs_mean = np.mean(vecs,axis=0)
            #---formulate the curvature request
            curvature_request = dict(curvature=1.0,mn=mn,sigma_a=extent,sigma_b=extent,theta=0.0)
            #---construct unity fields
            fields_unity = np.zeros((self.nframes,ndrops,mn[0],mn[1]))
            reindex,looper = zip(*[((fr,ndrop),
                dict(vecs=vecs[fr],centers=[points[ndrop][fr]/vecs[fr][:2]],**curvature_request))
                for fr in range(self.nframes) for ndrop in range(ndrops)])
            status('computing curvature fields for %s'%sn)
            incoming = basic_compute_loop(make_fields,looper=looper)
            #---! inelegant
            for ii,(fr,ndrop) in enumerate(reindex): fields_unity[fr][ndrop] = incoming[ii]
            self.memory[(sn,'fields_unity')] = fields_unity
def treeparser_edr(self):
    """
    A special tree parser gets times from edr files.
    """
    #---perform this operation on any spotnames with a part named "edr"
    spots_edr = [i for i in self.spots.keys() if i[1]=='edr']
    #---prepare a list of edr files to parse first
    targets = []
    for spot in spots_edr:
        for sn in self.toc[spot].keys():
            steps = self.toc[spot][sn].keys()
            for step in steps:
                parts = self.toc[spot][sn][step].keys()
                for part in parts:
                    fn = self.keyfinder(spot)(sn,step,part)
                    keys = (spot,sn,step,part)
                    targets.append((fn,keys))
    for ii,(fn,keys) in enumerate(targets):
        status('scanning EDR files',i=ii,looplen=len(targets),tag='scan')
        times = edrcheck(fn)
        leaf = delve(self.toc,*keys)
        leaf['start'],leaf['stop'] = times
def create_slice(self,**kwargs):
    """
    Create a slice of a trajectory.
    """
    sn = kwargs['sn']
    start = kwargs['start']
    end = kwargs['end']
    skip = kwargs['skip']
    group = kwargs['group']
    slice_name = kwargs['slice_name']
    pbc = kwargs['pbc'] if 'pbc' in kwargs else None
    pbc_suffix = '' if not pbc else '.pbc%s'%pbc
    outkey = '%s.%d-%d-%d.%s%s'%(self.prefixer(sn),start,end,skip,group,pbc_suffix)
    grofile,trajfile = outkey+'.gro',outkey+'.'+self.trajectory_format
    #---make the slice only if necessary
    both_there = all([os.path.isfile(self.postdir+fn) for fn in [grofile,trajfile]])
    self.slice(sn,part_name=self.trajectory_format)
    if both_there and slice_name in self.slice(sn) and group in self.slice(sn)[slice_name]: return
    if not both_there or not all([self.confirm_file(self.postdir+fn) for fn in [grofile,trajfile]]):
        status('making slice: %s'%outkey,tag='status')
        #---slice is not there or not confirmed so we make a new one here
        sequence = self.get_timeseries(sn,strict=False)
        traj_toc = self.toc[self.cursor]
        #---assume the tpr part exists
        tpr_toc = self.toc[(self.c,'tpr')]
        try:
            #---! note that we force xtc below and this needs a solution ASAP!
            slice_trajectory(start,end,skip,sequence,outkey,self.postdir,
                tpr_keyfinder=self.keyfinder((self.c,'tpr')),
                traj_keyfinder=self.keyfinder((self.c,self.trajectory_format)),
                group_fn=self.groups[sn][group]['fn'],pbc=pbc)
        except KeyboardInterrupt: raise Exception('[ERROR] cancelled by user')
        except Exception as e:
            #---the following exception handler allows the code to continue to slice in case
            #---...of faulty data but it produces a large quantity of output including a full
            #---...traceback to the original exception which also tells you which log files to read
            #---...to diagnose the error. tested on faulty data. note that the calculator continues
            #---...but every time you run "make compute" it will hit the error until you solve it
            exc_type,exc_obj,exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            status('%s in %s at line %d'%(str(exc_type),fname,exc_tb.tb_lineno),tag='error')
            status('%s'%e,tag='error')
            import traceback
            status(re.sub('\n','\n[TRACEBACK] ',traceback.format_exc()),tag='traceback')
            status('failed to make slice: '+outkey,tag='error')
            if slice_name not in self.slice(sn): self.slice(sn)[slice_name] = {}
            self.slice(sn)[slice_name][group] = {'start':start,'end':end,'skip':skip,
                'group':group,'pbc':pbc,'verified':False,'filekey':outkey,
                'gro':grofile,self.trajectory_format:trajfile,'missing_frame_percent':100.}
            status('returning from this function but otherwise passing',tag='error')
            return
    print '[STATUS] checking timestamps of slice: %s'%outkey
    #---slice is made or preexisting and now we validate
    timeseries = self.slice_timeseries(self.postdir+grofile,self.postdir+trajfile)
    import numpy as np
    missing_frame_percent = 1.-len(np.arange(start,end+skip,skip))/float(len(timeseries))
    if len(timeseries)!=len(np.arange(start,end+skip,skip)): verified = False
    else:
        try:
            verified = all(np.array(timeseries).astype(float)==
                np.arange(start,end+skip,skip).astype(float))
        except: verified = False
    if not verified: status('frame problems in %s'%outkey,tag='warning')
    if slice_name not in self.slice(sn): self.slice(sn)[slice_name] = {}
    self.slice(sn)[slice_name][group] = {'start':start,'end':end,'skip':skip,
        'group':group,'pbc':pbc,'verified':verified,'timeseries':timeseries,'filekey':outkey,
        'gro':grofile,self.trajectory_format:trajfile,'missing_frame_percent':missing_frame_percent}
def action(self,calculation_name=None):
    """
    Parse a specifications file to make changes to a workspace.
    This function interprets the specifications file and acts on it. It manages the irreducible
    units of an omnicalc operation and ensures that the correct data are sent to analysis functions
    in the right order.
    """
    status('parsing specs file',tag='status')
    #---load the yaml specifications file
    specs = self.load_specs()
    #### status('done loading specs',tag='status')
    #---read simulations from the slices dictionary
    sns = specs['slices'].keys()
    #---variables are passed directly to self.vars
    self.vars = deepcopy(specs['variables']) if 'variables' in specs else {}
    #---apply "+"-delimited internal references in the yaml file
    for path,sub in [(i,j[-1]) for i,j in catalog(specs) if type(j)==list
        and type(j[-1])==str and re.match('^\+',j[-1])]:
        source = delve(self.vars,*sub.strip('+').split('/'))
        point = delve(specs,*path[:-1])
        point[path[-1]][point[path[-1]].index(sub)] = source
    for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match('^\+',j)]:
        source = delve(self.vars,*sub.strip('+').split('/'))
        point = delve(specs,*path[:-1])
        point[path[-1]] = source
    #---loop over all simulations to create groups and slices
    self.save(quiet=True)
    for route in [('slices',i) for i in sns]:
        root,sn = delve(specs,*route),route[-1]
        #---create groups
        if 'groups' in root:
            for group,select in root['groups'].items():
                kwargs = {'group':group,'select':select,'sn':sn}
                self.create_group(**kwargs)
            root.pop('groups')
        #---slice the trajectory
        if 'slices' in root:
            for sl,details in root['slices'].items():
                #---! use a default group here?
                for group in details['groups']:
                    kwargs = {'sn':sn,'start':details['start'],
                        'end':details['end'],'skip':details['skip'],'slice_name':sl}
                    kwargs['group'] = group
                    if 'pbc' in details: kwargs['pbc'] = details['pbc']
                    self.create_slice(**kwargs)
            root.pop('slices')
        if root != {}: raise Exception('[ERROR] unprocessed specifications %s'%str(root))
        else: del root
    #---we only save after writing all slices. if the slicer fails autoreload will find preexisting files
    self.save(quiet=True)
    checktime()
    #---meta is passed to self.meta
    if 'meta' in specs:
        for sn in specs['meta']: self.meta[sn] = specs['meta'][sn]
    #---collections are groups of simulations
    if 'collections' in specs: self.vars['collections'] = specs['collections']
    #---calculations are executed last and organized in this loop
    if 'calculations' in specs:
        status('starting calculations',tag='status')
        #---note that most variables including calc mirror the specs file
        self.calc = dict(specs['calculations'])
        #---infer the correct order for the calculation keys from their upstream dependencies
        upstream_catalog = [i for i,j in catalog(self.calc) if 'upstream' in i]
        #---if there are no specs required to get the upstream data object the user can either
        #---...use none/None as a placeholder or use the name as the key as in "upstream: name"
        for uu,uc in enumerate(upstream_catalog):
            if uc[-1]=='upstream': upstream_catalog[uu] = upstream_catalog[uu]+[delve(self.calc,*uc)]
        depends = {t[0]:[t[ii+1] for ii,i in enumerate(t) if ii<len(t)-1 and t[ii]=='upstream']
            for t in upstream_catalog}
        calckeys = [i for i in self.calc if i not in depends]
        #---check that the calckeys has enough elements
        list(set(calckeys+[i for j in depends.values() for i in j]))
        #---! come back to this!
        while any(depends):
            ii,i = depends.popitem()
            if all([j in calckeys for j in i]) and i!=[]: calckeys.append(ii)
            else: depends[ii] = i
        #---if a specific calculation name is given then only perform that calculation
        if calculation_name is not None: calckeys = [calculation_name]
        for calcname in calckeys:
            details = specs['calculations'][calcname]
            status('checking calculation %s'%calcname,tag='status')
            new_calcs = self.interpret_specs(details)
            #---perform calculations
            for calc in new_calcs:
                #---find the script with the function
                fns = []
                for (dirpath,dirnames,filenames) in os.walk('./'):
                    fns.extend([dirpath+'/'+fn for fn in filenames])
                search = filter(lambda x: re.match('^\.\/[^ate].+\/%s\.py$'%calcname,x),fns)
                if len(search)==0: raise Exception('\n[ERROR] cannot find %s.py'%calcname)
                elif len(search)>1: raise Exception('\n[ERROR] redundant matches: %s'%str(search))
                else: sys.path.insert(0,os.path.dirname(search[0]))
                function = unpacker(search[0],calcname)
                status('computing %s'%calcname,tag='loop')
                computer(function,calc=calc,workspace=self)
                self.save()
            checktime()
    self.save()