def import_readymade_meso_v2_membrane(**kwargs):
    """
    Compute bilayer midplane structures for studying undulations.
    Adapted from `undulations.py`.

    Reads ready-made mesoscale membrane points through `import_membrane_mesh`,
    checks that every frame has the same shape, and interpolates each frame
    onto a regular grid with `makemesh_regular`.

    Keyword arguments read here:
        sn: simulation name, forwarded to `import_membrane_mesh`.
        workspace: object providing `nprocs` for the parallel pool.
        calc: calculation dict; `calc['specs']['grid_spacing']` is required.

    Returns:
        (result, attrs): `result` holds 'mesh', 'grid', 'nframes' and 'vecs';
        `attrs` holds 'grid_spacing' and a dated 'validator' string.

    Raises:
        Exception: if frames differ in shape or do not have four columns.
    """
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    #---import mesh points
    points = import_membrane_mesh(sn=sn, calc=calc, work=work)
    #---ensure there are the same number of points
    points_shapes = list(set([p.shape for p in points]))
    if len(points_shapes) != 1:
        raise Exception(
            'some frames have a different number of points: %s' % points_shapes)
    _, ncols = points_shapes[0]
    if ncols != 4:
        raise Exception('expecting 4-column input on incoming data')
    #---with a consistent number of points everything is an array
    #---only the first three columns are kept
    points = np.array(points)[:, :, :3]
    #---previously checked that the minimum points were identically zero but this was not always true
    #---box vectors are just the maximum points
    #---! check that this assumption makes sense
    vecs = points.max(axis=1)[:, :3]
    grid_spacing = calc['specs']['grid_spacing']
    nframes = len(points)
    #---choose grid dimensions
    grid = np.array(
        [round(i) for i in np.mean(vecs, axis=0) / grid_spacing])[:2]
    #---compute in parallel
    #---NOTE(review): the second positional argument to delayed() is only accepted by older
    #---...joblib releases; confirm against the installed version before upgrading joblib
    start = time.time()
    mesh = [[]]
    mesh[0] = Parallel(n_jobs=work.nprocs, verbose=0)(
        delayed(makemesh_regular, has_shareable_memory)(
            points[fr], vecs[fr], grid)
        for fr in framelooper(nframes, start=start, text='frame'))
    checktime()
    #---pack
    attrs, result = {}, {}
    result['mesh'] = np.array(mesh)
    result['grid'] = np.array(grid)
    result['nframes'] = np.array(nframes)
    result['vecs'] = vecs
    attrs['grid_spacing'] = grid_spacing
    #---introduce a dated validator string to ensure that any changes to the pipeline do not overwrite
    #---...other data and are fully propagated downstream
    attrs['validator'] = '2017.08.16.1930'
    return result, attrs
def lipid_mesh(**kwargs):
    """
    Build one mesh object per monolayer and per frame.

    Keyword arguments read here:
        sn: simulation name (looked up but not otherwise used).
        workspace: object providing `nprocs` for the parallel pool.
        calc: calculation dict; the optional `specs.curvilinear` flag is
            forwarded to `makemesh`.
        upstream: must contain 'lipid_abstractor' with 'resnames',
            'monolayer_indices', 'nframes', 'points' and 'vecs'.
        debug: optional; when true a single mesh is built for monolayer 0,
            frame 10 with `debug=True` and the process exits with status 1.

    Returns:
        (result, attrs): `result` carries 'nframes', 'vecs', 'resnames',
        'monolayer_indices' and one entry per mesh key named
        '<monolayer>.<frame>.<key>'; `attrs` is empty.
    """
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    upstream = kwargs['upstream']['lipid_abstractor']
    resnames = upstream['resnames']
    monolayer_indices = upstream['monolayer_indices']
    nframes = upstream['nframes']
    debug = kwargs.pop('debug', False)
    mesh_options = {
        'curvilinear': calc.get('specs', {}).get('curvilinear', False)}

    def leaflet_points(frame, leaflet):
        #---positions of the lipids assigned to one leaflet in one frame
        return upstream['points'][frame][where(monolayer_indices == leaflet)]

    #---one-off debugging run on a fixed monolayer and frame
    if debug:
        makemesh(
            leaflet_points(10, 0), upstream['vecs'][10],
            debug=True, **mesh_options)
        sys.exit(1)

    #---parallel
    mesh = [[], []]
    for leaflet in range(2):
        began = time.time()
        label = 'monolayer %d, frame' % leaflet
        pool = Parallel(n_jobs=work.nprocs, verbose=0)
        mesh[leaflet] = pool(
            delayed(makemesh)(
                leaflet_points(fr, leaflet), upstream['vecs'][fr],
                **mesh_options)
            for fr in framelooper(nframes, start=began, text=label))
    checktime()

    #---pack
    attrs = {}
    result = {
        'nframes': array(nframes),
        'vecs': upstream['vecs'],
        'resnames': resnames,
        'monolayer_indices': monolayer_indices,
    }
    #---pack mesh objects
    #---keys include: vertnorms simplices nmol facenorms gauss points vec ghost_ids mean principals areas
    for key in mesh[0][0].keys():
        for leaflet in range(2):
            for fr in range(nframes):
                label = '%d.%d.%s' % (leaflet, fr, key)
                result[label] = mesh[leaflet][fr][key]
    return result, attrs
def undulations(**kwargs):
    """
    Compute bilayer midplane structures for studying undulations.

    Merges the upstream `lipid_abstractor` data with `datmerge` and puts each
    monolayer of every frame onto a regular grid with `makemesh_regular`.

    Keyword arguments read here:
        sn: simulation name (looked up but not otherwise used).
        workspace: object providing `nprocs` for the parallel pool.
        calc: calculation dict; `calc['specs']['grid_spacing']` is required.
        upstream: upstream results; monolayer indices are taken from
            'lipid_abstractor0' when present, otherwise 'lipid_abstractor'.

    Returns:
        (result, attrs): `result` holds 'mesh', 'grid', 'nframes' and 'vecs';
        `attrs` holds 'grid_spacing'.
    """
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    upname = 'lipid_abstractor'
    grid_spacing = calc['specs']['grid_spacing']
    vecs = datmerge(kwargs, upname, 'vecs')
    nframes = int(np.sum(datmerge(kwargs, upname, 'nframes')))
    trajectory = datmerge(kwargs, upname, 'points')
    attrs, result = {}, {}
    #---! hacking through error with monolayer separation
    #---only a missing key triggers the fallback; other errors propagate
    try:
        monolayer_indices = kwargs['upstream'][upname + '0'][
            'monolayer_indices']
    except KeyError:
        monolayer_indices = kwargs['upstream'][upname]['monolayer_indices']
    #---choose grid dimensions
    grid = np.array(
        [round(i) for i in np.mean(vecs, axis=0) / grid_spacing])[:2]
    #---! removed timeseries from result for new version of omnicalc
    #---parallel
    mesh = [[], []]
    for mn in range(2):
        start = time.time()
        mesh[mn] = Parallel(
            n_jobs=work.nprocs, verbose=0, require='sharedmem')(
                delayed(makemesh_regular)(
                    trajectory[fr][np.where(monolayer_indices == mn)],
                    vecs[fr], grid)
                for fr in framelooper(
                    nframes, start=start, text='monolayer %d, frame' % mn))
    checktime()
    #---pack
    result['mesh'] = np.array(mesh)
    result['grid'] = np.array(grid)
    result['nframes'] = np.array(nframes)
    result['vecs'] = vecs
    attrs['grid_spacing'] = grid_spacing
    return result, attrs
def undulations(**kwargs):
    """
    Put both monolayers of every frame onto a regular grid.

    Keyword arguments read here:
        sn: simulation name, used to look up the slice timeseries.
        workspace: object providing `nprocs` and `slice(sn)`.
        calc: calculation dict; `calc['specs']['grid_spacing']` is required.
        upstream: must contain 'lipid_abstractor' with 'nframes', 'vecs',
            'points' and 'monolayer_indices'.
        slice_name, group: select the timeseries stored in the workspace;
            a falsy group falls back to 'all'.

    Returns:
        (result, attrs): `result` holds 'mesh', 'grid', 'nframes', 'vecs'
        and 'timeseries'; `attrs` holds 'grid_spacing'.
    """
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    spacing = calc['specs']['grid_spacing']
    upstream = kwargs['upstream']['lipid_abstractor']
    nframes = upstream['nframes']
    #---grid dimensions: rounded mean box extent over the spacing, first two axes only
    cells = mean(upstream['vecs'], axis=0) / spacing
    grid = array([round(extent) for extent in cells])[:2]
    monolayer_indices = upstream['monolayer_indices']

    def leaflet_jobs(leaflet, began):
        #---one delayed gridding call per frame for the requested leaflet
        label = 'monolayer %d, frame' % leaflet
        return (
            delayed(makemesh_regular)(
                upstream['points'][fr][where(monolayer_indices == leaflet)],
                upstream['vecs'][fr], grid)
            for fr in framelooper(nframes, start=began, text=label))

    #---parallel
    began = time.time()
    mesh = [[], []]
    for leaflet in range(2):
        pool = Parallel(n_jobs=work.nprocs, verbose=0)
        mesh[leaflet] = pool(leaflet_jobs(leaflet, began))
    checktime()

    #---pack
    group_key = kwargs['group'] if kwargs['group'] else 'all'
    attrs = {}
    result = {
        'mesh': array(mesh),
        'grid': array(grid),
        'nframes': array(nframes),
        'vecs': upstream['vecs'],
        'timeseries':
            work.slice(sn)[kwargs['slice_name']][group_key]['timeseries'],
    }
    attrs['grid_spacing'] = spacing
    return result, attrs
def action(self,calculation_name=None):
    """
    Parse a specifications file to make changes to a workspace.
    This function interprets the specifications and acts on it.
    It manages the irreducible units of an omnicalc operation and ensures
    that the correct data are sent to analysis functions in the right order.

    Steps, in order:
      1. load the specs and copy `variables` into `self.vars`;
      2. replace "+"-prefixed strings (bare, or as the last item of a list)
         with the value found at that path in `self.vars`;
      3. create the groups and slices requested for every simulation;
      4. copy `meta` and `collections` into the workspace;
      5. order calculations by their `upstream` entries and run each one.

    Parameters:
        calculation_name: if not None, only this calculation is run.

    Raises:
        Exception: on unprocessed slice specifications, on upstream
            dependencies that can never be satisfied, and when the script for
            a calculation is missing or matched more than once.
    """
    status('parsing specs file',tag='status')

    #---load the yaml specifications file
    specs = self.load_specs()
    status('done loading specs',tag='status')

    #---read simulations from the slices dictionary
    sns = list(specs['slices'].keys())

    #---variables are passed directly to self.vars
    self.vars = deepcopy(specs['variables']) if 'variables' in specs else {}

    #---apply "+"-delimited internal references in the yaml file
    #---references held as the last item of a list are replaced in place inside that list
    #---the candidates are collected before any replacement so that catalog never walks a mutated tree
    list_refs = [(i,j[-1]) for i,j in catalog(specs)
        if isinstance(j,list) and j and isinstance(j[-1],str)
        and re.match(r'^\+',j[-1])]
    for path,sub in list_refs:
        source = delve(self.vars,*sub.strip('+').split('/'))
        point = delve(specs,*path[:-1])
        point[path[-1]][point[path[-1]].index(sub)] = source
    leaf_refs = [(i,j) for i,j in catalog(specs)
        if isinstance(j,str) and re.match(r'^\+',j)]
    for path,sub in leaf_refs:
        source = delve(self.vars,*sub.strip('+').split('/'))
        point = delve(specs,*path[:-1])
        point[path[-1]] = source

    #---loop over all simulations to create groups and slices
    self.save(quiet=True)
    for sn in sns:
        root = delve(specs,'slices',sn)
        #---create groups
        if 'groups' in root:
            for group,select in root['groups'].items():
                self.create_group(group=group,select=select,sn=sn)
            root.pop('groups')
        #---slice the trajectory
        if 'slices' in root:
            for sl,details in root['slices'].items():
                #---! use a default group here?
                for group in details['groups']:
                    kwargs = {'sn':sn,'start':details['start'],
                        'end':details['end'],'skip':details['skip'],
                        'slice_name':sl,'group':group}
                    if 'pbc' in details: kwargs['pbc'] = details['pbc']
                    self.create_slice(**kwargs)
            root.pop('slices')
        #---anything left over was not understood
        if root != {}:
            raise Exception('[ERROR] unprocessed specifications %s'%str(root))
    #---we only save after writing all slices. if the slicer fails autoreload will find preexisting files
    self.save(quiet=True)
    checktime()

    #---meta is passed to self.meta
    if 'meta' in specs:
        for sn in specs['meta']:
            self.meta[sn] = specs['meta'][sn]

    #---collections are groups of simulations
    if 'collections' in specs:
        self.vars['collections'] = specs['collections']

    #---calculations are executed last and organized in this loop
    if 'calculations' in specs:
        status('starting calculations',tag='status')
        #---note that most variables including calc mirror the specs file
        self.calc = dict(specs['calculations'])
        #---infer the correct order for the calculation keys from their upstream dependencies
        upstream_catalog = [i for i,j in catalog(self.calc) if 'upstream' in i]
        #---if there are no specs required to get the upstream data object the user can either
        #---...use none/None as a placeholder or use the name as the key as in "upstream: name"
        for uu,uc in enumerate(upstream_catalog):
            if uc[-1]=='upstream':
                upstream_catalog[uu] = upstream_catalog[uu]+[delve(self.calc,*uc)]
        #---collect every name that follows an "upstream" key, accumulating over all paths that
        #---...belong to the same calculation so that no upstream entry is dropped
        depends = {}
        for t in upstream_catalog:
            found = depends.setdefault(t[0],[])
            for ii,item in enumerate(t[:-1]):
                if item=='upstream' and t[ii+1] not in found:
                    found.append(t[ii+1])
        calckeys = [i for i in self.calc if i not in depends]
        #---repeatedly release the calculations whose upstream entries are all scheduled
        #---a pass that releases nothing means the rest can never run so we stop instead of looping
        pending = dict(depends)
        while pending:
            ready = [name for name,ups in pending.items()
                if ups!=[] and all([up in calckeys for up in ups])]
            if not ready:
                raise Exception(
                    '[ERROR] cannot resolve upstream dependencies: %s'%str(pending))
            for name in ready:
                calckeys.append(name)
                del pending[name]
        #---if a specific calculation name is given then only perform that calculation
        if calculation_name is not None: calckeys = [calculation_name]
        for calcname in calckeys:
            details = specs['calculations'][calcname]
            status('checking calculation %s'%calcname,tag='status')
            new_calcs = self.interpret_specs(details)
            #---perform calculations
            for calc in new_calcs:
                #---find the script with the function
                fns = []
                for (dirpath, dirnames, filenames) in os.walk('./'):
                    fns.extend([dirpath+'/'+fn for fn in filenames])
                #---a list (not a lazy filter object) so that len() and indexing work
                script_regex = r'^\.\/[^ate].+\/%s\.py$'%calcname
                search = [fn for fn in fns if re.match(script_regex,fn)]
                if len(search)==0:
                    raise Exception('\n[ERROR] cannot find %s.py'%calcname)
                elif len(search)>1:
                    raise Exception('\n[ERROR] redundant matches: %s'%str(search))
                else:
                    sys.path.insert(0,os.path.dirname(search[0]))
                    function = unpacker(search[0],calcname)
                    status('computing %s'%calcname,tag='loop')
                    computer(function,calc=calc,workspace=self)
                    self.save()
                checktime()
    self.save()
def lipid_abstractor(grofile, trajfile, **kwargs):
    """
    LIPID ABSTRACTOR
    Reduce a bilayer simulation to a set of points.

    Loads the trajectory with MDAnalysis, reduces every selected residue to
    one point per frame with `codes.mesh.centroid`, assigns each point to a
    monolayer with `codes.mesh.LeafletFinder`, and optionally removes
    periodic jumps.

    Parameters:
        grofile, trajfile: structure and trajectory passed to
            `MDAnalysis.Universe`.

    Keyword arguments read here:
        sn: simulation name (looked up but not otherwise used).
        workspace: object providing `nprocs` for the parallel pool.
        parallel: optional, default False; compute centroids with joblib.
        calc: calculation dict. `specs.selector` is required and must have
            type 'com', 'select' or 'custom'. `specs.nojumps` (a string of
            axis letters drawn from 'xyz') and `specs.separator` (options for
            the leaflet finder) are optional.

    Returns:
        (result, attrs): `result` holds 'resnames', 'monolayer_indices',
        'vecs', 'nframes', 'points', 'resids' and 'resids_exact'; `attrs`
        holds 'selector', 'nojumps' and 'separator'.

    Raises:
        Exception: on an unclear or empty selection, on atoms without a mass
            in either mass table, and on redundant resids combined with a
            selector that is not of type 'com' or 'select' with resnames.
    """
    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    parallel = kwargs.get('parallel', False)
    #---prepare universe
    #---note that the universe throws a UserWarning on coarse-grained systems
    #---...which is annoying to elevate to error stage and handled below without problems
    uni = MDAnalysis.Universe(grofile, trajfile)
    nframes = len(uni.trajectory)
    #---MDAnalysis uses Angstroms not nm
    lenscale = 10.
    #---select residues of interest
    selector = kwargs['calc']['specs']['selector']
    nojumps = kwargs['calc']['specs'].get('nojumps', '')
    #---the separator options are optional so the default is shared by every later use
    separator = kwargs['calc']['specs'].get('separator', {})

    #---center of mass over residues
    if ('type' in selector and selector['type'] == 'com'
            and 'resnames' in selector):
        resnames = selector['resnames']
        selstring = '(' + ' or '.join(
            ['resname %s' % i for i in resnames]) + ')'
    elif ('type' in selector and selector['type'] == 'select'
            and 'selection' in selector):
        if 'resnames' not in selector:
            raise Exception('add resnames to the selector')
        selstring = selector['selection']
    elif selector.get('type', None) == 'custom':
        #---NOTE(review): this executes code taken verbatim from the specs; only use trusted specs
        custom_exec_vars = dict(uni=uni, selector=selector)
        exec(selector['custom'], globals(), custom_exec_vars)
        selstring = custom_exec_vars['selstring']
    else:
        raise Exception('\n[ERROR] unclear selection %s' % str(selector))

    #---compute masses by atoms within the selection
    sel = uni.select_atoms(selstring)
    if len(sel) == 0:
        raise Exception('empty selection')
    #---masses are looked up by the first letter of each atom name
    mass_table = {
        'H': 1.008, 'C': 12.011, 'O': 15.999,
        'N': 14.007, 'P': 30.974, 'S': 32.065}
    missing_atoms_aamd = list(
        set([i[0] for i in sel.atoms.names if i[0] not in mass_table]))
    if any(missing_atoms_aamd):
        print(
            '[WARNING] missing mass for atoms %s so we assume this is coarse-grained'
            % missing_atoms_aamd)
        #---MARTINI masses
        mass_table = {
            'C': 72, 'N': 72, 'P': 72, 'S': 45, 'G': 72, 'D': 72, 'R': 72}
        missing_atoms_cgmd = list(
            set([i[0] for i in sel.atoms.names if i[0] not in mass_table]))
        if any(missing_atoms_cgmd):
            raise Exception(
                'we are trying to assign masses. if this simulation is atomistic then we are '
                + 'missing atoms "%s". if it is MARTINI then we are missing atoms "%s"'
                % (missing_atoms_aamd, missing_atoms_cgmd))
    masses = np.array([mass_table[i[0]] for i in sel.atoms.names])

    # note that the following sequence has been reworked to reflect apparent changes in the
    # ... residue-handling. previously we used `if len(sel.resids)==len(np.unique(sel.resids)):` but this
    # ... is now incompatible
    resids = sel.residues.resids
    # create lookup table of residue indices
    if len(resids) == len(np.unique(resids)):
        divider = [np.where(sel.resids == r) for r in np.unique(resids)]
    # note that redundant residue numbering requires special treatment
    else:
        #! note that the resid handling change above may not have been implemented in the custom method below
        if not (('type' in selector)
                and (selector['type'] in ['com', 'select'])
                and ('resnames' in selector)):
            raise Exception(
                'residues have redundant resids and selection is not the easy one'
            )
        #---note that MDAnalysis sel.residues *cannot* handle redundant numbering
        #---note also that some test cases have redundant residues *and* adjacent residues with
        #---...the same numbering. previously we tried a method that used the following sequence:
        #---......divider = [np.where(np.in1d(np.where(np.in1d(
        #---..........uni.select_atoms('all').resnames,resnames))[0],d))[0] for d in divider_abs]
        #---...however this method is flawed because it uses MDAnalysis sel.residues and in fact
        #---...since it recently worked, RPB suspects that a recent patch to MDAnalysis has broken it
        #---note that rpb started a method to correct this and found v inconsistent MDAnalysis behavior
        #---the final fix is heavy-handed: leaving nothing to MDAnalysis subselections
        allsel = uni.select_atoms('all')
        lipids = np.where(
            np.in1d(allsel.resnames, np.array(selector['resnames'])))[0]
        resid_changes = np.concatenate(([-1], np.where(
            allsel[lipids].resids[1:] != allsel[lipids].resids[:-1])[0]))
        residue_atomcounts = resid_changes[1:] - resid_changes[:-1]
        #---get the residue names for each lipid in our selection by the first atom in that lipid
        #---the resid_changes is prepended with -1 in the unlikely (but it happened) event that
        #---...a unique lipid leads this list (note that a blase comment dismissed this possibility at
        #---...first!) and here we correct the resnames list to reflect this. resnames samples the last
        #---...atom in each residue from allsel
        resnames = np.concatenate(
            (allsel[lipids].resnames[resid_changes[1:]],
             [allsel[lipids].resnames[-1]]))
        #---zip is materialized because it is a lazy iterator on Python 3
        guess_atoms_per_residue = np.array(
            list(zip(resnames, residue_atomcounts)))
        #---get consensus counts for each lipid name
        atoms_per_residue = {}
        for name in np.unique(resnames):
            #---get the most common count
            counts, obs_counts = np.unique(
                guess_atoms_per_residue[:, 1][np.where(
                    guess_atoms_per_residue[:, 0] == name)[0]].astype(int),
                return_counts=True)
            atoms_per_residue[name] = counts[obs_counts.argmax()]
        #---faster method
        resid_to_start = np.transpose(
            np.unique(allsel.resids, return_index=True))
        resid_to_start = np.concatenate(
            (resid_to_start, [[resid_to_start[-1][0] + 1, len(lipids)]]))
        divider = np.array([
            np.arange(i, j)
            for i, j in np.transpose((resid_to_start[:, 1][:-1],
                                      resid_to_start[:, 1][1:]))
        ])
        #---make sure no molecules have the wrong number of atoms
        if not set(np.unique([len(i) for i in divider])) == set(
                atoms_per_residue.values()):
            status('checking lipid residue indices the careful way',
                   tag='warning')
            #---the following method is slow on large systems. we use it when the fast method above fails
            #---iterate over the list of lipid atoms and get the indices for each N-atoms for each lipid
            counter, divider = 0, []
            while counter < len(lipids):
                status('indexing lipids', i=counter, looplen=len(lipids),
                       tag='compute')
                #---until the end, get the next lipid resname
                this_resname = allsel.resnames[lipids][counter]
                if selector['type'] == 'select':
                    #---the only way to subselect here is to select on each divided lipid (since
                    #---...the procedure above has correctly divided the lipids). we perform the
                    #---...subselection by pivoting over indices
                    #---! this method needs checked
                    this_inds = np.arange(
                        counter, counter + atoms_per_residue[this_resname])
                    this_lipid = allsel[lipids][this_inds]
                    this_subsel = np.where(
                        np.in1d(
                            this_lipid.indices,
                            this_lipid.select_atoms(
                                selector['selection']).indices))[0]
                    divider.append(this_inds[this_subsel])
                else:
                    divider.append(
                        np.arange(
                            counter,
                            counter + atoms_per_residue[this_resname]))
                counter += atoms_per_residue[this_resname]
            #---in the careful method the sel from above is broken but allsel[lipids] is correct
            #---NOTE(review): kept inside the careful branch as the comment above implies; confirm
            sel = allsel[lipids]
            masses = np.array([mass_table[i[0]] for i in sel.atoms.names])

    #---load trajectory into memory
    trajectory, vecs = [], []
    for fr in range(nframes):
        status('loading frame', tag='load', i=fr, looplen=nframes)
        uni.trajectory[fr]
        trajectory.append(sel.positions / lenscale)
        #! critical fix: you must cast the dimensions or you get repeated vectors
        vecs.append(np.array(uni.trajectory[fr].dimensions[:3]))
    vecs = np.array(vecs) / lenscale
    checktime()

    #---parallel
    start = time.time()
    if parallel:
        coms = Parallel(n_jobs=work.nprocs, verbose=0)(
            delayed(codes.mesh.centroid)(trajectory[fr], masses, divider)
            for fr in framelooper(nframes, start=start))
    else:
        coms = []
        for fr in range(nframes):
            status('computing centroid', tag='compute', i=fr,
                   looplen=nframes, start=start)
            coms.append(codes.mesh.centroid(trajectory[fr], masses, divider))

    #---identify leaflets
    status('identify leaflets', tag='compute')
    leaflet_finder_trials = separator.get('trials', 3)
    #---preselect a few frames, always including the zeroth
    #---never ask for more random frames than exist after frame zero
    n_random = max(0, min(leaflet_finder_trials, nframes - 1))
    selected_frames = [0]
    if n_random > 0:
        selected_frames += list(
            np.random.choice(
                np.arange(1, nframes), n_random, replace=False))
    #---alternate lipid representation is useful for separating monolayers
    if 'lipid_tip' in separator:
        #---the tip selection gets its own name so that sel still refers to the lipids when packing
        tip_sel = uni.select_atoms(separator['lipid_tip'])
        atoms_separator = []
        for fr in selected_frames:
            uni.trajectory[fr]
            atoms_separator.append(tip_sel.positions / lenscale)
    #---default is to use the centers of mass to distinguish leaflets
    else:
        atoms_separator = [coms[fr] for fr in selected_frames]
    #---pass frames to the leaflet finder, which has legacy and cluster modes
    leaflet_finder = codes.mesh.LeafletFinder(
        atoms_separator=atoms_separator,
        #---pass along the corresponding vectors for topologize
        vecs=[vecs[i] for i in selected_frames],
        cluster=separator.get('cluster', False),
        cluster_neighbors=separator.get('cluster_neighbors', None),
        topologize_tolerance=separator.get('topologize_tolerance', None))
    #---get the indices from the leaflet finder
    monolayer_indices = leaflet_finder.monolayer_indices
    # for convenience when doing planar bilayers we put the zero index on top
    top_mono = np.argmax([
        atoms_separator[0][monolayer_indices == i][:, 2].mean()
        for i in range(2)
    ])
    if top_mono != 0:
        monolayer_indices = 1 - monolayer_indices
    checktime()

    coms_out = np.array(coms)
    #---remove jumping in some directions if requested
    if nojumps:
        nojump_dims = ['xyz'.index(j) for j in nojumps]
        nobjs = coms_out.shape[1]
        displacements = np.array([(coms_out[1:] - coms_out[:-1])[..., i]
                                  for i in range(3)])
        for d in nojump_dims:
            shift_binary = (
                np.abs(displacements) * (1. - 2 * (displacements < 0)) /
                (np.transpose(np.tile(vecs[:-1], (nobjs, 1, 1))) / 2.)
            )[d].astype(int)
            shift = (np.cumsum(-1 * shift_binary, axis=0) *
                     np.transpose(np.tile(vecs[:-1, d], (nobjs, 1))))
            coms_out[1:, :, d] += shift

    #---pack
    attrs, result = {}, {}
    attrs['selector'] = selector
    attrs['nojumps'] = nojumps
    result['resnames'] = np.array(sel.residues.resnames)
    result['monolayer_indices'] = np.array(monolayer_indices)
    result['vecs'] = vecs
    result['nframes'] = np.array(nframes)
    result['points'] = coms_out
    result['resids'] = np.array(np.unique(resids))
    result['resids_exact'] = resids
    #---reuse the defaulted separator so an omitted spec does not fail after all the work is done
    attrs['separator'] = separator
    return result, attrs