Example 1
def import_readymade_meso_v2_membrane(**kwargs):
    """
    Compute bilayer midplane structures for studying undulations.
    Adapted from `undulations.py`.
    """
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    #---import mesh points
    points = import_membrane_mesh(sn=sn, calc=calc, work=work)
    #---ensure there are the same number of points
    points_shapes = list(set([p.shape for p in points]))
    if len(points_shapes) != 1:
        raise Exception('some frames have a different number of points: %s' %
                        points_shapes)
    else:
        npoints, ncols = points_shapes[0]
    if ncols != 4: raise Exception('expecting 4-column input on incoming data')
    #---with a consistent number of points everything is an array
    points = np.array(points)[:, :, :3]
    #---previously checked that the minimum points were identically zero but this was not always true
    #---box vectors are just the maximum points
    #---! check that this assumption makes sense
    vecs = points.max(axis=1)[:, :3]
    #---debug the shapes in 3D
    if False:
        from codes import review3d
        fr = 0
        review3d.pbcbox(vecs[fr])
        review3d.review3d(points=[points[fr][:, :3]], radius=10)
    grid_spacing = calc['specs']['grid_spacing']
    nframes = len(points)
    #---choose grid dimensions
    grid = np.array([round(i)
                     for i in np.mean(vecs, axis=0) / grid_spacing])[:2]
    #---compute in parallel
    start = time.time()
    mesh = [[]]
    #---use joblib shared memory as in `undulations` below; the legacy
    #---...`delayed(func, has_shareable_memory)` form breaks on newer joblib
    mesh[0] = Parallel(n_jobs=work.nprocs, verbose=0, require='sharedmem')(
        delayed(makemesh_regular)(points[fr], vecs[fr], grid)
        for fr in framelooper(nframes, start=start, text='frame'))
    checktime()
    #---pack
    attrs, result = {}, {}
    result['mesh'] = np.array(mesh)
    result['grid'] = np.array(grid)
    result['nframes'] = np.array(nframes)
    result['vecs'] = vecs
    attrs['grid_spacing'] = grid_spacing
    #---introduce a dated validator string to ensure that any changes to the pipeline do not overwrite
    #---...other data and are fully propagated downstream
    attrs['validator'] = '2017.08.16.1930'
    return result, attrs
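The parallel loop above delegates the actual meshing to `makemesh_regular`, which lives elsewhere in the codebase (it is also used by `undulations` below). A minimal sketch of the underlying idea, assuming the goal is a regular-grid height field averaged from the wrapped 3D points; the function below is illustrative only and not the real `codes.mesh` API:

import numpy as np

def height_field_sketch(points, vec, grid):
    """Bin 3D points onto a regular XY grid and average z per cell (illustrative only)."""
    nx, ny = int(grid[0]), int(grid[1])
    #---map each point's (x, y) to a grid-cell index under periodic wrapping
    ij = np.floor(np.array([nx, ny]) * (points[:, :2] % vec[:2]) / vec[:2]).astype(int)
    heights = np.zeros((nx, ny))
    counts = np.zeros((nx, ny))
    np.add.at(heights, (ij[:, 0], ij[:, 1]), points[:, 2])
    np.add.at(counts, (ij[:, 0], ij[:, 1]), 1)
    #---average z in occupied cells; empty cells would need interpolation in practice
    mask = counts > 0
    heights[mask] /= counts[mask]
    return heights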
Example 2
def lipid_mesh(**kwargs):
	"""
	Compute monolayer mesh objects.
	"""

	#---parameters
	sn = kwargs['sn']
	work = kwargs['workspace']
	calc = kwargs['calc']
	dat = kwargs['upstream']['lipid_abstractor']
	resnames = dat['resnames']
	monolayer_indices = dat['monolayer_indices']
	nframes = dat['nframes']
	debug = kwargs.pop('debug',False)
	kwargs_out = dict(curvilinear=calc.get('specs',{}).get('curvilinear',False))

	#---parallel
	mesh = [[],[]]
	if debug: 
		mn,fr = 0,10
		makemesh(dat['points'][fr][np.where(monolayer_indices==mn)],dat['vecs'][fr],
			debug=True,**kwargs_out)
		sys.exit(1)
	for mn in range(2):
		start = time.time()
		mesh[mn] = Parallel(n_jobs=work.nprocs,verbose=0)(
			delayed(makemesh)(
				dat['points'][fr][np.where(monolayer_indices==mn)],dat['vecs'][fr],**kwargs_out)
			for fr in framelooper(nframes,start=start,text='monolayer %d, frame'%mn))
	checktime()

	#---pack
	attrs,result = {},{}
	result['nframes'] = np.array(nframes)
	result['vecs'] = dat['vecs']
	result['resnames'] = resnames
	result['monolayer_indices'] = monolayer_indices
		
	#---pack mesh objects
	#---keys include: vertnorms simplices nmol facenorms gauss points vec ghost_ids mean principals areas
	keylist = mesh[0][0].keys()
	for key in keylist:
		for mn in range(2):
			for fr in range(nframes): 
				result['%d.%d.%s'%(mn,fr,key)] = mesh[mn][fr][key]		
				
	return result,attrs	
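Because the mesh objects are flattened into keys of the form '%d.%d.%s' (monolayer, frame, key) for storage, downstream consumers must reassemble the nesting. A minimal sketch of the reverse operation, assuming `result` is the dict packed above (the helper name is hypothetical):

def unpack_mesh(result, nframes):
    """Rebuild mesh[mn][fr][key] nesting from the flat 'mn.fr.key' keys (illustrative)."""
    mesh = [[{} for fr in range(nframes)] for mn in range(2)]
    for flat_key, val in result.items():
        parts = flat_key.split('.', 2)
        #---skip the non-mesh entries (nframes, vecs, resnames, monolayer_indices)
        if len(parts) != 3 or not (parts[0].isdigit() and parts[1].isdigit()): continue
        mesh[int(parts[0])][int(parts[1])][parts[2]] = val
    return mesh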
Example 3
def undulations(**kwargs):
    """
    Compute bilayer midplane structures for studying undulations.
    """

    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    upname = 'lipid_abstractor'
    grid_spacing = calc['specs']['grid_spacing']
    vecs = datmerge(kwargs, upname, 'vecs')
    nframes = int(np.sum(datmerge(kwargs, upname, 'nframes')))
    trajectory = datmerge(kwargs, upname, 'points')
    attrs, result = {}, {}
    #---! hacking through error with monolayer separation
    try:
        monolayer_indices = kwargs['upstream'][upname + '0']['monolayer_indices']
    except KeyError:
        monolayer_indices = kwargs['upstream'][upname]['monolayer_indices']
    #---choose grid dimensions
    grid = np.array([round(i)
                     for i in np.mean(vecs, axis=0) / grid_spacing])[:2]
    #---! removed timeseries from result for new version of omnicalc
    #---parallel
    mesh = [[], []]
    for mn in range(2):
        start = time.time()
        mesh[mn] = Parallel(
            n_jobs=work.nprocs, verbose=0, require='sharedmem')(
                delayed(makemesh_regular)(trajectory[fr][np.where(
                    monolayer_indices == mn)], vecs[fr], grid)
                for fr in framelooper(
                    nframes, start=start, text='monolayer %d, frame' % mn))
    checktime()

    #---pack
    result['mesh'] = np.array(mesh)
    result['grid'] = np.array(grid)
    result['nframes'] = np.array(nframes)
    result['vecs'] = vecs
    attrs['grid_spacing'] = grid_spacing
    return result, attrs
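This variant of `undulations` differs from the single-slice version in Example 4 mainly in using `datmerge` to pool data from several upstream `lipid_abstractor` slices. `datmerge` is part of omnicalc and not shown here; a rough sketch of the behavior the calls above rely on, assuming numbered upstream keys like `lipid_abstractor0`, `lipid_abstractor1` (illustrative, not the actual implementation):

import numpy as np

def datmerge_sketch(kwargs, upname, key):
    """Concatenate one key across numbered upstream slices (illustrative, not the omnicalc code)."""
    upstream = kwargs['upstream']
    #---single upstream slice: no merging required
    if upname in upstream: return np.atleast_1d(upstream[upname][key])
    #---numbered slices: collect upname0, upname1, ... in order and stack them
    names = sorted([n for n in upstream if n.startswith(upname)],
                   key=lambda n: int(n[len(upname):]))
    return np.concatenate([np.atleast_1d(upstream[n][key]) for n in names])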
Example 4
def undulations(**kwargs):
    """
    Compute bilayer midplane structures for studying undulations.
    """

    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    grid_spacing = calc['specs']['grid_spacing']
    dat = kwargs['upstream']['lipid_abstractor']
    nframes = dat['nframes']

    #---choose grid dimensions
    grid = np.array([round(i)
                     for i in np.mean(dat['vecs'], axis=0) / grid_spacing])[:2]
    monolayer_indices = dat['monolayer_indices']

    #---parallel
    start = time.time()
    mesh = [[], []]
    for mn in range(2):
        mesh[mn] = Parallel(n_jobs=work.nprocs, verbose=0)(
            delayed(makemesh_regular)(dat['points'][fr][np.where(
                monolayer_indices == mn)], dat['vecs'][fr], grid)
            for fr in framelooper(
                nframes, start=start, text='monolayer %d, frame' % mn))
    checktime()

    #---pack
    attrs, result = {}, {}
    result['mesh'] = np.array(mesh)
    result['grid'] = np.array(grid)
    result['nframes'] = np.array(nframes)
    result['vecs'] = dat['vecs']
    result['timeseries'] = work.slice(sn)[kwargs['slice_name']][
        'all' if not kwargs['group'] else kwargs['group']]['timeseries']
    attrs['grid_spacing'] = grid_spacing
    return result, attrs
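The midplane height fields packed into `result['mesh']` are typically consumed by an undulation-spectrum step: the two monolayer surfaces are averaged into the midplane and the frame-averaged power spectrum <|h_q|^2> is computed. A minimal sketch, assuming `result['mesh']` has shape (2, nframes, nx, ny); the normalization is a common convention, not necessarily the one used downstream:

import numpy as np

def undulation_spectrum_sketch(result):
    """Average undulation power spectrum <|h_q|^2> from the midplane height field (illustrative)."""
    mesh = result['mesh']              # shape (2, nframes, nx, ny)
    surf = mesh.mean(axis=0)           # midplane: average the two monolayers
    surf = surf - surf.mean(axis=(1, 2), keepdims=True)  # remove per-frame mean height
    nx, ny = surf.shape[-2:]
    #---FFT per frame, normalized so the spectrum is resolution-independent
    hq = np.fft.fft2(surf, axes=(-2, -1)) / (nx * ny)
    return (np.abs(hq) ** 2).mean(axis=0)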
Example 5
	def action(self,calculation_name=None):
		"""
		Parse a specifications file to make changes to a workspace.
		This function interprets the specifications and acts on them.
		It manages the irreducible units of an omnicalc operation and ensures
		that the correct data are sent to analysis functions in the right order.
		"""

		status('parsing specs file',tag='status')

		#---load the yaml specifications file
		specs = self.load_specs()
		
		#---read simulations from the slices dictionary
		sns = specs['slices'].keys()
		#---variables are passed directly to self.vars
		self.vars = deepcopy(specs['variables']) if 'variables' in specs else {}

		#---apply "+"-delimited internal references in the yaml file
		for path,sub in [(i,j[-1]) for i,j in catalog(specs) if type(j)==list
			and type(j[-1])==str and re.match('^\+',j[-1])]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]][point[path[-1]].index(sub)] = source
		for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match('^\+',j)]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]] = source
		
		#---loop over all simulations to create groups and slices
		self.save(quiet=True)
		for route in [('slices',i) for i in sns]:
			root,sn = delve(specs,*route),route[-1]
			#---create groups
			if 'groups' in root:
				for group,select in root['groups'].items():
					kwargs = {'group':group,'select':select,'sn':sn}
					self.create_group(**kwargs)
				root.pop('groups')
			#---slice the trajectory
			if 'slices' in root:
				for sl,details in root['slices'].items(): 
					#---! use a default group here?
					for group in details['groups']:
						kwargs = {'sn':sn,'start':details['start'],
							'end':details['end'],'skip':details['skip'],'slice_name':sl}
						kwargs['group'] = group
						if 'pbc' in details: kwargs['pbc'] = details['pbc']
						self.create_slice(**kwargs)
				root.pop('slices')
			if root != {}: raise Exception('[ERROR] unprocessed specifications %s'%str(root))
			else: del root
		#---we only save after writing all slices. if the slicer fails autoreload will find preexisting files
		self.save(quiet=True)
		checktime()

		#---meta is passed to self.meta
		if 'meta' in specs:
			for sn in specs['meta']:
				self.meta[sn] = specs['meta'][sn]

		#---collections are groups of simulations
		if 'collections' in specs: self.vars['collections'] = specs['collections']

		#---calculations are executed last and organized in this loop
		if 'calculations' in specs:
			status('starting calculations',tag='status')
			#---note that most variables including calc mirror the specs file
			self.calc = dict(specs['calculations'])
			#---infer the correct order for the calculation keys from their upstream dependencies
			upstream_catalog = [i for i,j in catalog(self.calc) if 'upstream' in i]
			#---if there are no specs required to get the upstream data object the user can either 
			#---...use none/None as a placeholder or use the name as the key as in "upstream: name"
			for uu,uc in enumerate(upstream_catalog):
				if uc[-1]=='upstream': upstream_catalog[uu] = upstream_catalog[uu]+[delve(self.calc,*uc)]
			depends = {t[0]:[t[ii+1] for ii,i in enumerate(t) if ii<len(t)-1 and t[ii]=='upstream'] 
				for t in upstream_catalog}
			calckeys = [i for i in self.calc if i not in depends]
			#---check that calckeys covers every upstream dependency
			#---! this check is currently a no-op; come back to this!
			#---...(see the ordering sketch after this function)
			list(set(calckeys+[i for j in depends.values() for i in j]))
			while any(depends):
				ii,i = depends.popitem()
				if all([j in calckeys for j in i]) and i!=[]: calckeys.append(ii)
				else: depends[ii] = i
			#---if a specific calculation name is given then only perform that calculation
			if calculation_name is not None: calckeys = [calculation_name]
			for calcname in calckeys:
				details = specs['calculations'][calcname]
				status('checking calculation %s'%calcname,tag='status')
				new_calcs = self.interpret_specs(details)
				#---perform calculations
				for calc in new_calcs:
					#---find the script with the function
					fns = []
					for (dirpath, dirnames, filenames) in os.walk('./'): 
						fns.extend([dirpath+'/'+fn for fn in filenames])
					search = list(filter(lambda x:re.match('^\.\/[^ate].+\/%s\.py$'%calcname,x),fns))
					if len(search)==0: raise Exception('\n[ERROR] cannot find %s.py'%calcname)
					elif len(search)>1: raise Exception('\n[ERROR] redundant matches: %s'%str(search))
					else:
						sys.path.insert(0,os.path.dirname(search[0]))
						function = unpacker(search[0],calcname)
						status('computing %s'%calcname,tag='loop')
						computer(function,calc=calc,workspace=self)
						self.save()
					checktime()
		self.save()
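The dependency-ordering loop above (flagged '#---! come back to this!') pops items until all are placed and can spin forever on a cyclic or unsatisfiable `upstream` reference. A sketch of an equivalent ordering with an explicit failure check, assuming `depends` maps each calculation name to the list of upstream names it consumes (illustrative, not the omnicalc implementation):

def order_calculations(calc_names, depends):
    """Order names so every upstream dependency precedes its consumer (illustrative)."""
    ordered = [name for name in calc_names if name not in depends]
    pending = dict(depends)
    while pending:
        #---a calculation is ready once all of its upstream names are already placed
        ready = [name for name, ups in pending.items()
                 if all(up in ordered for up in ups)]
        if not ready:
            raise Exception('[ERROR] cyclic or missing upstream dependencies: %s' % pending)
        for name in ready:
            ordered.append(name)
            pending.pop(name)
    return ordered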
def lipid_abstractor(grofile, trajfile, **kwargs):
    """
	LIPID ABSTRACTOR
	Reduce a bilayer simulation to a set of points.
	"""

    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    parallel = kwargs.get('parallel', False)
    #---prepare universe
    #---note that the universe throws a UserWarning on coarse-grained systems;
    #---...the warning is harmless and the coarse-grained case is handled below
    uni = MDAnalysis.Universe(grofile, trajfile)
    nframes = len(uni.trajectory)
    #---MDAnalysis uses Angstroms not nm
    lenscale = 10.
    #---select residues of interest
    selector = kwargs['calc']['specs']['selector']
    nojumps = kwargs['calc']['specs'].get('nojumps', '')

    #---center of mass over residues
    if 'type' in selector and selector['type'] == 'com' and 'resnames' in selector:
        resnames = selector['resnames']
        selstring = '(' + ' or '.join(['resname %s' % i for i in resnames]) + ')'
    elif 'type' in selector and selector['type'] == 'select' and 'selection' in selector:
        if 'resnames' not in selector:
            raise Exception('add resnames to the selector')
        selstring = selector['selection']
    elif selector.get('type', None) == 'custom':
        custom_exec_vars = dict(uni=uni, selector=selector)
        exec(selector['custom'], globals(), custom_exec_vars)
        selstring = custom_exec_vars['selstring']
    else:
        raise Exception('\n[ERROR] unclear selection %s' % str(selector))

    #---compute masses by atoms within the selection
    sel = uni.select_atoms(selstring)
    if len(sel) == 0: raise Exception('empty selection')
    mass_table = {
        'H': 1.008,
        'C': 12.011,
        'O': 15.999,
        'N': 14.007,
        'P': 30.974,
        'S': 32.065
    }
    missing_atoms_aamd = list(
        set([i[0] for i in sel.atoms.names if i[0] not in mass_table]))
    if any(missing_atoms_aamd):
        print(
            '[WARNING] missing mass for atoms %s so we assume this is coarse-grained'
            % missing_atoms_aamd)
        #---MARTINI masses
        mass_table = {
            'C': 72,
            'N': 72,
            'P': 72,
            'S': 45,
            'G': 72,
            'D': 72,
            'R': 72
        }
        missing_atoms_cgmd = list(
            set([i[0] for i in sel.atoms.names if i[0] not in mass_table]))
        if any(missing_atoms_cgmd):
            raise Exception(
                'we are trying to assign masses. if this simulation is atomistic then we are '
                +
                'missing atoms "%s". if it is MARTINI then we are missing atoms "%s"'
                % (missing_atoms_aamd, missing_atoms_cgmd))
        else:
            masses = np.array([mass_table[i[0]] for i in sel.atoms.names])
    else:
        masses = np.array([mass_table[i[0]] for i in sel.atoms.names])

    # note that the following sequence has been reworked to reflect apparent changes in the
    # ... residue-handling. previously we used `if len(sel.resids)==len(np.unique(sel.resids)):` but this
    # ... is now incompatible
    resids = sel.residues.resids
    # create lookup table of residue indices
    if len(resids) == len(np.unique(resids)):
        divider = [np.where(sel.resids == r) for r in np.unique(resids)]
    # note that redundant residue numbering requires special treatment
    else:
        #! note that the resid handling change above may not have been implemented in the custom method below
        if (('type' in selector) and (selector['type'] in ['com', 'select'])
                and ('resnames' in selector)):
            #---note that MDAnalysis sel.residues *cannot* handle redundant numbering
            #---note also that some test cases have redundant residues *and* adjacent residues with
            #---...the same numbering. previously we tried a method that used the following sequence:
            #---......divider = [np.where(np.in1d(np.where(np.in1d(
            #---..........uni.select_atoms('all').resnames,resnames))[0],d))[0] for d in divider_abs]
            #---...however this method is flawed because it uses MDAnalysis sel.residues and in fact
            #---...since it recently worked, RPB suspects that a recent patch to MDAnalysis has broken it
            #---note that rpb started a method to correct this and found very inconsistent MDAnalysis behavior
            #---the final fix is heavy-handed: leaving nothing to MDAnalysis subselections
            allsel = uni.select_atoms('all')
            lipids = np.where(
                np.in1d(allsel.resnames, np.array(selector['resnames'])))[0]
            resid_changes = np.concatenate(([
                -1
            ], np.where(
                allsel[lipids].resids[1:] != allsel[lipids].resids[:-1])[0]))
            residue_atomcounts = resid_changes[1:] - resid_changes[:-1]
            #---get the residue names for each lipid in our selection by the first atom in that lipid
            #---the resid_changes is prepended with -1 in the unlikely (but it happened) event that
            #---...a unique lipid leads this list (note that a blasé comment dismissed this possibility at
            #---...first!) and here we correct the resnames list to reflect this. resnames samples the last
            #---...atom in each residue from allsel
            resnames = np.concatenate(
                (allsel[lipids].resnames[resid_changes[1:]],
                 [allsel[lipids].resnames[-1]]))
            guess_atoms_per_residue = np.array(
                list(zip(resnames, residue_atomcounts)))
            #---get consensus counts for each lipid name
            atoms_per_residue = {}
            for name in np.unique(resnames):
                #---get the most common count
                counts, obs_counts = np.unique(
                    guess_atoms_per_residue[:, 1][np.where(
                        guess_atoms_per_residue[:, 0] == name)[0]].astype(int),
                    return_counts=True)
                atoms_per_residue[name] = counts[obs_counts.argmax()]
            #---faster method
            resid_to_start = np.transpose(
                np.unique(allsel.resids, return_index=True))
            resid_to_start = np.concatenate(
                (resid_to_start, [[resid_to_start[-1][0] + 1,
                                   len(lipids)]]))
            divider = np.array([
                np.arange(i, j)
                for i, j in np.transpose((resid_to_start[:, 1][:-1],
                                          resid_to_start[:, 1][1:]))
            ])
            #---make sure no molecules have the wrong number of atoms
            if not set(np.unique([len(i) for i in divider])) == set(
                    atoms_per_residue.values()):
                status('checking lipid residue indices the careful way',
                       tag='warning')
                #---the following method is slow on large systems. we use it when the fast method above fails
                #---iterate over the list of lipid atoms and get the indices for each N-atoms for each lipid
                counter, divider = 0, []
                while counter < len(lipids):
                    status('indexing lipids',
                           i=counter,
                           looplen=len(lipids),
                           tag='compute')
                    #---until the end, get the next lipid resname
                    this_resname = allsel.resnames[lipids][counter]
                    if selector['type'] == 'select':
                        #---the only way to subselect here is to select on each divided lipid (since
                        #---...the procedure above has correctly divided the lipids). we perform the
                        #---...subselection by pivoting over indices
                        #---! this method needs to be checked
                        this_inds = np.arange(
                            counter, counter + atoms_per_residue[this_resname])
                        this_lipid = allsel[lipids][this_inds]
                        this_subsel = np.where(
                            np.in1d(
                                this_lipid.indices,
                                this_lipid.select_atoms(
                                    selector['selection']).indices))[0]
                        divider.append(this_inds[this_subsel])
                    else:
                        divider.append(
                            np.arange(
                                counter,
                                counter + atoms_per_residue[this_resname]))
                    counter += atoms_per_residue[this_resname]
                #---in the careful method the sel from above is broken but allsel[lipids] is correct
                sel = allsel[lipids]
                masses = np.array([mass_table[i[0]] for i in sel.atoms.names])
        else:
            raise Exception(
                'residues have redundant resids and selection is not the easy one')

    #---load trajectory into memory
    trajectory, vecs = [], []
    for fr in range(nframes):
        status('loading frame', tag='load', i=fr, looplen=nframes)
        uni.trajectory[fr]
        trajectory.append(sel.positions / lenscale)
        #! critical fix: you must cast the dimensions or you get repeated vectors
        vecs.append(np.array(uni.trajectory[fr].dimensions[:3]))
    vecs = np.array(vecs) / lenscale

    checktime()
    #---parallel
    start = time.time()
    if parallel:
        coms = Parallel(n_jobs=work.nprocs, verbose=0)(
            delayed(codes.mesh.centroid)(trajectory[fr], masses, divider)
            for fr in framelooper(nframes, start=start))
    else:
        coms = []
        for fr in range(nframes):
            status('computing centroid',
                   tag='compute',
                   i=fr,
                   looplen=nframes,
                   start=start)
            coms.append(codes.mesh.centroid(trajectory[fr], masses, divider))

    #---identify leaflets
    status('identify leaflets', tag='compute')
    separator = kwargs['calc']['specs'].get('separator', {})
    leaflet_finder_trials = separator.get('trials', 3)
    #---preselect a few frames, always including the zeroth
    selected_frames = [0] + list(
        np.random.choice(
            np.arange(1, nframes), leaflet_finder_trials, replace=False))
    #---alternate lipid representation is useful for separating monolayers
    if 'lipid_tip' in separator:
        tip_select = separator['lipid_tip']
        sel = uni.select_atoms(tip_select)
        atoms_separator = []
        for fr in selected_frames:
            uni.trajectory[fr]
            atoms_separator.append(sel.positions / lenscale)
    #---default is to use the centers of mass to distinguish leaflets
    else:
        atoms_separator = [coms[fr] for fr in selected_frames]
    #---pass frames to the leaflet finder, which has legacy and cluster modes
    leaflet_finder = codes.mesh.LeafletFinder(
        atoms_separator=atoms_separator,
        #---pass along the corresponding vectors for topologize
        vecs=[vecs[i] for i in selected_frames],
        cluster=separator.get('cluster', False),
        cluster_neighbors=separator.get('cluster_neighbors', None),
        topologize_tolerance=separator.get('topologize_tolerance', None))
    #---get the indices from the leaflet finder
    monolayer_indices = leaflet_finder.monolayer_indices
    # for convenience when doing planar bilayers we put the zero index on top
    top_mono = np.argmax([
        atoms_separator[0][monolayer_indices == i][:, 2].mean()
        for i in range(2)
    ])
    if top_mono != 0: monolayer_indices = 1 - monolayer_indices

    checktime()
    coms_out = np.array(coms)
    #---remove jumping in some directions if requested
    if nojumps:
        nojump_dims = ['xyz'.index(j) for j in nojumps]
        nobjs = coms_out.shape[1]
        displacements = np.array([(coms_out[1:] - coms_out[:-1])[..., i]
                                  for i in range(3)])
        for d in nojump_dims:
            #---flag frame-to-frame displacements larger than half the box vector
            #---...as periodic jumps, signed by the direction of the jump
            shift_binary = (
                np.abs(displacements) * (1. - 2 * (displacements < 0)) /
                (np.transpose(np.tile(vecs[:-1],
                                      (nobjs, 1, 1))) / 2.))[d].astype(int)
            #---accumulate the corrections and unwrap the trajectory in place
            shift = (np.cumsum(-1 * shift_binary, axis=0) *
                     np.transpose(np.tile(vecs[:-1, d], (nobjs, 1))))
            coms_out[1:, :, d] += shift

    #---pack
    attrs, result = {}, {}
    attrs['selector'] = selector
    attrs['nojumps'] = nojumps
    result['resnames'] = np.array(sel.residues.resnames)
    result['monolayer_indices'] = np.array(monolayer_indices)
    result['vecs'] = vecs
    result['nframes'] = np.array(nframes)
    result['points'] = coms_out
    result['resids'] = np.array(np.unique(resids))
    result['resids_exact'] = resids
    attrs['separator'] = separator
    return result, attrs
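`codes.mesh.centroid` is external to this snippet; given the `(trajectory[fr], masses, divider)` arguments above, the per-residue mass-weighted center of mass it is assumed to compute looks roughly like this (illustrative, not the actual codes.mesh implementation):

import numpy as np

def centroid_sketch(points, masses, divider):
    """Mass-weighted center of mass for each residue's atom-index group (illustrative)."""
    coms = np.zeros((len(divider), 3))
    for rnum, inds in enumerate(divider):
        #---inds selects one residue's atoms; weight its positions by atomic mass
        m = masses[inds]
        coms[rnum] = (points[inds] * m[:, None]).sum(axis=0) / m.sum()
    return coms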