Beispiel #1
0
def reload():
	"""
	Load everything for the plot only once.

	Reads the upstream data with `plotload`, runs `histogram_stack` over every
	row of the concatenated ion-water distances via `basic_compute_loop`, and
	publishes the results as module globals: `sns`, `scanrange`,
	`distributions`, `distances`, `data`, `calc`, `normalizers`, `middles`.
	"""
	#---names shared with the parallel workers and the plotting code
	global sns,scanrange,distributions,distances,data,calc,normalizers,middles
	data,calc = plotload(plotname)
	sns = work.specs['collections']['position']+['membrane-v538']
	#---the bins stop at the largest water distance found in any simulation
	cutoff = max(data[name]['data']['water_distances'].max() for name in sns)
	#---bin edges, read by the parallel functions through the global
	scanrange = np.arange(0,cutoff,binsize)
	#---distances are indexed by concatenated frames, then ions
	distances = {name:np.concatenate(data[name]['data']['water_distances']) for name in sns}
	#---one job per row of each simulation's distance array
	looper = []
	for name in sns:
		for row in range(len(distances[name])):
			looper.append(dict(index=row,sn=name))
	incoming = np.array(basic_compute_loop(histogram_stack,looper=looper))
	#---regroup the flat list of results by simulation
	distributions = {
		name:np.array([incoming[num] for num,job in enumerate(looper) if job['sn']==name])
		for name in sns}
	#---normalization factors: bin centers and spherical shell volumes
	middles = (scanrange[1:]+scanrange[:-1])/2
	areas = np.array([4*np.pi*binsize*mid**2 for mid in middles])
	#---atoms in a nm3: (1000g/18g is mol/L) * 1L/1000ml * 1ml/cm3 / (10**7 nm/cm)**3 = 33.46
	#---this ideal value is replaced by the per-simulation estimate in the loop below
	water_density = 6.023*10**23*(1000.0/18)/1000/(10**(9-2))**3
	#---window to estimate bulk, lower than the dropoff, higher than the first two shells
	bulk_window_raw = (0.75,1.0)
	#---the window depends only on the shared bin edges so it is selected once
	bulk_low,bulk_high = bulk_window_raw
	bulk_window = np.where((scanrange>=bulk_low)&(scanrange<=bulk_high))[0]
	#---normalize the ion-water density for all RDF measurements
	#---note that this is repeated below for zone-specific normalizations
	normalizers = {}
	for name in sns:
		#---reestimate ion-water pseudo-density at supposed bulk-like distances
		water_density = (distributions[name][:,bulk_window]/areas[bulk_window]).mean()
		normalizers[name] = areas*water_density
Beispiel #2
0
def lipid_mesh_partners(**kwargs):
    """
    Count lipid-type combinations (pairs and triples) per monolayer and frame.

    For nn in (2, 3) this enumerates every way to draw nn lipids from the
    observed residue types, then calls `counter` (defined elsewhere) once per
    monolayer and frame through `basic_compute_loop` and stores the counts
    reshaped to (monolayer, frame, combination).

    Keyword arguments read here:
        sn, workspace, calc: required keys; unpacked from kwargs.
        upstream: dict whose 'lipid_mesh' entry supplies 'nframes',
            'resnames' and 'monolayer_indices'.

    Returns:
        (results, attrs). `results` holds 'reslist' and, for each nn,
        'combos_%d', 'combonames_%d', 'combo_lookup_%d',
        'combo_lookup_str_%d' and 'counts_observed_%d', plus
        'monolayer_residues_0' and 'monolayer_residues_1'. `attrs` is
        returned empty.

    Side effects:
        Rebinds the module globals `dat`, `results` and `rxmlook`
        (presumably read by `counter` in the parallel loop -- confirm there).
    """
    #---parameters (sn and work are unpacked but not otherwise used here)
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    #---random trials are deprecated and disabled
    do_randomize = False
    #---only read when the randomization is switched back on
    n_trials = calc['specs']['n_trials'] if do_randomize else 0
    #---globals for parallel
    global dat, results, rxmlook
    dat = kwargs['upstream']['lipid_mesh']
    nframes = int(dat['nframes'])
    resnames = np.array(dat['resnames'])
    attrs, results = {}, {}

    #---code adapted from previous version at simuluxe and binding_combinator
    #---residue types in order of first appearance
    first_seen = np.sort(np.unique(resnames, return_index=True)[1])
    reslist = list(resnames[first_seen])
    lipids = np.array(reslist)
    results['reslist'] = reslist
    #---collect statistics for pairs and triples
    for nn in [2, 3]:
        #---each combo is a digit string, one digit per lipid type, summing to nn
        digits = ''.join([str(i) for i in range(nn + 1)])
        combos = np.array([
            ''.join(j) for j in itertools.product(digits, repeat=len(lipids))
            if sum([int(k) for k in j]) == nn
        ])
        #---expand each digit string into a tuple of nn residue names
        combonames = [
            tuple(
                np.concatenate([[lipids[ww]] * int(w)
                                for ww, w in enumerate(l)])) for l in combos
        ]
        results['combos_%d' % nn] = combos
        results['combonames_%d' % nn] = combonames
        #---rows are combinations, columns count each residue type in it
        combonames_array = np.array(combonames)
        combolookup = np.sum([combonames_array == r for r in reslist],
                             axis=2).T
        combolookup_str = [''.join(['%s' % s for s in i]) for i in combolookup]
        results['combo_lookup_%d' % nn] = combolookup
        results['combo_lookup_str_%d' % nn] = combolookup_str

    #---determine monolayer-specific residue indices
    imono = dat['monolayer_indices']
    nmols = [np.sum(imono == i) for i in range(2)]
    #---rxm[mn][ri] lists positions within monolayer mn held by residue type ri
    rxm = []
    for mn in range(2):
        in_mono = imono == mn
        mono_index = np.where(in_mono)[0]
        #---mono_index is sorted and the members are a subset of it, so
        #---searchsorted returns each member's position within the monolayer
        rxm.append([
            np.searchsorted(mono_index,
                            np.where(in_mono & (resnames == rn))[0])
            for rn in reslist
        ])
    rxmlook = [np.zeros(n) for n in nmols]
    for mn in range(2):
        for ri, r in enumerate(rxm[mn]):
            #---test the length: comparing an array with [] skips single-member
            #---types and fails on a shape mismatch in recent numpy
            if len(r): rxmlook[mn][r] = ri

    #---count in parallel
    counts_trials = dict([(nn, []) for nn in [2, 3]])
    counts_observed = dict([(nn, None) for nn in [2, 3]])
    for nn in [2, 3]:
        ncombos = len(results['combonames_%d' % nn])
        status('observations for nn=%d' % (nn), tag='compute')
        looper = [
            dict(fr=fr, mn=mn, nn=nn) for mn in range(2)
            for fr in range(nframes)
        ]
        incoming = basic_compute_loop(counter, looper)
        #---reindex data mn,fr,combo
        counts_observed[nn] = np.concatenate(incoming).reshape(
            (2, nframes, ncombos))
        if do_randomize:
            for trial in range(n_trials):
                results['rxmlook_rand'] = [
                    np.random.permutation(r) for r in rxmlook
                ]
                status('randomize trial for nn=%d trial=%d/%d' %
                       (nn, trial + 1, n_trials),
                       tag='compute')
                looper = [
                    dict(fr=fr, mn=mn, nn=nn, random=True) for mn in range(2)
                    for fr in range(nframes)
                ]
                incoming = basic_compute_loop(counter, looper)
                counts_trials[nn].append(
                    np.concatenate(incoming).reshape(
                        (2, nframes, ncombos)))
    if do_randomize:
        counts_random = dict([(nn, np.concatenate([counts_trials[nn]]))
                              for nn in [2, 3]])
    #---pack
    for nn in [2, 3]:
        if do_randomize:
            results['counts_random_%d' % nn] = np.array(counts_random[nn])
        results['counts_observed_%d' % nn] = np.array(counts_observed[nn])
    #---the shuffled lookup is scratch space for the workers, not a result
    results.pop('rxmlook_rand', None)
    #---save rxmlook for counting lipids
    for mn in range(2):
        results['monolayer_residues_%d' % mn] = rxmlook[mn]
    return results, attrs
Beispiel #3
0
def plot_height_proximity_correlation(**kwargs):
    """
    Plot the instantaneous membrane height vs proximity to protein points.

    On the first call (when no global `post` exists) the per-frame data are
    computed for every simulation in `sns` with
    `compute_protein_proximity_height_correlation` and cached in the global
    `post`, so later calls only redraw. Column 0 of the cached samples is
    binned and column 1 is averaged per bin (presumably distance and height,
    going by the function name -- confirm in the compute function).

    The figure is written to 'fig.height_proximity.png' in `work.plotdir`.
    `kwargs` is accepted but not read.
    """
    import seaborn as sb
    # stash to globals to iterate the plot aesthetics
    global post, mesh, vecs, protein_pts
    if 'post' not in globals():
        post = {}
        sample_rate = 1
        for sn in sns:
            # points_all for the dimer simulations has dimensions frames, monomer, points, xyz
            protein_pts = data_prot[sn]['data']['points_all']
            # a missing entry raises here instead of dropping into a debugger
            vecs = data[sn]['data']['vecs']
            nframes = len(vecs)
            mesh = data[sn]['data']['mesh'].mean(axis=0)
            # subtract each frame's mean so heights are relative to that frame
            mesh -= mesh.reshape(nframes, -1).mean(axis=1)[:, None, None]
            incoming = basic_compute_loop(
                compute_protein_proximity_height_correlation,
                looper=[dict(fr=fr) for fr in range(0, nframes, sample_rate)])
            post[sn] = dict(sizes=[len(i) for i in incoming],
                            incoming=np.concatenate(incoming))

    # regular plot
    binw = 1.0
    axes, fig = square_tiles(1, figsize=(8, 8))
    ax = axes[0]
    pbc_spacing = min(
        [min(data[sn]['data']['vecs'].mean(axis=0)[:2]) for sn in sns])
    colors = sb.color_palette("hls", len(sns))
    # one set of bins for every simulation, spanning the largest column-0 value
    rmax = max([np.abs(v['incoming'][:, 0]).max() for v in post.values()])
    bins = np.arange(0, rmax + binw, binw)
    for snum, sn in enumerate(sns):
        sample = post[sn]['incoming']
        proximity = sample[:, 0]
        # both bin edges are inclusive, as before
        binned = [
            sample[(proximity >= low) & (proximity <= high)][:, 1]
            for low, high in zip(bins[:-1], bins[1:])
        ]
        means = np.array([np.mean(i) for i in binned])
        stds = np.array([np.std(i) for i in binned])
        ax.plot(bins[:-1], means, label=sn, color=colors[snum])
        ax.fill_between(bins[:-1],
                        means - stds,
                        means + stds,
                        alpha=0.1,
                        color=colors[snum])
    # there is very little difference between doing the expensive PBC
    ax.set_xlim((0., pbc_spacing / 2.))
    ax.axhline(0, c='k', lw=1)
    plt.legend()
    plt.savefig(os.path.join(work.plotdir, 'fig.height_proximity.png'))
                nmol = len(m2i)
                #---note that depending on the PBC links we get a variable number of
                points_inside = np.array([
                    lipid_mesh['%d.%d.points' % (top_mono, fr)][:nmol]
                    for fr in range(nframes)
                ])
                windows = np.array([
                    np.arange(j, j + smooth_window)
                    for j in np.arange(0, nframes - smooth_window)
                ])
                points_inside_smooth = np.array(
                    [points_inside[w].mean(axis=0) for w in windows])
            #---render in parallel
            basic_compute_loop(
                compute_function=render_hydrogen_bonding_pattern,
                looper=[
                    dict(fr=fr, frameno=frameno)
                    for frameno, fr in enumerate(valid_frames)
                ],
                run_parallel=True,
                debug=False)
            #---render when complete
            try:
                # https://superuser.com/questions/1005315/interpolation-with-ffmpeg
                cmd = 'ffmpeg -i "snap.%05d.v1.png" ' + 'mov.hydrogen_bonding_pattern.%s' % sn + '.mp4'
                bash(cmd, cwd=out_dn)
            except:
                status('failed to render the video. try "%s" in %s' %
                       (cmd, out_dn))
        del lipid_mesh
Beispiel #5
0
def hydration(grofile,trajfile,**kwargs):

	"""
	Hydration code revamped from simuluxe on 2017.6.21.

	Caches ion, lipid and water-oxygen coordinates for every frame in module
	globals, then runs `shell_counter` and `minimum_distances` (both defined
	elsewhere) once per frame through `basic_compute_loop`.

	Arguments:
		grofile, trajfile: structure and trajectory passed to MDAnalysis.Universe.
		kwargs: requires 'sn', 'workspace' and 'calc'; optional 'run_parallel'
			(default True); 'upstream' is read only when the distance metric is 'z'.

	Returns:
		(result, attrs). `result` holds 'nframes', 'shell_counts', 'near_lipids'
		and the frame indices that produced non-empty output for each
		('valid_frames_shell_counts', 'valid_frames_near_lipids');
		`attrs` holds 'hydration_cutoff'.

	Side effects:
		Rebinds the globals `pts_ions`, `pts_water`, `pts_lipids`, `vecs` and,
		for the 'z' metric, `midplanes`.
	"""

	#---unpack
	sn = kwargs['sn']
	work = kwargs['workspace']
	calc = kwargs['calc']
	run_parallel = kwargs.get('run_parallel',True)
	start_job_time = time.time()

	#---prepare universe
	uni = MDAnalysis.Universe(grofile,trajfile)
	nframes = len(uni.trajectory)
	#---coordinates are divided by this factor before caching (Angstrom to nm)
	lenscale = 10.

	#---get selections
	#---a comma in the cation entry means several are listed so use the relevant one
	if ',' in work.meta[sn]['cation']: cation = work.meta[sn]['cation_relevant']
	else: cation = work.meta[sn]['cation']
	sel_ions = uni.select_atoms('name %s'%cation)
	sel_lipids_str = ' or '.join(['resname %s'%i for i in work.vars['selectors']['resnames_lipid']])
	sel_lipids = uni.select_atoms(sel_lipids_str)
	#---atom subselection
	atom_filter = calc['specs'].get('atom_filter',None)
	if atom_filter:
		#---keep only lipid atoms whose names match the regex
		matching_names = np.unique([n for n in sel_lipids.names if re.match(atom_filter,n)])
		sel_names_str = ' or '.join(['name %s'%i for i in matching_names])
		sel_lipids = uni.select_atoms('(%s) and (%s)'%(sel_lipids_str,sel_names_str))
	#---handle the distance metric
	distance_metric = calc['specs'].get('distance_metric',None)
	#---pass the distance metric to the distance finder
	distance_args = {'distance_metric':distance_metric}
	#---we use the water oxygen only
	sel_water = uni.select_atoms('name OW')
	global pts_ions,pts_water,pts_lipids,vecs,midplanes
	#---the height distance metric needs the average z for each frame
	if distance_metric=='z':
		midplanes = np.array([i.mean() for i in kwargs['upstream']['undulations']['mesh'].mean(axis=0)])
	#---cache the points
	pts_ions = np.zeros((nframes,len(sel_ions),3))
	pts_lipids = np.zeros((nframes,len(sel_lipids),3))
	pts_water = np.zeros((nframes,len(sel_water),3))
	vecs = np.zeros((nframes,3))
	start = time.time()
	for fr in range(nframes):
		status('caching coordinates',tag='compute',i=fr,looplen=nframes,start=start)
		#---indexing the trajectory moves the universe to this frame
		uni.trajectory[fr]
		pts_ions[fr] = sel_ions.positions/lenscale
		pts_lipids[fr] = sel_lipids.positions/lenscale
		pts_water[fr] = sel_water.positions/lenscale
		vecs[fr] = uni.dimensions[:3]/lenscale

	#---prepare arguments for the compute functions
	hydration_cutoff = work.vars['hydration_cutoffs'][cation]/lenscale
	out_args = dict(cutoff=hydration_cutoff)

	#---compute the waters in the shell
	shell_counts = basic_compute_loop(
		compute_function=shell_counter,
		looper=[dict(fr=fr,**out_args) for fr in range(nframes)],
		run_parallel=run_parallel,debug=None)
	#---select valid frames, namely those that returned something
	valid_frames_shell_counts = np.array([i for i in range(len(shell_counts)) if len(shell_counts[i])>0])
	#---the cast keeps an empty index array usable for indexing
	shell_counts = np.array(shell_counts)[valid_frames_shell_counts.astype(int)]
	#---for each ion get the minimum distance to any lipid
	near_lipids = basic_compute_loop(
		compute_function=minimum_distances,
		looper=[dict(fr=fr,**distance_args) for fr in range(nframes)],
		run_parallel=run_parallel,debug=None)
	#---select valid frames
	valid_frames_near_lipids = np.array([i for i in range(len(near_lipids)) if len(near_lipids[i])>0])
	near_lipids = np.array(near_lipids)[valid_frames_near_lipids.astype(int)]

	#---package the dataset
	result,attrs = {},{}
	#---everything is indexed by idx
	attrs['hydration_cutoff'] = hydration_cutoff
	result['nframes'] = np.array(nframes)
	result['shell_counts'] = shell_counts
	result['valid_frames_shell_counts'] = valid_frames_shell_counts
	result['near_lipids'] = near_lipids
	result['valid_frames_near_lipids'] = valid_frames_near_lipids
	status('compute job lasted %.1fmin'%((time.time()-start_job_time)/60.),tag='time')
	return result,attrs
Beispiel #6
0
def electron_density_profiles(**kwargs):
    """
    Compute the electron density profiles.

    Assigns a per-atom value from `chargedict` by matching atom names against
    its regular expressions, groups atoms by residue name and by the extra
    name regexes, caches all coordinates, and runs `compute_edp_single`
    (defined elsewhere) once per frame through `basic_compute_loop`.

    Keyword arguments read here:
        calc: 'specs' must hold 'bin_size' and may hold 'extra_regexes'.
        upstream: 'lipid_abstractor' must hold 'points',
            'monolayer_indices' and 'vecs'.
        structure, trajectory: files passed to MDAnalysis.Universe.

    Returns:
        (results, attrs). `results` holds 'group_%d' index arrays, 'resnames',
        'tabulated', 'midpoint', 'vecs' and 'charges'; `attrs` holds
        'group_regexes', 'bin_size' and 'nbins'.

    Raises:
        Exception: if any atom name matches zero or more than one pattern in
            `chargedict`, or if the box vectors read here differ from the
            upstream ones.

    Side effects:
        Rebinds the globals `vecs`, `coords`, `nbins`, `groups`, `midpoint`
        and `charges`.
    """
    global vecs, coords, nbins, groups, midpoint, charges

    # hardcoded settings
    chargedict = {
        '^N(?!A$)': 7,
        '^C[0-9]+': 6,
        '^CL$': 17,
        '^H': 1,
        '^O': 8,
        '^P': 15,
        '^Cal': 18,
        '^MG': 10,
        '^NA': 11,
        '^S': 16,
        'K': 18
    }
    specs = kwargs['calc']['specs']
    lipid_abstractor = kwargs['upstream']['lipid_abstractor']
    # we consider residues then the following regular expressions
    group_regexes = specs.get('extra_regexes',
                              ['.+', '^(OW)|(HW(1|2))$', '^C[0-9]+'])
    # get the reference z for each frame
    bilayer_coms = lipid_abstractor['points']
    imono = lipid_abstractor['monolayer_indices']
    #! assume upstream lipid_abstractor is correct and the resulting points are not broken over PBCs
    # average the z of each monolayer per frame, then average the two
    midpoint = np.array([
        bilayer_coms[:, imono == mn][:, :, 2].mean(axis=1) for mn in range(2)
    ]).mean(axis=0)
    # get the trajectory
    grofile, trajfile = kwargs['structure'], kwargs['trajectory']
    uni = MDAnalysis.Universe(grofile, trajfile)
    nframes = len(uni.trajectory)
    # MDAnalysis uses Angstroms not nm
    lenscale = 10.
    # choose a number of bins
    bin_size = specs['bin_size']
    vecs_upstream = lipid_abstractor['vecs']
    # round the number of bins to ensure everything is flush
    nbins = np.round(vecs_upstream[:, 2].mean() / bin_size).astype(int)
    # collect coordinates
    sel = uni.select_atoms('all')
    # assign charges
    namelist = uni.atoms.names
    resnamelist = list(set(uni.atoms.resnames))
    # every value in chargedict whose pattern matches, for each distinct atom name
    chargedict_obs = {
        name: [chargedict[key] for key in chargedict if re.match(key, name)]
        for name in np.unique(namelist)
    }
    # names with no match or with more than one match cannot be assigned
    unclear_charges = {
        key: val
        for key, val in chargedict_obs.items() if len(val) != 1
    }
    # check for emptiness directly rather than the truthiness of the keys
    if unclear_charges:
        raise Exception('charges for these atoms were not specified: %s' %
                        unclear_charges)
    chargedict_obs = {key: val[0] for key, val in chargedict_obs.items()}
    charges = np.array([chargedict_obs[n] for n in namelist])
    # identify atoms for each residue type
    groups = [np.where(uni.atoms.resnames == r)[0] for r in resnamelist]
    # then the atoms matching each extra regex
    groups += [
        np.array([i for i, j in enumerate(namelist) if re.match(reg, j)])
        for reg in group_regexes
    ]
    # cache the points
    coords = np.zeros((nframes, len(sel), 3))
    vecs = np.zeros((nframes, 3))
    for fr in range(nframes):
        status('loading frame', tag='load', i=fr, looplen=nframes)
        # seek once; the selection's positions then refer to this frame
        frame = uni.trajectory[fr]
        vecs[fr] = frame.dimensions[:3] / lenscale
        coords[fr] = np.array(sel.positions) / lenscale
    # make sure vectors are the same (exact comparison)
    if not np.all(vecs == vecs_upstream):
        raise Exception('vectors do not match upstream lipid_abstractor')
    # compute
    looper = [dict(fr=fr) for fr in range(nframes)]
    incoming = basic_compute_loop(compute_edp_single,
                                  looper,
                                  run_parallel=True)
    # pack
    results, attrs = {}, {}
    attrs['group_regexes'] = group_regexes
    for gnum, group in enumerate(groups):
        results['group_%d' % gnum] = group
    results['resnames'] = resnamelist
    results['tabulated'] = np.array(incoming)
    attrs['bin_size'] = bin_size
    results['midpoint'] = midpoint
    attrs['nbins'] = nbins
    results['vecs'] = vecs
    results['charges'] = charges
    return results, attrs
def hydration_distribution(grofile, trajfile, **kwargs):
    """
    Collect water and lipid distances around cations for RDF-style analysis.

    Caches box vectors and the cation, lipid and water-oxygen coordinates of
    every frame in module globals, then runs `hydration_distribution_framewise`
    (defined elsewhere) once per frame through `basic_compute_loop`. Each
    result is unpacked as (water distances, lipid distances, frame or None);
    frames reported as None are dropped.

    Arguments:
        grofile, trajfile: structure and trajectory passed to
            MDAnalysis.Universe.
        kwargs: requires 'sn', 'workspace' and 'calc'; calc['specs'] may set
            'k_nearest_waters' (default 200).

    Returns:
        (result, attrs). `result` holds 'water_distances', 'lipid_distances',
        'valid_frames', 'nframes' and 'cation_resids'; `attrs` is returned
        empty.

    Side effects:
        Rebinds the globals `vecs`, `subject_coords`, `proxy_coords` and
        `water_coords`.
    """

    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    #---nearest water distances to calculate
    knn = calc['specs'].get('k_nearest_waters', 200)

    #---prepare universe
    uni = MDAnalysis.Universe(grofile, trajfile)
    nframes = len(uni.trajectory)
    #---coordinates are divided by this factor before caching
    lenscale = 10.

    #---collect selection strings
    lipid_resnames = get_lipid_resnames()
    #---prefer the plural 'cations' entry and fall back to 'cation'
    cation_names = work.meta[sn].get('cations',
                                     work.meta[sn].get('cation', None))
    if not isinstance(cation_names, list): cation_names = [cation_names]

    #---define selections
    sel_proxy = uni.select_atoms(' or '.join(
        ['resname %s' % i for i in lipid_resnames]))
    sel_subject = uni.select_atoms(' or '.join(
        ['name %s' % i for i in cation_names]))
    #---we use oxygen to denote the water
    sel_water = uni.select_atoms('resname SOL and name OW')

    #---prepare coordinates for each frame
    st = time.time()
    global vecs, subject_coords, proxy_coords, water_coords
    vecs, subject_coords, proxy_coords, water_coords = [], [], [], []
    #---purposefully profligate with the memory so this goes quickly
    for fr in range(nframes):
        status('caching coordinates',
               tag='compute',
               i=fr,
               looplen=nframes,
               start=st)
        #---indexing the trajectory moves the universe to this frame
        uni.trajectory[fr]
        vecs.append(uni.dimensions[:3] / lenscale)
        subject_coords.append(sel_subject.positions / lenscale)
        proxy_coords.append(sel_proxy.positions / lenscale)
        water_coords.append(sel_water.positions / lenscale)
    status('completed caching in %.1f minutes' % ((time.time() - st) / 60.),
           tag='status')

    #---loop over frames
    looper = [dict(fr=fr, knn=knn) for fr in range(nframes)]
    incoming = basic_compute_loop(hydration_distribution_framewise,
                                  looper=looper)
    water_distances, lipid_distances, valid_frames = zip(*incoming)
    #---keep only frames that reported an index and use it to pick the results
    valid_frames = [fr for fr in valid_frames if fr is not None]
    water_distances = [water_distances[fr] for fr in valid_frames]
    lipid_distances = [lipid_distances[fr] for fr in valid_frames]

    #---package the dataset
    result, attrs = {}, {}
    result['water_distances'] = np.array(water_distances)
    result['lipid_distances'] = np.array(lipid_distances)
    result['valid_frames'] = valid_frames
    result['nframes'] = np.array(nframes)
    result['cation_resids'] = sel_subject.resids
    return result, attrs