def identify_lipid_leaflets(pts, vec, monolayer_cutoff=2.0,
    monolayer_cutoff_retry=True, max_count_asymmetry=0.05, pbc_rewrap=True,
    topologize_tolerance=None):
    """
    Identify leaflets in a bilayer by consensus.
    Note that the time limit on the topologize call was increased from 10 to 30 for large systems.
    """
    #---time limit on the tolerance checker
    try:
        with time_limit(30):
            wrapper = topologize(pts, vec,
                **({'tol': topologize_tolerance} if topologize_tolerance else {}))
    except TimeoutException:
        status('topologize failed to join the bilayer. '
            'if it is broken over PBCs e.g. a saddle, this is a serious error which may go undetected. '
            'make sure you always inspect the topology later.', tag='error')
        wrapper = np.zeros((len(pts), 3))
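#---The time_limit context manager and TimeoutException used above are defined elsewhere in this
#---module. The sketch below only illustrates the usual SIGALRM-based pattern (Unix only) that such
#---a guard typically follows; the module's own definitions may differ in detail.
import signal
from contextlib import contextmanager

class TimeoutException(Exception):
    """Raised when a guarded block exceeds its time budget (illustrative sketch)."""
    pass

@contextmanager
def time_limit(seconds):
    """Interrupt the enclosed block after `seconds` seconds via SIGALRM (illustrative sketch)."""
    def _handler(signum, frame):
        raise TimeoutException('timed out after %d seconds' % seconds)
    previous = signal.signal(signal.SIGALRM, _handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        #---always cancel the alarm and restore the previous handler
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous)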
def identify_leaflets_cluster(self, pts, vec, topologize_time_limit=30, max_count_asymmetry=0.05):
    """
    Use scikit-learn clustering methods to separate leaflets.
    Note that this method can cluster a tortuous manifold and may work for complex morphologies.
    """
    import scipy
    import sklearn
    import sklearn.neighbors
    import sklearn.cluster
    nlipids = len(pts)
    #---time limit on the topologize function which joins broken bilayers e.g. a saddle that crosses PBCs
    try:
        with time_limit(topologize_time_limit):
            wrapper = topologize(pts, vec,
                **({'tol': self.topologize_tolerance} if self.topologize_tolerance else {}))
    except TimeoutException:
        status('topologize failed to join the bilayer. '
            'if it is broken over PBCs e.g. a saddle, this is a serious error which may go undetected. '
            'make sure you always inspect the topology later.', tag='error')
        wrapper = np.zeros((len(pts), 3))
    findframe = pts + wrapper * np.array(vec)
    #---ensure that all points are in the box
    findframe += vec * (findframe < 0) - vec * (findframe > vec)
    #---previous calculation of connectivity was done manually
    if False:
        #---conservative cutoff gets lots of nearby points
        cutoff = 10.0
        cutoff_short = 2.0
        #---make a K-D tree from the points
        tree = scipy.spatial.ckdtree.cKDTree(findframe,
            boxsize=np.concatenate((vec, vec)) + 0. * eps)
        #---find the nearest reference points for each instantaneous point
        close, nns = tree.query(findframe, distance_upper_bound=cutoff, k=20)
        #---construct the neighbor list
        subjects = np.where(np.all((close < cutoff, close > 0), axis=0))
        #---get the pairs of neighbors
        subjects, neighbors = subjects[0], nns[subjects]
        pds = np.ones((nlipids, nlipids)) * 0.0
        pds[tuple((np.arange(nlipids), np.arange(nlipids)))] = 0.0
        nears = np.where(np.all((close > 0, close <= cutoff_short), axis=0))
        pds[tuple((nears[0], nns[nears]))] = 1.0  # close[nears]
        pds[tuple((nns[nears], nears[0]))] = 1.0  # close[nears]
    connectivity = sklearn.neighbors.kneighbors_graph(findframe,
        n_neighbors=self.cluster_neighbors, include_self=False)
    ward = sklearn.cluster.AgglomerativeClustering(n_clusters=2,
        connectivity=connectivity, linkage='complete').fit(findframe)
    imono = ward.labels_
    if np.mean(imono) == 0.5:
        status('[STATUS] perfect split is %0.5f' % np.mean(imono))
    elif (np.all(np.array(imono) == 0) or np.all(np.array(imono) == 1)
        or np.abs(np.mean(imono) - 0.5) >= max_count_asymmetry):
        status('[STATUS] split is %0.5f' % np.mean(imono))
        status('[STATUS] one side has %d' % np.sum(imono))
        status('[WARNING] leaflets were not distinguished')
        raise Exception('[ERROR] failed to identify leaflets. '
            'DEVELOPMENT NOTE!? use legacy or a different cutoff?')
    else:
        status('[STATUS] some lipids might be flipped %d %.5f' % (np.sum(imono), np.mean(imono)))
    return np.array(imono)
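#---A hedged, standalone example of the clustering strategy used in identify_leaflets_cluster above:
#---build a k-nearest-neighbor connectivity graph and cut an agglomerative clustering tree at two
#---clusters. The synthetic flat "bilayer", the helper name, and the parameter values below are
#---illustrative only and are not part of the module.
def _demo_cluster_leaflets(nlipids_per_leaflet=200, seed=0):
    import numpy as np
    import sklearn.neighbors
    import sklearn.cluster
    rng = np.random.RandomState(seed)
    #---two parallel sheets of points separated in z, mimicking the leaflets of a flat bilayer
    xy = rng.uniform(0., 20., size=(2 * nlipids_per_leaflet, 2))
    z = np.repeat([10., 50.], nlipids_per_leaflet) + rng.normal(0., 0.5, 2 * nlipids_per_leaflet)
    pts = np.column_stack((xy, z))
    #---connectivity restricts merges to geometric neighbors, as in the function above
    connectivity = sklearn.neighbors.kneighbors_graph(pts, n_neighbors=4, include_self=False)
    labels = sklearn.cluster.AgglomerativeClustering(
        n_clusters=2, connectivity=connectivity, linkage='complete').fit(pts).labels_
    #---a clean split assigns half of the points to each leaflet (mean label near 0.5)
    print('split fraction: %.3f' % labels.mean())
    return labels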
def identify_lipid_leaflets_legacy(pts, vec, monolayer_cutoff,
    monolayer_cutoff_retry=True, max_count_asymmetry=0.05, pbc_rewrap=True,
    topologize_tolerance=None, topologize_time_limit=30):
    """
    Identify leaflets in a bilayer by consensus.
    Note that the time limit on the topologize call was increased from 10 to 30 for large systems.
    This is the legacy version of this algorithm. Previously it was recursive, lowering the cutoff
    by small increments and then calling itself again if the bilayer did not appear to be split
    correctly. The current version is called by the LeafletFinder class and throws exceptions to
    trigger a lower cutoff. We have tried to preserve the legacy version for other users, but the
    cluster version is more reliable.
    """
    #---previous default was somewhat high, but typically came in from specs, and we reduced it incrementally
    if monolayer_cutoff is None:
        monolayer_cutoff = 2.0
    #---time limit on the tolerance checker
    try:
        with time_limit(topologize_time_limit):
            wrapper = topologize(pts, vec,
                **({'tol': topologize_tolerance} if topologize_tolerance else {}))
    except TimeoutException:
        status('topologize failed to join the bilayer. '
            'if it is broken over PBCs e.g. a saddle, this is a serious error which may go undetected. '
            'make sure you always inspect the topology later.', tag='error')
        wrapper = np.zeros((len(pts), 3))
    findframe = pts + wrapper * np.array(vec)
    status('this step is somewhat slow. it uses scipy.spatial.pdist.', tag='warning')
    pd = [scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(findframe[:, d:d + 1]))
        for d in range(3)]
    if pbc_rewrap:
        pd3pbc = np.sqrt(np.sum(np.array([
            pd[d] - (pd[d] > vec[d] / 2.) * vec[d] + (pd[d] < -1 * vec[d] / 2.) * vec[d]
            for d in range(3)])**2, axis=0))
    else:
        pd3pbc = pd
    nbors = np.transpose(np.where(pd3pbc < monolayer_cutoff))
    nlipids = len(pts)
    imono = np.zeros(nlipids)
    nlist = []
    for i in range(nlipids):
        status('cataloging lipids', i=i, looplen=nlipids, tag='compute')
        nlist.append(nbors[np.where(nbors[:, 0] == i)[0], 1])
    iref = 0
    mono = np.zeros(nlipids)
    searched = np.zeros(nlipids)
    imono[iref], searched[iref] = 1, 1
    imono[nlist[iref]] = 1
    while np.any(np.all((imono == 1, searched == 0), axis=0)):
        for iref in np.where(np.all((imono == 1, searched == 0), axis=0))[0]:
            imono[nlist[iref]] = 1
            searched[iref] = 1
    #---check that the leaflets were properly distinguished by looking at the number in each monolayer
    if np.mean(imono) == 0.5:
        status('[STATUS] perfect split is %0.5f' % np.mean(imono))
        return imono
    elif (monolayer_cutoff_retry and (np.all(np.array(imono) == 0) or np.all(np.array(imono) == 1)
        or np.abs(np.mean(imono) - 0.5) >= max_count_asymmetry)):
        status('[STATUS] split is %0.5f' % np.mean(imono))
        status('[STATUS] one side has %d' % np.sum(imono))
        status('[WARNING] leaflets were not distinguished')
        status('[COMPUTE] leaflets = ' + str(np.sum(imono)) + '/' + str(len(imono)))
        status('[WARNING] previous monolayer_cutoff = ' + str(monolayer_cutoff))
        raise Exception('[ERROR] failed to identify leaflets '
            'so we are returning an exception to the LeafletFinder')
    else:
        status('[STATUS] some lipids might be flipped %d %.5f' % (np.sum(imono), np.mean(imono)))
    return imono
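#---A hedged, standalone sketch of the legacy strategy above: compute minimum-image pairwise
#---distances, then flood-fill connected components under the monolayer cutoff so that each
#---leaflet becomes one component. The helper name and defaults are illustrative, not part of
#---the module; the function above is the actual implementation.
def _demo_flood_fill_leaflets(pts, vec, cutoff=2.0):
    import numpy as np
    import scipy.spatial
    #---per-dimension pairwise separations wrapped by the minimum-image convention
    pd = [scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(pts[:, d:d + 1]))
        for d in range(3)]
    pd3pbc = np.sqrt(np.sum(np.array([
        pd[d] - (pd[d] > vec[d] / 2.) * vec[d] + (pd[d] < -1 * vec[d] / 2.) * vec[d]
        for d in range(3)])**2, axis=0))
    #---adjacency under the cutoff, then label connected components by iterative frontier expansion
    adjacency = pd3pbc < cutoff
    labels = -np.ones(len(pts), dtype=int)
    for seed in range(len(pts)):
        if labels[seed] >= 0: continue
        labels[seed] = labels.max() + 1
        frontier = [seed]
        while frontier:
            current = frontier.pop()
            neighbors = np.where(adjacency[current])[0]
            fresh = neighbors[labels[neighbors] < 0]
            labels[fresh] = labels[current]
            frontier.extend(fresh.tolist())
    #---for a cleanly separated bilayer this yields exactly two labels, one per leaflet
    return labels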