def get_layer_heights_kmeans(traj, cell, n, surface_normal=np.array([0, 0, 1])):
    """Locate the layer centers along `surface_normal` by clustering atom heights.

    Every position in `traj` (all frames pooled together) is wrapped with a
    ``PBCCalculator`` built from `cell`, projected onto `surface_normal`, and
    the resulting one-dimensional heights are clustered with k-means.

    Args:
        - traj (ndarray n_frames x n_atoms x 3)
        - cell (ndarray 3x3 matrix)
        - n (int): The number of layers to identify (the k for k-means).
        - surface_normal (3-vector): A unit vector normal to the surface.
            Defaults to the z direction <0, 0, 1>.

    Returns:
        sorted ndarray of heights along surface normal
    """
    from sklearn.cluster import KMeans

    # Work on a flattened copy so the caller's trajectory is not modified.
    # Wrapping must happen before projecting, otherwise the heights along the
    # surface normal are not consistent between periodic images.
    points = traj.copy().reshape(-1, 3)
    PBCCalculator(cell).wrap_points(points)

    projected = np.dot(surface_normal, points.T).reshape(-1, 1)
    model = KMeans(n_clusters=n)
    model.fit(projected)

    return np.sort(model.cluster_centers_.reshape(-1))
def calculate_coord_numbers(traj, atoms, cutoff):
    """Compute the coordination number of every atom at every frame of `traj`.

    The coordination number of an atom is the number of *other* atoms whose
    distance to it (as computed by ``PBCCalculator.pairwise_distances``) is
    less than or equal to `cutoff`.

    Args:
        - traj (ndarray n_frames x n_atoms x 3)
        - atoms (ase.Atoms): provides the number of atoms and the cell.
        - cutoff (float, distance units)

    Returns:
        ndarray of int, n_frames x n_atoms
    """
    n_atoms = len(atoms)

    # Prealloc buffers. The builtin `int`/`bool` are used as dtypes because
    # the `np.int`/`np.bool` aliases were removed from NumPy.
    out = np.full(shape=(len(traj), n_atoms), fill_value=-1, dtype=int)
    distbuf = np.empty(shape=(n_atoms, n_atoms))
    neighborbuf = np.empty(shape=(n_atoms, n_atoms), dtype=bool)

    pbcc = PBCCalculator(atoms.cell)

    for f_idex, frame in enumerate(tqdm(traj)):
        pbcc.pairwise_distances(frame, out=distbuf)
        np.less_equal(distbuf, cutoff, out=neighborbuf)
        np.sum(neighborbuf, axis=1, out=out[f_idex])

    # Previous sum always counted atom itself in its CN, which is wrong
    out -= 1

    # `np.min` raises on an empty array, so only check non-empty results.
    assert out.size == 0 or np.min(out) >= 0

    return out
def run(self, st):
    """Split every site into up to ``self.n`` sub-sites by strided averaging.

    For each site with more than ``self.n`` assigned real-space positions, the
    positions are divided into ``self.n`` interleaved subsets (``pts[i::n]``)
    and each subset is averaged (optionally weighted by assignment confidence
    when ``self.weighted`` is set) into a new center. Sites with ``self.n`` or
    fewer positions either raise (``self.error_on_insufficient``) or contribute
    their raw positions as centers.

    Args:
        st (SiteTrajectory)
    Returns:
        A ``SiteNetwork``.
    Raises:
        ValueError: if `st` has no real trajectory, or if a site has too few
            positions and ``self.error_on_insufficient`` is set.
    """
    assert isinstance(st, SiteTrajectory)
    if st.real_trajectory is None:
        raise ValueError(
            "SiteTrajectory must have associated real trajectory.")

    pbcc = PBCCalculator(st.site_network.structure.cell)

    # Maximum length
    centers = np.empty(shape=(self.n * st.site_network.n_sites, 3),
                       dtype=st.real_trajectory.dtype)
    centers.fill(np.nan)
    # Integer arrays cannot hold NaN, so -1 marks "not yet assigned".
    types = np.full(shape=centers.shape[0], fill_value=-1, dtype=int)

    current_idex = 0
    for site in range(st.site_network.n_sites):
        if self.weighted:
            pts, confs = st.real_positions_for_site(
                site, return_confidences=True)
        else:
            pts = st.real_positions_for_site(site)
            confs = np.ones(shape=len(pts), dtype=int)

        old_idex = current_idex

        if len(pts) > self.n:
            sanity = 0
            for i in range(self.n):
                ps = pts[i::self.n]
                sanity += len(ps)
                c = confs[i::self.n]
                centers[current_idex] = pbcc.average(ps, weights=c)
                current_idex += 1
            # The strided subsets must partition the points exactly
            assert sanity == len(pts)
            assert current_idex - old_idex == self.n
        else:
            if self.error_on_insufficient:
                raise ValueError(
                    "Insufficient points assigned to site %i (%i) to take %i averages."
                    % (site, len(pts), self.n))
            centers[current_idex:current_idex + len(pts)] = pts
            current_idex += len(pts)

        # Every new center inherits the index of the site it came from
        types[old_idex:current_idex] = site

    sn = st.site_network.copy()
    sn.centers = centers[:current_idex]
    sn.site_types = types[:current_idex]

    # All used slots must have been filled in
    assert not np.isnan(np.sum(sn.centers))
    assert not np.any(types[:current_idex] < 0)

    return sn
def compute_volumes(self, sn):
    """Computes the volume of the convex hull defined by each site's static vertices.

    Requires vertex information in the SiteNetwork.

    Adds the ``site_volumes`` and ``site_surface_areas`` attributes.

    Volumes can be NaN for degenerate hulls/point sets on which QHull fails.
    A site with fewer than four vertices gets volume 0 and area NaN, unless
    ``self.error_on_insufficient_coord`` is set, in which case an
    ``InsufficientCoordinatingAtomsError`` is raised.

    Args:
        - sn (SiteNetwork)

    Raises:
        ValueError: if `sn` has no vertex information.
        InsufficientCoordinatingAtomsError: see above.
    """
    assert isinstance(sn, SiteNetwork)
    if sn.vertices is None:
        raise ValueError(
            "SiteNetwork must have verticies to compute volumes!")

    # Builtin `float` as dtype: the `np.float` alias was removed from NumPy.
    vols = np.empty(shape=sn.n_sites, dtype=float)
    areas = np.empty(shape=sn.n_sites, dtype=float)

    pbcc = PBCCalculator(sn.structure.cell)

    for site in range(sn.n_sites):
        pos = sn.static_structure.positions[list(sn.vertices[site])]
        if len(pos) < 4:
            if self.error_on_insufficient_coord:
                raise InsufficientCoordinatingAtomsError(
                    "Site %i had only %i vertices (less than needed 4)"
                    % (site, len(pos)))
            else:
                vols[site] = 0
                areas[site] = np.nan
                continue

        # It should since we're indexing with index lists; this guarantees
        # the in-place shift below does not touch the structure's positions.
        assert pos.flags['OWNDATA']

        # Recenter on the site so the vertices are not split across a
        # periodic boundary when wrapped
        offset = pbcc.cell_centroid - sn.centers[site]
        pos += offset
        pbcc.wrap_points(pos)

        try:
            hull = ConvexHull(pos)
            vols[site] = hull.volume
            areas[site] = hull.area
        except QhullError:
            logger.warning(
                "Had QHull failure when computing volume of site %i" % site)
            vols[site] = np.nan
            areas[site] = np.nan

    sn.add_site_attribute('site_volumes', vols)
    sn.add_site_attribute('site_surface_areas', areas)
def compute_accessable_volumes(self, st, n_recenterings=8):
    """Computes the volumes of convex hulls around all positions associated with a site.

    Uses the shift-and-wrap trick for dealing with periodicity, so sites that
    take up the majority of the unit cell may give bogus results.

    Adds the ``accessable_site_volumes`` attribute to the ``SiteNetwork``.
    A site's volume is NaN when it has no positions or when QHull fails for
    every recentering.

    Args:
        st (SiteTrajectory)
        n_recenterings (int): How many different recenterings to try (the
            algorithm will recenter around n of the points and take the
            minimal resulting volume; this deals with cases where there is
            one outlier where recentering around it gives very bad results.)
    """
    assert isinstance(st, SiteTrajectory)

    # Builtin `float` as dtype: the `np.float` alias was removed from NumPy.
    vols = np.empty(shape=st.site_network.n_sites, dtype=float)

    pbcc = PBCCalculator(st.site_network.structure.cell)

    for site in range(st.site_network.n_sites):
        pos = st.real_positions_for_site(site)
        # `pos` is shifted in place below, so it must not alias the trajectory
        assert pos.flags['OWNDATA']

        # NaN until at least one hull succeeds; previously a site whose hulls
        # all failed was reported with an infinite volume.
        vol = np.nan

        # A site without any positions has nothing to recenter around
        n_tries = n_recenterings if len(pos) > 0 else 0

        for i in range(n_tries):
            # Recenter around points spread evenly through the site's positions
            offset = pbcc.cell_centroid - pos[int(
                i * (len(pos) / n_recenterings))]
            pos += offset
            pbcc.wrap_points(pos)

            try:
                hull = ConvexHull(pos)
            except QhullError as qhe:
                logger.warning("For site %i, iter %i: %s" % (site, i, qhe))
                continue

            if np.isnan(vol) or hull.volume < vol:
                vol = hull.volume

        vols[site] = vol

    st.site_network.add_site_attribute('accessable_site_volumes', vols)
def plot_site(self, site, **kwargs):
    """Plot the point cloud of one site together with its center and the lattice.

    All plotted positions are shifted by a common offset that moves one of the
    site's points to the cell centroid, and are then wrapped, so the site is
    drawn away from the periodic boundaries.

    Args:
        site (int): index of the site to plot.
        **kwargs: forwarded to ``plot_points`` / ``plot_atoms``. The axes to
            title are read from ``kwargs['ax']``.
            NOTE(review): presumably a plotting decorator injects ``ax``;
            confirm against the caller.
    """
    pbcc = PBCCalculator(self._sn.structure.cell)
    pts = self.real_positions_for_site(site).copy()

    # Recenter on the 4th point where available; fall back to the last point
    # so sites with fewer than four positions no longer raise IndexError.
    offset = pbcc.cell_centroid - pts[min(3, len(pts) - 1)]
    pts += offset
    pbcc.wrap_points(pts)

    lattice_pos = self._sn.static_structure.positions.copy()
    lattice_pos += offset
    pbcc.wrap_points(lattice_pos)

    # Slice (not index) so the center keeps a (1, 3) shape
    site_pos = self._sn.centers[site:site + 1].copy()
    site_pos += offset
    pbcc.wrap_points(site_pos)

    # Plot point cloud
    plot_points(points=pts, alpha=0.3, marker='.', color='k', **kwargs)
    # Plot site
    plot_points(points=site_pos, color='cyan', **kwargs)
    # Plot everything else
    plot_atoms(self._sn.static_structure, positions=lattice_pos, **kwargs)

    title = "Site %i/%i" % (site, len(self._sn))
    if self._sn.site_types is not None:
        title += " (type %i)" % self._sn.site_types[site]
    kwargs['ax'].set_title(title)
def _get_sites_to_merge(self, st):
    """Cluster sites with Markov clustering on a distance-thresholded connectivity matrix.

    The connectivity matrix comes from ``self.connectivity_matrix_generator``;
    entries between sites further apart than ``self.distance_threshold`` are
    zeroed (symmetrically) before clustering. A warning is logged when
    unusually strong edges are dropped this way.

    Args:
        st (SiteTrajectory)
    Returns:
        The clusters returned by ``markov_clustering``.
    """
    # -- Compute jump statistics
    if not st.site_network.has_attribute('n_ij'):
        ja = JumpAnalysis()
        ja.run(st)

    pbcc = PBCCalculator(st.site_network.structure.cell)

    # -- Build connectivity_matrix
    # Copied because it is modified in place below
    connectivity_matrix = self.connectivity_matrix_generator(
        st.site_network).copy()
    n_sites_before = st.site_network.n_sites
    assert n_sites_before == connectivity_matrix.shape[0]

    centers_before = st.site_network.centers

    # For diagnostic purposes. Builtin `float` as dtype: the `np.float`
    # alias was removed from NumPy.
    no_diag_graph = connectivity_matrix.astype(dtype=float, copy=True)
    np.fill_diagonal(no_diag_graph, np.nan)
    # Rather arbitrary, but this is really just an alarm for if things
    # are really, really wrong
    edge_threshold = np.nanmean(
        no_diag_graph) + 3 * np.nanstd(no_diag_graph)
    n_alarming_ignored_edges = 0

    # Apply distance threshold
    for i in range(n_sites_before):
        # Only sites after `i` are checked; both directions are zeroed below
        dists = pbcc.distances(centers_before[i], centers_before[i + 1:])
        js_too_far = np.where(dists > self.distance_threshold)[0]
        js_too_far += i + 1  # indices were relative to the slice

        if np.any(connectivity_matrix[i, js_too_far] > edge_threshold) or \
           np.any(connectivity_matrix[js_too_far, i] > edge_threshold):
            n_alarming_ignored_edges += 1

        connectivity_matrix[i, js_too_far] = 0
        connectivity_matrix[js_too_far, i] = 0  # Symmetry

    if n_alarming_ignored_edges > 0:
        logger.warning(
            " At least %i site pairs with high (z-score > 3) fluxes were over the given distance cutoff.\n"
            " This may or may not be a problem; but if `distance_threshold` is low, consider raising it."
            % n_alarming_ignored_edges)

    # -- Do Markov Clustering
    clusters = markov_clustering(connectivity_matrix,
                                 **self.markov_parameters)
    return clusters
def run(self, sn):
    """Return a copy of `sn` in which every site is replaced by ``self.n`` jittered copies.

    Each center is repeated ``self.n`` times, perturbed by Gaussian noise
    scaled by ``self.sigma``, and wrapped with a ``PBCCalculator`` built from
    the structure's cell.

    Args:
        sn (SiteNetwork)
    Returns:
        The modified copy of `sn`; `sn` itself is not reassigned.
    """
    assert isinstance(sn, SiteNetwork)

    result = sn.copy()

    jittered = result.centers.repeat(self.n, axis=0)
    assert len(jittered) == self.n * len(result.centers)

    noise = np.random.standard_normal(size=jittered.shape)
    jittered += self.sigma * noise

    PBCCalculator(sn.structure.cell).wrap_points(jittered)

    result.centers = jittered
    return result
def _get_sites_to_merge(self, st, threshold=0):
    """Group sites into merge groups from a thresholded edge attribute.

    The site-network attribute named ``self.attrname`` (an n_sites x n_sites
    matrix) is turned into a boolean connectivity matrix with
    ``self.relation(attrmat, threshold)``. Connections are then removed
    between sites further apart than ``self.distance_threshold`` and, when
    ``self.forbid_multiple_occupancy`` is set, between sites that are ever
    occupied in the same frame. The merge groups are the connected components
    of what remains.

    Args:
        st (SiteTrajectory)
        threshold: second argument handed to ``self.relation``.
    Returns:
        list of index arrays, one per merge group.
    """
    network = st.site_network
    n_sites = network.n_sites

    edge_values = getattr(network, self.attrname)
    assert edge_values.shape == (
        n_sites, n_sites
    ), "`attrname` doesn't seem to indicate an edge property."

    connected = self.relation(edge_values, threshold)

    # Apply distance threshold
    if self.distance_threshold < np.inf:
        calc = PBCCalculator(network.structure.cell)
        positions = network.centers
        for first in range(network.n_sites):
            separations = calc.distances(positions[first],
                                         positions[first + 1:])
            distant = np.where(separations > self.distance_threshold)[0]
            distant += first + 1  # offsets were relative to the slice
            connected[first, distant] = False
            connected[distant, first] = False  # Symmetry

    if self.forbid_multiple_occupancy:
        for assignments in st.traj:
            occupied = [s for s in assignments if s >= 0]  # only known
            for occupied_site in occupied:
                # can't merge occupied site with other simultaneously
                # occupied sites
                connected[occupied_site, occupied] = False

    # Everything is always mergable with itself.
    np.fill_diagonal(connected, True)

    # Get mergable groups
    n_groups, group_of_site = connected_components(
        connected,
        directed=self.directed,
        connection=self.connection)

    # MergeSites will check pairwise distances; we just need to make it the
    # right format.
    return [np.where(group_of_site == g)[0] for g in range(n_groups)]
def func(atoms, **kwargs): nonlocal pbcc, dmat, connmat, newtags, layer_mask # preallocate buffers if pbcc is None: pbcc = PBCCalculator(atoms.cell) dmat = np.empty(shape=(len(atoms), len(atoms))) connmat = np.empty(shape=(len(atoms), len(atoms)), dtype=np.bool) newtags = np.empty(shape=len(atoms), dtype=np.int) layer_mask = np.empty(shape=len(atoms), dtype=np.bool) tags = groupfunc(atoms, **kwargs) layers = np.unique(tags) layers.sort() newtags.fill(-1) pbcc.pairwise_distances(atoms.positions, out=dmat) np.less_equal(dmat, cutoff, out=connmat) agreegrp_conns = [] nexttag = 0 for layer in layers: np.equal(tags, layer, out=layer_mask) layer_conrows = connmat[layer_mask] layer_conmat = layer_conrows[:, layer_mask] n_groups_layer, group_tags = connected_components(layer_conmat, directed=False) group_tags += nexttag newtags[layer_mask] = group_tags neighbor_groups = newtags[np.logical_or.reduce(layer_conrows, axis=0)] agreegrp_conns.append(neighbor_groups) nexttag += n_groups_layer agreegrp_connmat = np.zeros(shape=(nexttag + 1, nexttag + 1), dtype=np.bool) for agreegrp, neighbors in enumerate(agreegrp_conns): agreegrp_connmat[agreegrp, neighbors] = True agreegrp_connmat = agreegrp_connmat[:-1, :-1] agreegrp_connmat |= agreegrp_connmat.T return newtags, np.arange(nexttag), agreegrp_connmat
def plot_atoms(atoms, positions = None, hide_species = (), wrap = False, fig = None, ax = None, i = None):
    """Scatter-plot the atoms of a structure and draw the outline of its cell.

    Args:
        atoms: structure providing chemical symbols, positions and cell.
        positions: optional positions to plot instead of ``atoms``' own.
        hide_species: chemical symbols that should not be drawn.
        wrap (bool): wrap the plotted positions with a ``PBCCalculator``
            before plotting.
        fig, ax, i: `ax` is the 3D axes drawn on; `fig` and `i` are not used
            in this function.
            NOTE(review): presumably a plotting decorator supplies these;
            confirm against the caller.
    """
    symbols = atoms.get_chemical_symbols()
    mask = [not (e in hide_species) for e in symbols]

    if positions is None:
        pts = atoms.get_positions()
    else:
        pts = positions
    # Boolean-mask indexing returns a copy, so wrapping below cannot modify
    # the caller's array.
    pts = pts[mask]
    species = [s for s, shown in zip(symbols, mask) if shown]

    if wrap:
        # Wrap the points actually being plotted. Previously the positions
        # were re-read from `atoms` here, which discarded both `positions`
        # and the species mask.
        pbcc = PBCCalculator(atoms.cell)
        pbcc.wrap_points(pts)

    ax.scatter(pts[:,0], pts[:,1], pts[:,2],
               c = [color_for_species(s) for s in species],
               s = [20.0 * ase.data.covalent_radii[ase.data.atomic_numbers[s]] for s in species])

    # Collect the 12 edges of the cell as (start, end) pairs
    all_cvecs = []

    whos_left = set(range(len(atoms.cell)))
    for idex, cvec1 in enumerate(atoms.cell):
        # Edge from the origin along this cell vector
        all_cvecs.append(np.array([[0.0, 0.0, 0.0], cvec1]))
        # Edges leaving the tip of this cell vector
        for cvec2 in atoms.cell[list(whos_left - {idex})]:
            all_cvecs.append(np.array([cvec1, cvec1 + cvec2]))

    # Edges meeting at the corner opposite the origin
    for idex, cvec1 in enumerate(atoms.cell):
        start = np.sum(atoms.cell[list(whos_left - {idex})], axis = 0)
        all_cvecs.append(np.array([start, start + cvec1]))

    for cvec in all_cvecs:
        ax.plot(cvec[:,0], cvec[:,1], cvec[:,2],
                color = "gray", alpha=0.5, linewidth = 0.7, linestyle="--")

    set_axes_equal(ax)
def replace_with_closer(st, mobile_atom, before_site, start_frame, after_site, end_frame):
    """Assign each frame in ``[start_frame, end_frame)`` to the nearer of two sites.

    For every frame in the range, the position of `mobile_atom` is compared
    with the centers of `before_site` and `after_site`; the frame gets
    `before_site` when that center is strictly closer, otherwise `after_site`.

    Relies on the buffers ``ptbuf`` and ``distbuf`` from the enclosing scope.

    Returns:
        ``SiteTrajectory.SITE_UNKNOWN`` if either site is unknown; otherwise
        an ndarray with ``end_frame - start_frame`` entries holding site
        indices.
    """
    if before_site == SiteTrajectory.SITE_UNKNOWN or \
       after_site == SiteTrajectory.SITE_UNKNOWN:
        return SiteTrajectory.SITE_UNKNOWN

    # Use a local calculator. The previous code assigned to `pbcc` here
    # without declaring it nonlocal, which made the name local to this
    # function and raised UnboundLocalError on the `pbcc is None` check.
    calc = PBCCalculator(st.site_network.structure.cell)

    n_frames = end_frame - start_frame
    out = np.empty(shape=n_frames)

    for i in range(n_frames):
        # The point buffer is refilled every iteration since it is handed to
        # `distances` with in_place=True.
        ptbuf[0] = st.site_network.centers[before_site]
        ptbuf[1] = st.site_network.centers[after_site]

        calc.distances(st.real_trajectory[start_frame + i, mobile_atom],
                       ptbuf,
                       in_place=True,
                       out=distbuf)
        if distbuf[0] < distbuf[1]:
            out[i] = before_site
        else:
            out[i] = after_site

    return out
def cfunc(sn):
    """Build a connectivity matrix from jump probabilities, jump lags and distances.

    The result is ``(p_ij + jump_lag_coeff * L) * (distance_coeff * D +
    (1 - distance_coeff))`` where ``L`` is a Gaussian of the jump lag centered
    on 1 frame (set to zero where the lag exceeds ``jump_lag_cutoff``) and
    ``D`` is a Gaussian of the pairwise distance between site centers.

    Args:
        sn: site network providing ``jump_lag``, ``p_ij``, ``centers`` and
            ``structure.cell``.
    """
    # Jump-lag term. Centered around 1 since that's the minimum lag, 1 frame.
    scaled_lag = (sn.jump_lag - 1.0) / jump_lag_sigma
    # exp correctly takes the -infs to 0
    lag_term = np.exp(-0.5 * np.square(scaled_lag))
    lag_term[sn.jump_lag > jump_lag_cutoff] = 0.

    # Distance term.
    # We want to strongly boost the similarity of *very* close sites
    separations = PBCCalculator(sn.structure.cell).pairwise_distances(sn.centers)
    distance_term = np.exp(-0.5 * np.square(separations / distance_sigma))

    dynamics_factor = sn.p_ij + jump_lag_coeff * lag_term
    distance_factor = distance_coeff * distance_term + (1 - distance_coeff)
    return dynamics_factor * distance_factor
def run(self, st):
    """Compute convex-hull volumes and surface areas of the positions of each site.

    The positions of every site are recentered ``self.n_recenterings`` times
    (shift-and-wrap), and the hull with the smallest volume is kept.

    Adds the ``site_volumes`` and ``site_surface_areas`` attributes to the
    site network of `st`. Both are NaN for a site that has no positions or
    for which QHull fails on every recentering.

    Args:
        st (SiteTrajectory)
    """
    # Builtin `float` as dtype: the `np.float` alias was removed from NumPy.
    vols = np.empty(shape = st.site_network.n_sites, dtype = float)
    areas = np.empty(shape = st.site_network.n_sites, dtype = float)

    pbcc = PBCCalculator(st.site_network.structure.cell)

    for site in range(st.site_network.n_sites):
        pos = st.real_positions_for_site(site)
        # `pos` is shifted in place below, so it must not alias the trajectory
        assert pos.flags['OWNDATA']

        # NaN until at least one hull succeeds; previously a site whose hulls
        # all failed was reported with an infinite volume.
        vol = np.nan
        area = np.nan

        # A site without any positions has nothing to recenter around
        n_tries = self.n_recenterings if len(pos) > 0 else 0

        for i in range(n_tries):
            # Recenter
            offset = pbcc.cell_centroid - pos[int(i * (len(pos)/self.n_recenterings))]
            pos += offset
            pbcc.wrap_points(pos)

            try:
                hull = ConvexHull(pos)
            except QhullError as qhe:
                print("For site %i, iter %i: %s" % (site, i, qhe))
                continue

            if np.isnan(vol) or hull.volume < vol:
                vol = hull.volume
                area = hull.area

        vols[site] = vol
        areas[site] = area

    st.site_network.add_site_attribute('site_volumes', vols)
    st.site_network.add_site_attribute('site_surface_areas', areas)
class LandmarkAnalysis(object):
    """Track a mobile species through a fixed lattice using landmark vectors."""

    def __init__(self,
                 clustering_algorithm='dotprod',
                 clustering_params=None,
                 cutoff=2.0,
                 minimum_site_occupancy=0.1,
                 peak_evening='none',
                 weighted_site_positions=True,
                 check_for_zero_landmarks=True,
                 static_movement_threshold=1.0,
                 dynamic_lattice_mapping=False,
                 relaxed_lattice_checks=False,
                 max_mobile_per_site=1,
                 force_no_memmap=False,
                 verbose=True):
        """
        :param str clustering_algorithm: Name of the module (relative to the
            ``cluster`` subpackage) whose ``do_landmark_clustering`` function
            is used to cluster the landmark vectors.
        :param double cutoff: The distance cutoff for the landmark vectors. (unitless)
        :param double minimum_site_occupancy = 0.1: Minimum occupancy (% of time occupied)
            for a site to qualify as such.
        :param dict clustering_params: Parameters for the chosen clustering_algorithm.
            ``None`` (the default) means an empty dict.
        :param str peak_evening: Whether and what kind of peak "evening" to apply;
            that is, processing that makes all large peaks in the landmark vector
            more similar in magnitude. This can help in site clustering.

            Valid options: 'none', 'clip'
        :param bool weighted_site_positions: When computing site positions, whether
            to weight the average by assignment confidence.
        :param bool check_for_zero_landmarks: Whether to check for and raise exceptions
            when all-zero landmark vectors are computed.
        :param float static_movement_threshold: (Angstrom) the maximum allowed
            distance between an instantaneous static atom position and its ideal position.
        :param bool dynamic_lattice_mapping: Whether to dynamically decide each
            frame which static atom represents each average lattice position;
            this allows the LandmarkAnalysis to deal with, say, a rare exchange of
            two static atoms that does not change the structure of the lattice.

            It does NOT allow LandmarkAnalysis to deal with lattices whose structures
            actually change over the course of the trajectory.

            In certain cases this is better dealt with by MergeSitesByDynamics.
        :param int max_mobile_per_site: The maximum number of mobile atoms that can
            be assigned to a single site without throwing an error. Regardless of the
            value, assignments of more than one mobile atom to a single site will
            be recorded and reported.

            Setting this to 2 can be necessary for very diffusive, liquid-like
            materials at high temperatures.

            Statistics related to this are reported in self.avg_mobile_per_site
            and self.n_multiple_assignments.
        :param bool force_no_memmap: if True, landmark vectors will be stored only in memory.
            Only useful if access to landmark vectors after the analysis has run is desired.
        :param bool verbose: If `True`, progress bars and messages will be printed to stdout.
        """

        self._cutoff = cutoff
        self._minimum_site_occupancy = minimum_site_occupancy

        self._cluster_algo = clustering_algorithm
        # A fresh dict per instance; a `{}` default in the signature would be
        # shared between all instances.
        self._clustering_params = {} if clustering_params is None else clustering_params

        if peak_evening not in ['none', 'clip']:
            raise ValueError("Invalid value `%s` for peak_evening" % peak_evening)
        self._peak_evening = peak_evening

        self.verbose = verbose
        self.check_for_zero_landmarks = check_for_zero_landmarks
        self.weighted_site_positions = weighted_site_positions
        self.dynamic_lattice_mapping = dynamic_lattice_mapping
        self.relaxed_lattice_checks = relaxed_lattice_checks

        self._landmark_vectors = None
        self._landmark_dimension = None

        self.static_movement_threshold = static_movement_threshold
        self.max_mobile_per_site = max_mobile_per_site

        self.force_no_memmap = force_no_memmap

        self._has_run = False

    @property
    def cutoff(self):
        return self._cutoff

    @analysis_result
    def landmark_vectors(self):
        # Hand out a read-only view so callers cannot alter the stored vectors
        view = self._landmark_vectors[:]
        view.flags.writeable = False
        return view

    @analysis_result
    def landmark_dimension(self):
        return self._landmark_dimension

    def run(self, sn, frames):
        """Run the landmark analysis.

        The input SiteNetwork is a network of predicted sites; its sites will
        be used as the "basis" for the landmark vectors.

        Takes a SiteNetwork and returns a SiteTrajectory.

        :param sn: SiteNetwork with vertex information.
        :param frames: ndarray of shape (n_frames, sn.n_total, 3).
        :raises ValueError: if the analysis has already been run, `frames` has
            the wrong shape, `sn` has no vertices, fewer sites than mobile
            particles are identified, or more than ``max_mobile_per_site``
            mobile particles are assigned to one site in a frame.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            # The shape is a tuple and must be wrapped, otherwise `%` treats
            # it as the argument list and raises TypeError.
            raise ValueError("Wrong shape %s for frames." % (frames.shape,))

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        if self.verbose:
            print("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        # Vertex lists are padded with -1 to a common length
        longest_vert_set = np.max([len(v) for v in sn.vertices])
        verts_np = np.array(
            [v + [-1] * (longest_vert_set - len(v)) for v in sn.vertices])
        # Builtin `float` as dtype: the `np.float` alias was removed from NumPy.
        site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
        site_vert_dists.fill(np.nan)

        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = sn.static_structure.get_positions()[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        if self.verbose:
            print(" - computing landmark vectors -")
        # Compute landmark vectors

        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        # The backing file is kept open until clustering is done, i.e. for as
        # long as this method works with the (possibly memory-mapped) vectors.
        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self, sn, verts_np, site_vert_dists, frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm)

            # -- Step 3: Cluster landmark vectors
            if self.verbose:
                print(" - clustering landmark vectors -")
            # - Preprocess -
            self._do_peak_evening()

            # - Cluster -
            cluster_func = importlib.import_module(
                "..cluster." + self._cluster_algo,
                package=__name__).do_landmark_clustering

            cluster_counts, lmk_lbls, lmk_confs = \
                cluster_func(self._landmark_vectors,
                             clustering_params = self._clustering_params,
                             min_samples = self._minimum_site_occupancy / float(sn.n_mobile),
                             verbose = self.verbose)

        if self.verbose:
            print(" Failed to assign %i%% of mobile particle positions to sites." % (
                100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < sn.n_mobile:
            raise ValueError(
                "There are %i mobile particles, but only identified %i sites. Check clustering_params."
                % (sn.n_mobile, n_sites))

        if self.verbose:
            print(" Identified %i sites with assignment counts %s" % (
                n_sites, cluster_counts))

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        n_more_than_ones = 0
        avg_mobile_per_site = 0
        divisor = 0
        for frame_i, site_frame in enumerate(lmk_lbls):
            sites, counts = np.unique(site_frame[site_frame >= 0],
                                      return_counts=True)
            count_msk = counts > self.max_mobile_per_site
            if np.any(count_msk):
                # Report the offending site labels themselves rather than
                # their positions within the per-frame unique array.
                raise ValueError(
                    "%i mobile particles were assigned to only %i site(s) (%s) at frame %i."
                    % (np.sum(counts[count_msk]), np.sum(count_msk),
                       sites[count_msk], frame_i))
            n_more_than_ones += np.sum(counts > 1)
            avg_mobile_per_site += np.sum(counts)
            divisor += len(counts)

        self.n_multiple_assignments = n_more_than_ones
        self.avg_mobile_per_site = avg_mobile_per_site / float(divisor)

        # -- Do output
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)

        for site in range(n_sites):
            mask = lmk_lbls == site
            pts = frames[:, sn.mobile_mask][mask]
            if self.weighted_site_positions:
                site_centers[site] = self._pbcc.average(
                    pts, weights=lmk_confs[mask])
            else:
                site_centers[site] = self._pbcc.average(pts)

        # Build output objects
        out_sn = sn.copy()

        out_sn.centers = site_centers
        assert out_sn.vertices is None

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)
        out_st.set_real_traj(frames)
        self._has_run = True

        return out_st

    # -------- "private" methods --------

    def _do_peak_evening(self):
        """Apply the configured peak evening to the landmark vectors in place."""
        if self._peak_evening == 'none':
            return
        elif self._peak_evening == 'clip':
            lvec_peaks = np.max(self._landmark_vectors, axis=1)
            # Clip all peaks to the lowest "normal" (stdev.) peak
            lvec_clip = np.mean(lvec_peaks) - np.std(lvec_peaks)
            # Do the clipping
            self._landmark_vectors[
                self._landmark_vectors > lvec_clip] = lvec_clip
def run(self, sn, frames):
    """Run the landmark analysis.

    The input SiteNetwork is a network of predicted sites; its sites will
    be used as the "basis" for the landmark vectors.

    Takes a SiteNetwork and returns a SiteTrajectory.

    :param sn: SiteNetwork with vertex information.
    :param frames: ndarray of shape (n_frames, sn.n_total, 3).
    :raises ValueError: if the analysis has already been run, `frames` has
        the wrong shape, `sn` has no vertices, fewer sites than mobile
        particles are identified, or more than ``max_mobile_per_site``
        mobile particles are assigned to one site in a frame.
    """
    assert isinstance(sn, SiteNetwork)

    if self._has_run:
        raise ValueError("Cannot rerun LandmarkAnalysis!")

    if frames.shape[1:] != (sn.n_total, 3):
        # The shape is a tuple and must be wrapped, otherwise `%` treats it
        # as the argument list and raises TypeError.
        raise ValueError("Wrong shape %s for frames." % (frames.shape,))

    if sn.vertices is None:
        raise ValueError("Input SiteNetwork must have vertices")

    n_frames = len(frames)

    if self.verbose:
        print("--- Running Landmark Analysis ---")

    # Create PBCCalculator
    self._pbcc = PBCCalculator(sn.structure.cell)

    # -- Step 1: Compute site-to-vertex distances
    self._landmark_dimension = sn.n_sites

    # Vertex lists are padded with -1 to a common length
    longest_vert_set = np.max([len(v) for v in sn.vertices])
    verts_np = np.array(
        [v + [-1] * (longest_vert_set - len(v)) for v in sn.vertices])
    # Builtin `float` as dtype: the `np.float` alias was removed from NumPy.
    site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
    site_vert_dists.fill(np.nan)

    for i, polyhedron in enumerate(sn.vertices):
        verts_poses = sn.static_structure.get_positions()[polyhedron]
        dists = self._pbcc.distances(sn.centers[i], verts_poses)
        site_vert_dists[i, :len(polyhedron)] = dists

    # -- Step 2: Compute landmark vectors
    if self.verbose:
        print(" - computing landmark vectors -")
    # Compute landmark vectors

    # The dimension of one landmark vector is the number of Voronoi regions
    shape = (n_frames * sn.n_mobile, self._landmark_dimension)

    # The backing file is kept open until clustering is done, i.e. for as
    # long as this method works with the (possibly memory-mapped) vectors.
    with tempfile.NamedTemporaryFile() as mmap_backing:
        if self.force_no_memmap:
            self._landmark_vectors = np.empty(shape=shape, dtype=float)
        else:
            self._landmark_vectors = np.memmap(mmap_backing.name,
                                               mode='w+',
                                               dtype=float,
                                               shape=shape)

        helpers._fill_landmark_vectors(
            self, sn, verts_np, site_vert_dists, frames,
            check_for_zeros=self.check_for_zero_landmarks,
            tqdm=tqdm)

        # -- Step 3: Cluster landmark vectors
        if self.verbose:
            print(" - clustering landmark vectors -")
        # - Preprocess -
        self._do_peak_evening()

        # - Cluster -
        cluster_func = importlib.import_module(
            "..cluster." + self._cluster_algo,
            package=__name__).do_landmark_clustering

        cluster_counts, lmk_lbls, lmk_confs = \
            cluster_func(self._landmark_vectors,
                         clustering_params = self._clustering_params,
                         min_samples = self._minimum_site_occupancy / float(sn.n_mobile),
                         verbose = self.verbose)

    if self.verbose:
        print(" Failed to assign %i%% of mobile particle positions to sites." % (
            100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

    # reshape labels and confidences
    lmk_lbls.shape = (n_frames, sn.n_mobile)
    lmk_confs.shape = (n_frames, sn.n_mobile)

    n_sites = len(cluster_counts)

    if n_sites < sn.n_mobile:
        raise ValueError(
            "There are %i mobile particles, but only identified %i sites. Check clustering_params."
            % (sn.n_mobile, n_sites))

    if self.verbose:
        print(" Identified %i sites with assignment counts %s" % (
            n_sites, cluster_counts))

    # Check that multiple particles are never assigned to one site at the
    # same time, cause that would be wrong.
    n_more_than_ones = 0
    avg_mobile_per_site = 0
    divisor = 0
    for frame_i, site_frame in enumerate(lmk_lbls):
        sites, counts = np.unique(site_frame[site_frame >= 0],
                                  return_counts=True)
        count_msk = counts > self.max_mobile_per_site
        if np.any(count_msk):
            # Report the offending site labels themselves rather than their
            # positions within the per-frame unique array.
            raise ValueError(
                "%i mobile particles were assigned to only %i site(s) (%s) at frame %i."
                % (np.sum(counts[count_msk]), np.sum(count_msk),
                   sites[count_msk], frame_i))
        n_more_than_ones += np.sum(counts > 1)
        avg_mobile_per_site += np.sum(counts)
        divisor += len(counts)

    self.n_multiple_assignments = n_more_than_ones
    self.avg_mobile_per_site = avg_mobile_per_site / float(divisor)

    # -- Do output
    # - Compute site centers
    site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)

    for site in range(n_sites):
        mask = lmk_lbls == site
        pts = frames[:, sn.mobile_mask][mask]
        if self.weighted_site_positions:
            site_centers[site] = self._pbcc.average(
                pts, weights=lmk_confs[mask])
        else:
            site_centers[site] = self._pbcc.average(pts)

    # Build output objects
    out_sn = sn.copy()

    out_sn.centers = site_centers
    assert out_sn.vertices is None

    out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)
    out_st.set_real_traj(frames)
    self._has_run = True

    return out_st
def _get_sites_to_merge(self, st, coordinating_mask = None):
    """Group sites that are close, connected by jumps, and not separated by an energy barrier.

    Candidate pairs are those within ``self.maximum_pairwise_distance`` with
    at least ``self.minimum_jumps_mergable`` jumps (``n_ij``). For each
    candidate pair a single mobile atom is driven along the straight
    minimum-image line between the two centers in ``self.n_driven_images``
    steps and the potential energy is evaluated with ``self.calculator``. If
    the energy maximum lies strictly between the end points, each direction
    stays mergable only when its barrier is at most
    ``self.barrier_threshold``.

    Args:
        st (SiteTrajectory)
        coordinating_mask: mask of the atoms kept in the structure used for
            the energy evaluations; defaults to the static mask. It must not
            include mobile atoms.
    Returns:
        list of index arrays, one per merge group (strongly connected
        components of the mergability graph).
    """
    sn = st.site_network

    # -- Compute jump statistics
    if not sn.has_attribute('n_ij'):
        JumpAnalysis().run(st)

    pos = sn.centers

    if coordinating_mask is None:
        coordinating_mask = sn.static_mask
    else:
        assert not np.any(coordinating_mask & sn.mobile_mask)

    # -- Build images
    # Coordinating atoms plus one mobile atom, which is appended last
    first_mobile = np.where(sn.mobile_mask)[0][0]
    probe_structure = sn.structure[coordinating_mask]
    probe_structure.extend(sn.structure[first_mobile])
    probe_idex = -1
    probe_structure.set_calculator(self.calculator)

    n_images = self.n_driven_images
    interpolation_coeffs = np.linspace(0, 1, n_images)
    energies = np.empty(shape = n_images)

    # -- Decide on pairs to check
    pbcc = PBCCalculator(sn.structure.cell)
    # At the start, all within distance cutoff are mergable
    mergable = pbcc.pairwise_distances(pos) <= self.maximum_pairwise_distance
    mergable &= sn.n_ij >= self.minimum_jumps_mergable

    # -- Check pairs' barriers
    # Symmetric, and diagonal is trivially true. Combinations avoids those cases.
    target_buf = pos[0].copy()
    candidate_pairs = (
        (a, b)
        for a, b in itertools.combinations(range(sn.n_sites), r = 2)
        if mergable[a, b] or mergable[b, a]
    )
    n_candidates = (np.sum(mergable) - sn.n_sites) // 2

    for i, j in tqdm(candidate_pairs, total = n_candidates):
        target_buf[:] = pos[j]
        # Get minimage
        pbcc.min_image(pos[i], target_buf)
        # Do coordinate driving
        vector = target_buf - pos[i]
        for image_i in range(n_images):
            probe_structure.positions[probe_idex] = vector
            probe_structure.positions[probe_idex] *= interpolation_coeffs[image_i]
            probe_structure.positions[probe_idex] += pos[i]
            energies[image_i] = probe_structure.get_potential_energy()

        # Check barrier
        barrier_idex = np.argmax(energies)
        # If it's an actual maxima barrier between them, then we want to
        # check its height. Otherwise, if there's no maxima between them,
        # they are in the same basin.
        if barrier_idex not in (0, n_images - 1):
            peak = energies[barrier_idex]
            mergable[i, j] = (peak - energies[0]) <= self.barrier_threshold
            mergable[j, i] = (peak - energies[-1]) <= self.barrier_threshold

    # Get mergable groups
    n_merged_sites, labels = connected_components(
        mergable,
        directed = True,
        connection = 'strong'
    )

    # MergeSites will check pairwise distances; we just need to make it the
    # right format.
    return [np.where(labels == lbl)[0] for lbl in range(n_merged_sites)]
def run(self, st, **kwargs):
    """Takes a ``SiteTrajectory`` and returns a new ``SiteTrajectory``.

    The groups of sites to merge come from ``self._get_sites_to_merge``. Each
    group becomes one new site whose center is the periodic average of the
    merged centers (weighted by site occupancy when
    ``self.weighted_spatial_average`` is set), and the trajectory is
    relabelled accordingly.

    Args:
        st (SiteTrajectory)
        **kwargs: forwarded to ``self._get_sites_to_merge``.
    Returns:
        A new ``SiteTrajectory`` (without confidences) on the merged network.
    Raises:
        ValueError: if types must be checked but `st` has no type
            information, or if a site is assigned to more than one group.
        InsufficientSitesError: if fewer sites than mobile particles remain.
        MergedSitesTooDistantError: if merged sites are further apart than
            ``self.maximum_merge_distance``.
    """
    if self.check_types and st.site_network.site_types is None:
        raise ValueError(
            "Cannot run a check_types=True MergeSites on a SiteTrajectory without type information."
        )

    pbcc = PBCCalculator(st.site_network.structure.cell)
    site_centers = st.site_network.centers
    if self.check_types:
        site_types = st.site_network.site_types

    clusters = self._get_sites_to_merge(st, **kwargs)

    new_n_sites = len(clusters)

    logger.info(
        "After merging %i sites there will be %i sites for %i mobile particles"
        % (len(site_centers), new_n_sites, st.site_network.n_mobile))

    if new_n_sites < st.site_network.n_mobile:
        raise InsufficientSitesError(verb="Merging",
                                     n_sites=new_n_sites,
                                     n_mobile=st.site_network.n_mobile)

    # Builtin `int` as dtype: the `np.int` alias was removed from NumPy.
    if self.check_types:
        new_types = np.empty(shape=new_n_sites, dtype=int)

    merge_verts = st.site_network.vertices is not None
    if merge_verts:
        new_verts = []

    # -- Merge Sites
    new_centers = np.empty(shape=(new_n_sites, 3),
                           dtype=st.site_network.centers.dtype)
    # Maps old site index -> new site index; -1 means "not assigned yet"
    translation = np.empty(shape=st.site_network.n_sites, dtype=int)
    translation.fill(-1)

    for newsite in range(new_n_sites):
        mask = list(clusters[newsite])
        # Update translation table
        if np.any(translation[mask] != -1):
            # We've assigned a different cluster for this before... weird
            # degeneracy
            raise ValueError(
                "Site merging tried to merge site(s) into more than one new site. This shouldn't happen."
            )
        translation[mask] = newsite

        to_merge = site_centers[mask]

        # Check distances
        if self.maximum_merge_distance is not None:
            dists = pbcc.distances(to_merge[0], to_merge[1:])
            if not np.all(dists <= self.maximum_merge_distance):
                raise MergedSitesTooDistantError(
                    "Markov clustering tried to merge sites more than %.2f apart. Lower your distance_threshold?"
                    % self.maximum_merge_distance)

        # New site center. The branches used to be swapped, so the occupancy
        # weighting was applied exactly when it had been switched off.
        if self.weighted_spatial_average:
            occs = st.site_network.occupancies[mask]
            new_centers[newsite] = pbcc.average(to_merge, weights=occs)
        else:
            new_centers[newsite] = pbcc.average(to_merge)

        if self.check_types:
            assert np.all(site_types[mask] == site_types[mask][0])
            new_types[newsite] = site_types[mask][0]
        if merge_verts:
            # A merged site's vertices are the union of its members' vertices
            new_verts.append(
                set.union(
                    *[set(st.site_network.vertices[i]) for i in mask]))

    newsn = st.site_network.copy()
    newsn.centers = new_centers
    if self.check_types:
        newsn.site_types = new_types
    if merge_verts:
        newsn.vertices = new_verts

    newtraj = translation[st._traj]
    # Unknown assignments index the translation table from the end, so they
    # have to be restored explicitly.
    newtraj[st._traj == SiteTrajectory.SITE_UNKNOWN] = SiteTrajectory.SITE_UNKNOWN

    # It doesn't make sense to propagate confidence information through a
    # transform that might completely invalidate it
    newst = SiteTrajectory(newsn, newtraj, confidences=None)

    if st.real_trajectory is not None:
        newst.set_real_traj(st.real_trajectory)

    if self.set_merged_into:
        if st.site_network.has_attribute("merged_into"):
            st.site_network.remove_attribute("merged_into")
        st.site_network.add_site_attribute("merged_into", translation)

    return newst
def run(self, st):
    """Merge sites by Markov clustering of the jump statistics.

    Takes a SiteTrajectory and returns a SiteTrajectory, including a new
    SiteNetwork.

    Args:
        st (SiteTrajectory): The trajectory whose sites are merged. If its
            site network has no ``p_ij`` attribute, a ``JumpAnalysis`` is
            run on ``st`` first.

    Returns:
        A new ``SiteTrajectory`` on a copy of the site network with merged
        centers (and types when ``check_types`` is set). Confidences are
        dropped.

    Raises:
        ValueError: If ``check_types`` is set but the network has no site
            types, or if one old site ends up in more than one new site.
        AssertionError: If a cluster contains a site at or beyond
            ``distance_threshold`` from the cluster's first site, or mixes
            site types while ``check_types`` is set.
    """
    old_sn = st.site_network

    if self.check_types and old_sn.site_types is None:
        raise ValueError(
            "Cannot run a check_types=True MergeSitesByDynamics on a SiteTrajectory without type information."
        )

    # Compute jump statistics
    if not old_sn.has_attribute('p_ij'):
        ja = JumpAnalysis(verbose=self.verbose)
        ja.run(st)

    pbcc = PBCCalculator(old_sn.structure.cell)
    site_centers = old_sn.centers
    if self.check_types:
        site_types = old_sn.site_types

    connectivity_matrix = old_sn.p_ij
    assert old_sn.n_sites == connectivity_matrix.shape[0]

    clusters = self._markov_clustering(connectivity_matrix,
                                       **self.markov_parameters)

    new_n_sites = len(clusters)

    if self.verbose:
        # Function-call form works on both Python 2 and 3; the old print
        # statement is a SyntaxError on Python 3.
        print("After merge there will be %i sites" % new_n_sites)

    if self.check_types:
        # `np.int` was only an alias of the builtin and is gone from
        # current NumPy.
        new_types = np.empty(shape=new_n_sites, dtype=int)

    new_centers = np.empty(shape=(new_n_sites, 3), dtype=site_centers.dtype)
    # translation[old_site] -> new_site; -1 means "not assigned yet"
    translation = np.full(shape=old_sn.n_sites, fill_value=-1, dtype=int)

    for newsite in range(new_n_sites):
        mask = list(clusters[newsite])

        # Update translation table
        if np.any(translation[mask] != -1):
            # We've assigned a different cluster for this before... weird
            # degeneracy
            raise ValueError(
                "Markov clustering tried to merge site(s) into more than one new site"
            )
        translation[mask] = newsite

        to_merge = site_centers[mask]

        # Check distances (measured from the first site of the cluster)
        dists = pbcc.distances(to_merge[0], to_merge[1:])
        assert np.all(
            dists < self.distance_threshold
        ), "Markov clustering tried to merge sites more than %f apart -- this may be valid, and the distance threshold may need to be increased." % self.distance_threshold

        # New site center
        new_centers[newsite] = pbcc.average(to_merge)
        if self.check_types:
            assert np.all(site_types[mask] == site_types[mask][0])
            new_types[newsite] = site_types[mask][0]

    newsn = old_sn.copy()
    newsn.centers = new_centers
    if self.check_types:
        newsn.site_types = new_types

    # Relabel the trajectory. Unknown entries must stay unknown rather than
    # being looked up in the translation table.
    newtraj = translation[st._traj]
    newtraj[st._traj == SiteTrajectory.SITE_UNKNOWN] = SiteTrajectory.SITE_UNKNOWN

    # It doesn't make sense to propagate confidence information through a
    # transform that might completely invalidate it
    newst = SiteTrajectory(newsn, newtraj, confidences=None)

    if st.real_trajectory is not None:
        newst.set_real_traj(st.real_trajectory)

    return newst
def _plot_edges(self, sn, ax = None, *args, **kwargs):
    """Draw the edges of site network ``sn`` as a ``Line3DCollection`` on ``ax``.

    Edge properties are read from the attributes of ``sn`` named in
    ``self.edge_mappings`` (keys ``'intensity'``, ``'width'``, ``'group'``).
    Nothing is drawn unless an ``'intensity'`` mapping exists.

    Args:
        sn: The site network to plot.
        ax: Axes the collection is added to.
            NOTE(review): the default is None but ``ax.add_collection`` is
            called unconditionally, so a real axes object is presumably
            supplied by the caller or a decorator -- confirm.
        *args, **kwargs: Accepted but not used in this method.

    Returns:
        The result of ``self._site_layers`` for sites that had to be drawn
        at a periodic-image position, or ``[]`` if there are none.

    Raises:
        KeyError: On an unknown key in ``self.edge_mappings``.
        ValueError: If a group index has no color in ``EDGE_GROUP_COLORS``.
    """
    if 'intensity' not in self.edge_mappings:
        return []

    pbcc = PBCCalculator(sn.structure.cell)

    n_sites = sn.n_sites
    centers = sn.centers

    # -- Edge attributes
    all_cs = None
    all_linewidths = None
    all_groups = None
    # Get value arrays as they exist
    for edgekey, attrname in self.edge_mappings.items():
        edgeval = getattr(sn, attrname)
        if edgekey == 'intensity':
            all_cs = edgeval.copy()
        elif edgekey == 'width':
            all_linewidths = edgeval.copy()
        elif edgekey == 'group':
            # `np.int` was only an alias of the builtin and is gone from
            # current NumPy; comparing with `int` is the same check.
            assert edgeval.dtype == int
            all_groups = edgeval
        else:
            raise KeyError("Invalid edge mapping key `%s`" % edgekey)

    do_widths = all_linewidths is not None
    do_groups = all_groups is not None

    # - Normalize
    # Ignore values on the diagonal since we ignore them in the loop
    diag_mask = np.ones(shape = all_cs.shape, dtype = bool)
    np.fill_diagonal(diag_mask, False)

    self._normalize(all_cs, diag_mask)
    if do_widths:
        self._normalize(all_linewidths, diag_mask)

    # -- Construct Line3DCollection segments

    # Whether an edge has already been added
    done_already = np.zeros(shape = (n_sites, n_sites), dtype = bool)
    # For the Line3DCollection
    segments = []
    cs = []
    linewidths = []
    groups = []
    # To plot minimum images that are outside unit cell
    sites_to_plot = []
    sites_to_plot_positions = []

    for i in range(n_sites):
        for j in range(n_sites):
            # No self edges
            if i == j:
                continue
            # If was already done
            if done_already[i, j]:
                continue
            # Ignore anything below the threshold
            if all_cs[i, j] <= self.min_color_threshold:
                continue
            if do_widths and all_linewidths[i, j] <= self.min_width_threshold:
                continue

            segment = np.empty(shape = (2, 3), dtype = centers.dtype)
            segment[0] = centers[i]
            ptbuf = centers[j].copy()

            # min_image modifies ptbuf in place; 111 is the code compared
            # against for "already the minimum image"
            minimg = pbcc.min_image(segment[0], ptbuf)
            was_already_min_img = minimg == 111
            segment[1] = ptbuf

            segments.append(segment)

            # If they are each other's minimum image, then don't bother
            # plotting j -> i
            if was_already_min_img:
                done_already[j, i] = True
            else:
                # We'll plot it
                sites_to_plot.append(j)
                sites_to_plot_positions.append(segment[1])

            # The mean of both directions
            cs.append(np.mean([all_cs[i, j], all_cs[j, i]]))
            if do_widths:
                linewidths.append(np.mean([all_linewidths[i, j], all_linewidths[j, i]]))
            if do_groups:
                # Assumes symmetric
                groups.append(all_groups[i, j])

            done_already[i, j] = True

    # -- Construct final Line3DCollection
    assert len(cs) == len(segments)

    if len(cs) == 0:
        return []

    lccolors = np.empty(shape = (len(cs), 4), dtype = float)
    # Group colors
    if do_groups:
        n_colors = len(SiteNetworkPlotter.EDGE_GROUP_COLORS)
        for i, group in enumerate(groups):
            if group >= n_colors - 1:
                raise ValueError("Too many groups, not enough group colors")
            lccolors[i] = matplotlib.colors.to_rgba(SiteNetworkPlotter.EDGE_GROUP_COLORS[group])
    else:
        lccolors[:] = matplotlib.colors.to_rgba(SiteNetworkPlotter.EDGE_GROUP_COLORS[0])
    # Intensity alpha
    lccolors[:,3] = np.array(cs) * self.minmax_edge_alpha[1]
    lccolors[:,3] += self.minmax_edge_alpha[0]

    if do_widths:
        linewidths = np.asarray(linewidths)
        linewidths *= self.minmax_linewidth[1]
        linewidths += self.minmax_linewidth[0]
    else:
        linewidths = self.minmax_linewidth[1] * 0.5

    lc = Line3DCollection(segments, linewidths = linewidths, colors = lccolors, zorder = -20)
    ax.add_collection(lc)

    # -- Plot new sites
    if len(sites_to_plot) == 0:
        return []

    sn2 = sn[sites_to_plot]
    sn2.update_centers(np.asarray(sites_to_plot_positions))
    pts_params = dict(self.plot_points_params)
    pts_params['alpha'] = 0.2
    return self._site_layers(sn2, pts_params, same_normalization = True)
def periodic_voronoi(structure, logfile=sys.stdout):
    """Compute a Voronoi decomposition of ``structure`` with ``qvoronoi``.

    The structure is repeated into a 3x3x3 supercell, the external
    ``qvoronoi`` executable is run on the supercell positions, and its
    textual output is parsed. Only facets whose center lies in the (1, 1, 1)
    image of the cell are kept.

    :param ASE.Atoms structure: The structure to decompose.
    :param logfile: Writable text stream for progress messages and the
        qvoronoi console output.
    :returns: ``(centers, vertices, ridges_in_main_cell)``: the kept centers
        shifted back by the sum of the cell vectors, for each kept center a
        tuple of atom indexes (taken modulo ``len(structure)``), and a set of
        frozensets of indexes into ``centers``.
    :raises RuntimeError: If ``qvoronoi`` exits with a nonzero code.
    """
    import os  # local import: only needed to remove the temporary input file

    pbcc = PBCCalculator(structure.cell)

    # Make a 3x3x3 supercell
    supercell = structure.repeat((3, 3, 3))

    logfile.write("Qvoronoi ---")

    # Run qhull. The input file is created with delete=False, so it has to
    # be removed explicitly; previously it was left behind on every call.
    infile = tempfile.NamedTemporaryFile('w', prefix = 'qvor', suffix='.in', delete = False)
    try:
        with infile, \
             tempfile.NamedTemporaryFile('r', prefix = 'qvor', suffix='.out', delete=True) as outfile:
            #  -- Write input file --
            infile.write("3\n")  # num of dimensions
            infile.write("%i\n" % len(supercell))  # num of points
            np.savetxt(infile, supercell.get_positions(), fmt='%.16f')
            infile.flush()

            cmdline = [
                "qvoronoi", "TI", infile.name, "FF", "Fv", "TO", outfile.name
            ]
            # Text mode so the output can go to a text logfile on Python 3.
            # communicate() drains the pipe while waiting, so a chatty
            # child cannot block on a full pipe.
            process = subprocess.Popen(cmdline,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       universal_newlines=True)
            console_output, _ = process.communicate()
            logfile.write(console_output)
            if process.returncode != 0:
                raise RuntimeError("qvoronoi returned exit code %i" % process.returncode)

            qhull_output = outfile.read()
    finally:
        # Best-effort cleanup; a failure to delete must not mask the result
        # or an exception that is already propagating.
        try:
            os.remove(infile.name)
        except OSError:
            pass

    # Raw strings keep the regex escapes away from Python's string parser.
    # The exponent part used to be the malformed class `e[-?[0-9]+`; it now
    # accepts an optional sign followed by digits.
    facets_regex = re.compile(
        r"""
        -[ \t](?P<facetkey>f[0-9]+) [\n]
        [ \t]*-[ ]flags: .* [\n]
        [ \t]*-[ ]normal: .* [\n]
        [ \t]*-[ ]offset: .* [\n]
        [ \t]*-[ ]center:(?P<center>([ ][\-]?[0-9]*[\.]?[0-9]*(e[-+]?[0-9]+)?){3}) [ \t] [\n]
        [ \t]*-[ ]vertices:(?P<vertices>([ ]p[0-9]+\(v[0-9]+\))+) [ \t]? [\n]
        [ \t]*-[ ]neighboring[ ]facets:(?P<neighbors>([ ]f[0-9]+)+)
        """, re.X | re.M)

    vertices_re = re.compile(r'(?<=p)[0-9]+')

    # Allocate stuff
    centers = []
    vertices = []
    facet_indexes_taken = set()
    all_facets_centers = []

    # ---- Read facets
    facet_index = -1
    end_of_facets = 0  # stays 0 when no facet record is found
    for facet_match in facets_regex.finditer(qhull_output):
        # Explicit list: on Python 3 `map` is lazy and np.asarray would
        # wrap the iterator object instead of the numbers.
        center = np.asarray(
            [float(x) for x in facet_match.group('center').split()])
        facet_index += 1
        end_of_facets = facet_match.end()

        all_facets_centers.append(center)

        if not pbcc.is_in_image_of_cell(center, (1, 1, 1)):
            continue

        verts = [int(v) for v in vertices_re.findall(facet_match.group('vertices'))]
        # Map supercell atom indexes back to atoms of the input structure
        verts_in_main_cell = tuple(v % len(structure) for v in verts)

        facet_indexes_taken.add(facet_index)

        centers.append(center)
        vertices.append(verts_in_main_cell)

    facet_count = facet_index + 1

    logfile.write(" qhull gave %i vertices; kept %i" %
                  (facet_count, len(centers)))

    # ---- Read ridges
    qhull_output_after_facets = qhull_output[end_of_facets:].strip()
    ridge_re = re.compile(r'^\d+ \d+ \d+(?P<verts>( \d+)+)$', re.M)

    ridges = [[int(v) for v in match.group('verts').split()]
              for match in ridge_re.finditer(qhull_output_after_facets)]
    # only take ridges with at least 1 facet in main unit cell.
    ridges = [r for r in ridges if any(f in facet_indexes_taken for f in r)]

    # shift centers back into normal unit cell
    centers = np.asarray(centers) - np.sum(structure.cell, axis=0)

    nearest_center = KDTree(centers)

    ridges_in_main_cell = set()
    threw_out = 0
    for r in ridges:
        ridge_centers = np.asarray(
            [all_facets_centers[f] for f in r if f < len(all_facets_centers)])
        if not pbcc.all_in_unit_cell(ridge_centers):
            continue

        pbcc.wrap_points(ridge_centers)
        dists, ridge_centers_in_main = nearest_center.query(
            ridge_centers, return_distance=True)

        # Every ridge vertex must coincide with one of the kept centers
        if np.any(dists > 0.00001):
            threw_out += 1
            continue

        assert ridge_centers_in_main.shape == (
            len(ridge_centers), 1), "%s" % ridge_centers_in_main.shape
        ridge_centers_in_main = ridge_centers_in_main[:, 0]

        ridges_in_main_cell.add(frozenset(ridge_centers_in_main))

    logfile.write(" Threw out %i ridges" % threw_out)

    logfile.flush()

    return centers, vertices, ridges_in_main_cell
def run(self, sn, frames):
    """Run the landmark analysis.

    The input ``SiteNetwork`` is a network of predicted sites; its sites
    will be used as the "basis" for the landmark vectors.

    Wraps a copy of ``frames`` into the unit cell.

    Args:
        sn (SiteNetwork): The landmark basis. Each site is a landmark defined
            by its vertex static atoms, as indicated by `sn.vertices`.
            (Typically from ``VoronoiSiteGenerator``.)
        frames (ndarray n_frames x n_atoms x 3): A trajectory. Can be
            unwrapped; a copy will be wrapped before the analysis.

    Returns:
        SiteTrajectory: Site assignments and confidences on a copy of ``sn``
        with the new site centers; its real trajectory is the unmodified
        input ``frames``.

    Raises:
        ValueError: If the analysis has already been run, ``frames`` has the
            wrong shape, ``sn`` has no vertices, or the site centers method
            is invalid or unsupported by the clustering result.
        InsufficientSitesError: If fewer than
            ``sn.n_mobile / max_mobile_per_site`` sites are found.
    """
    assert isinstance(sn, SiteNetwork)

    if self._has_run:
        raise ValueError("Cannot rerun LandmarkAnalysis!")

    if frames.shape[1:] != (sn.n_total, 3):
        raise ValueError("Wrong shape %s for frames." % (frames.shape, ))

    if sn.vertices is None:
        raise ValueError("Input SiteNetwork must have vertices")

    n_frames = len(frames)

    logger.info("--- Running Landmark Analysis ---")

    # Create PBCCalculator
    self._pbcc = PBCCalculator(sn.structure.cell)

    # -- Step 0: Wrap to Unit Cell
    orig_frames = frames  # Keep a reference around
    frames = frames.copy()
    # Flatten to list of points for wrapping
    orig_frame_shape = frames.shape
    frames.shape = (orig_frame_shape[0] * orig_frame_shape[1], 3)
    self._pbcc.wrap_points(frames)
    # Back to list of frames
    frames.shape = orig_frame_shape

    # -- Step 1: Compute site-to-vertex distances
    self._landmark_dimension = sn.n_sites

    longest_vert_set = np.max([len(v) for v in sn.vertices])
    # Rectangular vertex table, padded with -1 where a site has fewer
    # vertices. (`np.int`/`np.float` were aliases of the builtins and are
    # gone from current NumPy.)
    verts_np = np.array([
        np.concatenate((v, [-1] * (longest_vert_set - len(v))))
        for v in sn.vertices
    ], dtype=int)
    site_vert_dists = np.full(shape=verts_np.shape,
                              fill_value=np.nan,
                              dtype=float)
    static_positions = sn.static_structure.get_positions()
    for i, polyhedron in enumerate(sn.vertices):
        verts_poses = static_positions[polyhedron]
        dists = self._pbcc.distances(sn.centers[i], verts_poses)
        site_vert_dists[i, :len(polyhedron)] = dists

    # -- Step 2: Compute landmark vectors
    logger.info(" - computing landmark vectors -")
    # Compute landmark vectors

    # The dimension of one landmark vector is the number of Voronoi regions
    shape = (n_frames * sn.n_mobile, self._landmark_dimension)

    with tempfile.NamedTemporaryFile() as mmap_backing:
        if self.force_no_memmap:
            self._landmark_vectors = np.empty(shape=shape, dtype=float)
        else:
            self._landmark_vectors = np.memmap(mmap_backing.name,
                                               mode='w+',
                                               dtype=float,
                                               shape=shape)

        helpers._fill_landmark_vectors(
            self,
            sn,
            verts_np,
            site_vert_dists,
            frames,
            check_for_zeros=self.check_for_zero_landmarks,
            tqdm=tqdm,
            logger=logger)

        if not self.check_for_zero_landmarks and self.n_all_zero_lvecs > 0:
            logger.warning(
                " Had %i all-zero landmark vectors; no error because `check_for_zero_landmarks = False`."
                % self.n_all_zero_lvecs)
        elif self.check_for_zero_landmarks:
            assert self.n_all_zero_lvecs == 0

        # -- Step 3: Cluster landmark vectors
        logger.info(" - clustering landmark vectors -")

        #  - Cluster -
        # FIXME: remove reload after development done
        clustermod = importlib.import_module("..cluster." + self._cluster_algo,
                                             package=__name__)
        importlib.reload(clustermod)
        cluster_func = clustermod.do_landmark_clustering

        clustering = \
            cluster_func(self._landmark_vectors,
                         clustering_params = self._clustering_params,
                         min_samples = self._minimum_site_occupancy / float(sn.n_mobile),
                         verbose = self.verbose)

    cluster_counts = clustering[LandmarkAnalysis.CLUSTERING_CLUSTER_SIZE]
    lmk_lbls = clustering[LandmarkAnalysis.CLUSTERING_LABELS]
    lmk_confs = clustering[LandmarkAnalysis.CLUSTERING_CONFIDENCES]
    # Optional outputs of the clustering algorithm
    if LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS in clustering:
        landmark_clusters = clustering[
            LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS]
        assert len(cluster_counts) == len(landmark_clusters)
    else:
        landmark_clusters = None
    if LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS in clustering:
        rep_lvecs = np.asarray(clustering[
            LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS])
        assert rep_lvecs.shape == (len(cluster_counts),
                                   self._landmark_vectors.shape[1])
    else:
        rep_lvecs = None

    # Negative labels are counted as unassigned. Uses the same `logger` as
    # every other message in this method instead of the root logger.
    logger.info(
        " Failed to assign %i%% of mobile particle positions to sites."
        % (100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

    # reshape labels and confidences
    lmk_lbls.shape = (n_frames, sn.n_mobile)
    lmk_confs.shape = (n_frames, sn.n_mobile)

    n_sites = len(cluster_counts)

    if n_sites < (sn.n_mobile / self.max_mobile_per_site):
        raise InsufficientSitesError(verb="Landmark analysis",
                                     n_sites=n_sites,
                                     n_mobile=sn.n_mobile)

    logger.info(" Identified %i sites with assignment counts %s" %
                (n_sites, cluster_counts))

    # -- Do output
    out_sn = sn.copy()
    # - Compute site centers
    site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)
    real_methods = (LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED,
                    LandmarkAnalysis.SITE_CENTERS_REAL_UNWEIGHTED)
    if self.site_centers_method in real_methods:
        weighted = self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED
        for site in range(n_sites):
            mask = lmk_lbls == site
            pts = frames[:, sn.mobile_mask][mask]
            if weighted:
                site_centers[site] = self._pbcc.average(
                    pts, weights=lmk_confs[mask])
            else:
                site_centers[site] = self._pbcc.average(pts)
    elif self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REPRESENTATIVE_LANDMARK:
        if rep_lvecs is None:
            raise ValueError(
                "Chosen clustering method (with current parameters) didn't return representative landmark vectors; can't use SITE_CENTERS_REPRESENTATIVE_LANDMARK."
            )
        for site in range(n_sites):
            # Only landmarks with positive weight contribute
            weights_nonzero = rep_lvecs[site] > 0
            site_centers[site] = self._pbcc.average(
                sn.centers[weights_nonzero],
                weights=rep_lvecs[site, weights_nonzero])
    else:
        raise ValueError("Invalid site centers method '%s'" %
                         self.site_centers_method)
    out_sn.centers = site_centers
    # - If clustering gave us that, compute site vertices
    if landmark_clusters is not None:
        vertices = []
        for lclust in landmark_clusters:
            vertices.append(
                set.union(*[set(sn.vertices[lmk]) for lmk in lclust]))
        out_sn.vertices = vertices

    out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)

    # Check that multiple particles are never assigned to one site at the
    # same time, cause that would be wrong.
    self.n_multiple_assignments, self.avg_mobile_per_site = out_st.check_multiple_occupancy(
        max_mobile_per_site=self.max_mobile_per_site)

    out_st.set_real_traj(orig_frames)
    self._has_run = True

    return out_st
class LandmarkAnalysis(object):
    """Site analysis of mobile atoms in a static lattice with landmark analysis.

    :param double cutoff_midpoint: The midpoint for the logistic function used
        as the landmark cutoff function. (unitless)
    :param double cutoff_steepness: Steepness of the logistic cutoff function.
    :param double minimum_site_occupancy = 0.01: Minimum occupancy (% of time
        occupied) for a site to qualify as such.
    :param str clustering_algorithm: The landmark clustering algorithm. ``sitator``
        supplies two:

         - ``"dotprod"``: The method described in our "Unsupervised landmark
            analysis for jump detection in molecular dynamics simulations" paper.
         - ``"mcl"``: A newer method we are developing.
    :param dict clustering_params: Parameters for the chosen
        ``clustering_algorithm``. Defaults to an empty dict.
    :param str site_centers_method: The method to use for computing the real
        space positions of the sites. Options:

         - ``SITE_CENTERS_REAL_UNWEIGHTED``: A spatial average of all real-space
            mobile atom positions assigned to the site is taken.
         - ``SITE_CENTERS_REAL_WEIGHTED``: A spatial average of all real-space
            mobile atom positions assigned to the site is taken, weighted
            by the confidences with which they assigned to the site.
         - ``SITE_CENTERS_REPRESENTATIVE_LANDMARK``: A spatial average over
            all landmarks' centers is taken, weighted by the representative
            or "typical" landmark vector at the site.

        The "real" methods will generally be more faithful to the simulation,
        but the representative landmark method can work better in cases with
        short trajectories, producing a more "ideal" site location.
    :param bool check_for_zero_landmarks: Whether to check for and raise
        exceptions when all-zero landmark vectors are computed.
    :param float static_movement_threshold: (Angstrom) the maximum allowed
        distance between an instantaneous static atom position and its ideal
        position.
    :param bool dynamic_lattice_mapping: Whether to dynamically decide each
        frame which static atom represents each average lattice position;
        this allows the LandmarkAnalysis to deal with, say, a rare exchange of
        two static atoms that does not change the structure of the lattice.

        It does NOT allow LandmarkAnalysis to deal with lattices whose
        structures actually change over the course of the trajectory.

        In certain cases this is better dealt with by ``MergeSitesByDynamics``.
    :param bool relaxed_lattice_checks: Stored on the instance; not read by
        any method of this class itself (presumably consumed by the landmark
        helper routines -- TODO confirm).
    :param int max_mobile_per_site: The maximum number of mobile atoms that can
        be assigned to a single site without throwing an error. Regardless of
        the value, assignments of more than one mobile atom to a single site
        will be recorded and reported.

        Setting this to 2 can be necessary for very diffusive, liquid-like
        materials at high temperatures.

        Statistics related to this are reported in ``self.avg_mobile_per_site``
        and ``self.n_multiple_assignments``.
    :param bool force_no_memmap: if True, landmark vectors will be stored only
        in memory. Only useful if access to landmark vectors after the
        analysis has run is desired.
    :param bool verbose: Verbosity for the ``clustering_algorithm``. Other
        output controlled through ``logging``.
    """

    # Accepted values for ``site_centers_method``
    SITE_CENTERS_REAL_UNWEIGHTED = 'real-unweighted'
    SITE_CENTERS_REAL_WEIGHTED = 'real-weighted'
    SITE_CENTERS_REPRESENTATIVE_LANDMARK = 'representative-landmark'

    # Keys of the dict returned by a clustering module's
    # ``do_landmark_clustering``
    CLUSTERING_CLUSTER_SIZE = 'cluster-size'
    CLUSTERING_LABELS = 'cluster-labels'
    CLUSTERING_CONFIDENCES = 'cluster-confs'
    CLUSTERING_LANDMARK_GROUPINGS = 'cluster-landmark-groupings'
    CLUSTERING_REPRESENTATIVE_LANDMARKS = 'cluster-representative-lvecs'

    def __init__(self,
                 clustering_algorithm='dotprod',
                 clustering_params=None,
                 cutoff_midpoint=1.5,
                 cutoff_steepness=30,
                 minimum_site_occupancy=0.01,
                 site_centers_method=SITE_CENTERS_REAL_WEIGHTED,
                 check_for_zero_landmarks=True,
                 static_movement_threshold=1.0,
                 dynamic_lattice_mapping=False,
                 relaxed_lattice_checks=False,
                 max_mobile_per_site=1,
                 force_no_memmap=False,
                 verbose=True):

        self._cutoff_midpoint = cutoff_midpoint
        self._cutoff_steepness = cutoff_steepness
        self._minimum_site_occupancy = minimum_site_occupancy

        self._cluster_algo = clustering_algorithm
        # A fresh dict per instance; a `{}` default in the signature would
        # be one object shared by every instance created without the
        # argument.
        self._clustering_params = {} if clustering_params is None else clustering_params

        self.verbose = verbose
        self.check_for_zero_landmarks = check_for_zero_landmarks
        self.site_centers_method = site_centers_method
        self.dynamic_lattice_mapping = dynamic_lattice_mapping
        self.relaxed_lattice_checks = relaxed_lattice_checks

        self._landmark_vectors = None
        self._landmark_dimension = None

        self.static_movement_threshold = static_movement_threshold
        self.max_mobile_per_site = max_mobile_per_site

        self.force_no_memmap = force_no_memmap

        self._has_run = False

    @property
    def cutoff(self):
        # NOTE(review): `_cutoff` is not assigned in `__init__` or `run`;
        # unless something outside this class sets it, reading this
        # property raises AttributeError -- confirm the intended value.
        return self._cutoff

    @analysis_result
    def landmark_vectors(self):
        """Landmark vectors from the last invocation of ``run()``"""
        view = self._landmark_vectors[:]
        # Hand out a read-only view so callers cannot alter the results
        view.flags.writeable = False
        return view

    @analysis_result
    def landmark_dimension(self):
        """Number of components in a single landmark vector."""
        return self._landmark_dimension

    def run(self, sn, frames):
        """Run the landmark analysis.

        The input ``SiteNetwork`` is a network of predicted sites; its sites
        will be used as the "basis" for the landmark vectors.

        Wraps a copy of ``frames`` into the unit cell.

        Args:
            sn (SiteNetwork): The landmark basis. Each site is a landmark
                defined by its vertex static atoms, as indicated by
                `sn.vertices`. (Typically from ``VoronoiSiteGenerator``.)
            frames (ndarray n_frames x n_atoms x 3): A trajectory. Can be
                unwrapped; a copy will be wrapped before the analysis.

        Returns:
            SiteTrajectory: Site assignments and confidences on a copy of
            ``sn`` with the new site centers; its real trajectory is the
            unmodified input ``frames``.

        Raises:
            ValueError: If the analysis has already been run, ``frames`` has
                the wrong shape, ``sn`` has no vertices, or the site centers
                method is invalid or unsupported by the clustering result.
            InsufficientSitesError: If fewer than
                ``sn.n_mobile / max_mobile_per_site`` sites are found.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            raise ValueError("Wrong shape %s for frames." % (frames.shape, ))

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        logger.info("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 0: Wrap to Unit Cell
        orig_frames = frames  # Keep a reference around
        frames = frames.copy()
        # Flatten to list of points for wrapping
        orig_frame_shape = frames.shape
        frames.shape = (orig_frame_shape[0] * orig_frame_shape[1], 3)
        self._pbcc.wrap_points(frames)
        # Back to list of frames
        frames.shape = orig_frame_shape

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        longest_vert_set = np.max([len(v) for v in sn.vertices])
        # Rectangular vertex table, padded with -1 where a site has fewer
        # vertices. (`np.int`/`np.float` were aliases of the builtins and
        # are gone from current NumPy.)
        verts_np = np.array([
            np.concatenate((v, [-1] * (longest_vert_set - len(v))))
            for v in sn.vertices
        ], dtype=int)
        site_vert_dists = np.full(shape=verts_np.shape,
                                  fill_value=np.nan,
                                  dtype=float)
        static_positions = sn.static_structure.get_positions()
        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = static_positions[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        logger.info(" - computing landmark vectors -")
        # Compute landmark vectors

        # The dimension of one landmark vector is the number of Voronoi
        # regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self,
                sn,
                verts_np,
                site_vert_dists,
                frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm,
                logger=logger)

            if not self.check_for_zero_landmarks and self.n_all_zero_lvecs > 0:
                logger.warning(
                    " Had %i all-zero landmark vectors; no error because `check_for_zero_landmarks = False`."
                    % self.n_all_zero_lvecs)
            elif self.check_for_zero_landmarks:
                assert self.n_all_zero_lvecs == 0

            # -- Step 3: Cluster landmark vectors
            logger.info(" - clustering landmark vectors -")

            #  - Cluster -
            # FIXME: remove reload after development done
            clustermod = importlib.import_module(
                "..cluster." + self._cluster_algo, package=__name__)
            importlib.reload(clustermod)
            cluster_func = clustermod.do_landmark_clustering

            clustering = \
                cluster_func(self._landmark_vectors,
                             clustering_params = self._clustering_params,
                             min_samples = self._minimum_site_occupancy / float(sn.n_mobile),
                             verbose = self.verbose)

        cluster_counts = clustering[LandmarkAnalysis.CLUSTERING_CLUSTER_SIZE]
        lmk_lbls = clustering[LandmarkAnalysis.CLUSTERING_LABELS]
        lmk_confs = clustering[LandmarkAnalysis.CLUSTERING_CONFIDENCES]
        # Optional outputs of the clustering algorithm
        if LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS in clustering:
            landmark_clusters = clustering[
                LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS]
            assert len(cluster_counts) == len(landmark_clusters)
        else:
            landmark_clusters = None
        if LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS in clustering:
            rep_lvecs = np.asarray(clustering[
                LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS])
            assert rep_lvecs.shape == (len(cluster_counts),
                                       self._landmark_vectors.shape[1])
        else:
            rep_lvecs = None

        # Negative labels are counted as unassigned. Uses the same `logger`
        # as every other message in this method instead of the root logger.
        logger.info(
            " Failed to assign %i%% of mobile particle positions to sites."
            % (100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < (sn.n_mobile / self.max_mobile_per_site):
            raise InsufficientSitesError(verb="Landmark analysis",
                                         n_sites=n_sites,
                                         n_mobile=sn.n_mobile)

        logger.info(" Identified %i sites with assignment counts %s" %
                    (n_sites, cluster_counts))

        # -- Do output
        out_sn = sn.copy()
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)
        real_methods = (LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED,
                        LandmarkAnalysis.SITE_CENTERS_REAL_UNWEIGHTED)
        if self.site_centers_method in real_methods:
            weighted = self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED
            for site in range(n_sites):
                mask = lmk_lbls == site
                pts = frames[:, sn.mobile_mask][mask]
                if weighted:
                    site_centers[site] = self._pbcc.average(
                        pts, weights=lmk_confs[mask])
                else:
                    site_centers[site] = self._pbcc.average(pts)
        elif self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REPRESENTATIVE_LANDMARK:
            if rep_lvecs is None:
                raise ValueError(
                    "Chosen clustering method (with current parameters) didn't return representative landmark vectors; can't use SITE_CENTERS_REPRESENTATIVE_LANDMARK."
                )
            for site in range(n_sites):
                # Only landmarks with positive weight contribute
                weights_nonzero = rep_lvecs[site] > 0
                site_centers[site] = self._pbcc.average(
                    sn.centers[weights_nonzero],
                    weights=rep_lvecs[site, weights_nonzero])
        else:
            raise ValueError("Invalid site centers method '%s'" %
                             self.site_centers_method)
        out_sn.centers = site_centers
        # - If clustering gave us that, compute site vertices
        if landmark_clusters is not None:
            vertices = []
            for lclust in landmark_clusters:
                vertices.append(
                    set.union(*[set(sn.vertices[lmk]) for lmk in lclust]))
            out_sn.vertices = vertices

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        self.n_multiple_assignments, self.avg_mobile_per_site = out_st.check_multiple_occupancy(
            max_mobile_per_site=self.max_mobile_per_site)

        out_st.set_real_traj(orig_frames)
        self._has_run = True

        return out_st
def voronoi(self, structure, radial=False, verbose=True):
    """Run Zeo++ on ``structure`` and return its Voronoi network.

    The structure is written to ``in.cif`` in the temporary directory, the
    Zeo++ executable is invoked with ``-v1`` and ``-nt2`` outputs, and both
    output files are parsed.

    :param Atoms structure: The ASE Atoms to compute the Voronoi decomposition of.
    :param bool radial: If False, ``-nor`` is passed to Zeo++.
    :param bool verbose: If True, print the Zeo++ console output.
    :returns: ``(vert_coords, region_atom_indexes, edges, edge_radius)``:
        vertex coordinates converted to the cell of ``structure``, the
        ``'region-atom-indexes'`` entry of every vertex, an ``(n_edges, 2)``
        int array of from/to vertex indexes, and a float array of edge radii.
    :raises ValueError: If used outside the ``with`` statement (no temporary
        directory).
    :raises subprocess.CalledProcessError: If Zeo++ exits with an error.
    """
    if self._tmpdir is None:
        raise ValueError("Cannot use Zeopy outside with statement")

    def _as_text(raw):
        # check_output returns bytes on Python 3; decode so the console
        # output is printed as text rather than as a b'...' repr.
        if isinstance(raw, bytes):
            return raw.decode(errors="replace")
        return raw

    inp = os.path.join(self._tmpdir, "in.cif")
    outp = os.path.join(self._tmpdir, "out.nt2")
    v1out = os.path.join(self._tmpdir, "out.v1")

    ase.io.write(inp, structure)

    args = [] if radial else ["-nor"]

    try:
        output = subprocess.check_output([self._exe] + args +
                                         ["-v1", v1out, "-nt2", outp, inp],
                                         stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print("Zeo++ returned an error:", file=sys.stderr)
        print(_as_text(e.output), file=sys.stderr)
        raise

    if verbose:
        print(_as_text(output))

    with open(outp, "r") as outf:
        verts, edges = self.parse_nt2(outf.readlines())
    with open(v1out, "r") as outf:
        zeocell = self.parse_v1_cell(outf.readlines())

    # Confirm things really are in order -- sort of
    # Looking at the Zeo code, I don't think it reorders cell vectors --
    # it just rotates them.
    # The absolute value matters: without it any cell whose vectors are
    # *shorter* than ours by an arbitrary amount would pass the check.
    assert np.all(
        np.abs(
            np.linalg.norm(zeocell, axis=1) -
            np.linalg.norm(structure.cell, axis=1)) < 0.0001)

    vert_coords = np.asarray([v['coords'] for v in verts])
    zeopbcc = PBCCalculator(zeocell)
    real_pbcc = PBCCalculator(structure.cell)
    # Both conversions operate on vert_coords in place.
    # Bring into Zeo crystal coordinates
    zeopbcc.to_cell_coords(vert_coords)
    # Bring into our real coords
    real_pbcc.to_real_coords(vert_coords)

    # `np.int`/`np.float` were aliases of the builtins and are gone from
    # current NumPy.
    edges_np = np.empty(shape=(len(edges), 2), dtype=int)
    edge_radius = np.empty(shape=len(edges), dtype=float)
    for i, edge in enumerate(edges):
        edges_np[i, 0] = edge['from']
        edges_np[i, 1] = edge['to']
        edge_radius[i] = edge['radius']

    return (vert_coords, [v['region-atom-indexes'] for v in verts], edges_np,
            edge_radius)
def _build_mic_connmat(self, sn, connectivity_matrix):
    """Expand a site connectivity matrix onto a 3x3x3 supercell of images.

    Supercell sites are ordered image-major: site ``s`` of image number
    ``k`` has index ``k * n_sites + s``. Connections whose minimum image is
    the central one (code 111) are copied into every image's diagonal block;
    all other connections link a site to its partner in the neighbouring
    image, and are dropped when that image lies outside the 3x3x3 block.

    Args:
        sn: The site network; ``len(sn)`` must equal the matrix size.
        connectivity_matrix: Square matrix; nonzero entries are connections.

    Returns:
        ``(newmat, mask_000, images)``: the boolean sparse (LIL) supercell
        connectivity matrix, a boolean mask over supercell sites selecting
        the central (0, 0, 0) image, and the ``(27, 3)`` array of image
        offsets.
    """
    n_sites = len(sn)
    # We use a 3x3x3 = 27 supercell, so there are 27x as many sites
    assert n_sites == connectivity_matrix.shape[0]

    def image_code(image):
        # Three-digit code, one digit (offset + 1) per axis: (0,0,0) -> 111
        return 100 * (image[0] + 1) + 10 * (image[1] + 1) + (image[2] + 1)

    images = np.asarray(list(itertools.product(range(-1, 2), repeat=3)))
    image_to_idex = {image_code(image): i for i, image in enumerate(images)}
    n_images = len(images)
    assert n_images == 27

    pos = sn.centers
    n_total_sites = n_images * n_sites
    # `np.bool` was an alias of the builtin and is gone from current NumPy
    newmat = lil_matrix((n_total_sites, n_total_sites), dtype=bool)

    # Mask of the central image. Its block starts at index_000 * n_sites,
    # matching the image-major layout used for `newmat` below; the previous
    # start of `index_000` selected the wrong sites whenever n_sites != 1.
    mask_000 = np.zeros(shape=n_total_sites, dtype=bool)
    index_000 = image_to_idex[111]
    mask_000[index_000 * n_sites:(index_000 + 1) * n_sites] = True
    assert np.sum(mask_000) == n_sites

    pbcc = PBCCalculator(sn.structure.cell)
    buf = np.empty(shape=3)

    internal_mat = np.zeros_like(connectivity_matrix)
    external_connections = []
    for from_site, to_site in zip(*np.where(connectivity_matrix)):
        buf[:] = pos[to_site]
        mic = pbcc.min_image(pos[from_site], buf)
        if mic == 111:
            # If we're in the main image, keep the connection: it's internal
            internal_mat[from_site, to_site] = True
        else:
            # Decode the image code into a per-axis offset once here; it
            # does not depend on which supercell image is being filled.
            # (Original note: "FIXME: is the -1 right")
            offset = np.array(
                [(mic // 10**(2 - i) % 10) - 1 for i in range(3)])
            external_connections.append((from_site, to_site, offset))

    for image_idex, image in enumerate(images):
        block_start = image_idex * n_sites
        block_end = block_start + n_sites
        # Make the block diagonal
        newmat[block_start:block_end, block_start:block_end] = internal_mat

        # Check all external connections from this image; add other sparse
        # entries
        for from_site, to_site, offset in external_connections:
            to_in_image = image + offset
            assert np.max(np.abs(to_in_image)) <= 2
            if np.any(np.abs(to_in_image) > 1):
                # Target image is outside the 3x3x3 block
                continue
            target_idex = image_to_idex[image_code(to_in_image)]
            newmat[block_start + from_site,
                   target_idex * n_sites + to_site] = True

    # Lowest it can be is if every one is internal
    assert np.sum(newmat) >= n_images * np.sum(internal_mat)

    return newmat, mask_000, images