def get_layer_heights_kmeans(traj, cell, n, surface_normal=np.array([0, 0, 1])):
    """Find the heights of the centers of the layers, along `surface_normal`, in `traj`.

    Uses k-means over all (`surface_normal`-relative) heights in the trajectory.

    Args:
        - traj (ndarray n_frames x n_atoms x 3)
        - cell (ndarray 3x3 matrix)
        - n (int): The number of layers to identify (the k for k-means).
        - surface_normal (3-vector): A unit vector normal to the surface.
            Defaults to the z direction <0, 0, 1>.

    Returns:
        sorted ndarray of heights along surface normal
    """
    from sklearn.cluster import KMeans
    # We have to wrap first to get consistent results along the surface normal
    traj = traj.copy().reshape(-1, 3)
    pbcc = PBCCalculator(cell)
    pbcc.wrap_points(traj)
    heights = np.dot(surface_normal, traj.T)
    kmeans = KMeans(n_clusters=n).fit(heights.reshape(-1, 1))
    heights = kmeans.cluster_centers_.reshape(-1)
    heights.sort()
    return heights
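# A minimal usage sketch for get_layer_heights_kmeans. It assumes PBCCalculator
# and numpy are importable at module level, as in the function above; the cell
# dimensions, frame/atom counts, and n=4 layers are illustrative values, not
# taken from any real simulation.
import numpy as np

cell = np.diag([10.0, 10.0, 40.0])               # hypothetical slab cell, long z axis
frac = np.random.uniform(0.0, 1.0, (100, 64, 3))  # fake fractional coordinates
traj = frac @ cell                                # convert to Cartesian positions
layer_heights = get_layer_heights_kmeans(traj, cell, n=4)
print("Layer centers along z:", layer_heights)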
def compute_volumes(self, sn):
    """Computes the volume of the convex hull defined by each site's static vertices.

    Requires vertex information in the SiteNetwork.

    Adds the ``site_volumes`` and ``site_surface_areas`` attributes.
    Volumes can be NaN for degenerate hulls/point sets on which QHull fails.

    Args:
        - sn (SiteNetwork)
    """
    assert isinstance(sn, SiteNetwork)

    if sn.vertices is None:
        raise ValueError("SiteNetwork must have vertices to compute volumes!")

    # np.float is a deprecated alias for the builtin float; use float directly.
    vols = np.empty(shape=sn.n_sites, dtype=float)
    areas = np.empty(shape=sn.n_sites, dtype=float)

    pbcc = PBCCalculator(sn.structure.cell)

    for site in range(sn.n_sites):
        pos = sn.static_structure.positions[list(sn.vertices[site])]

        if len(pos) < 4:
            if self.error_on_insufficient_coord:
                raise InsufficientCoordinatingAtomsError(
                    "Site %i had only %i vertices (less than needed 4)"
                    % (site, len(pos)))
            else:
                vols[site] = 0
                areas[site] = np.nan
                continue

        # Fancy indexing with an index list returns a copy, so pos owns its data.
        assert pos.flags['OWNDATA']

        # Recenter around the cell centroid so the hull is not split across
        # periodic boundaries.
        offset = pbcc.cell_centroid - sn.centers[site]
        pos += offset
        pbcc.wrap_points(pos)

        try:
            hull = ConvexHull(pos)
            vols[site] = hull.volume
            areas[site] = hull.area
        except QhullError:
            logger.warning("Had QHull failure when computing volume of site %i" % site)
            vols[site] = np.nan
            areas[site] = np.nan

    sn.add_site_attribute('site_volumes', vols)
    sn.add_site_attribute('site_surface_areas', areas)
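# A usage sketch for compute_volumes. `sn` is assumed to be a SiteNetwork whose
# `vertices` were already populated (e.g. by a Voronoi site generator), and
# `analysis` is a placeholder for whatever object carries this method.
import numpy as np

analysis.compute_volumes(sn)
site_vols = sn.site_volumes            # attribute added by compute_volumes
site_areas = sn.site_surface_areas
ok = ~np.isnan(site_vols)              # QHull can fail on degenerate hulls
print("Mean site volume over %i valid sites: %.3f" % (ok.sum(), site_vols[ok].mean()))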
def compute_accessable_volumes(self, st, n_recenterings=8):
    """Computes the volumes of convex hulls around all positions associated with a site.

    Uses the shift-and-wrap trick for dealing with periodicity, so sites that
    take up the majority of the unit cell may give bogus results.

    Adds the ``accessable_site_volumes`` attribute to the ``SiteNetwork``.

    Args:
        st (SiteTrajectory)
        n_recenterings (int): How many different recenterings to try (the
            algorithm will recenter around n of the points and take the
            minimal resulting volume; this deals with cases where there is
            one outlier for which recentering around it gives very bad
            results).
    """
    assert isinstance(st, SiteTrajectory)

    vols = np.empty(shape=st.site_network.n_sites, dtype=float)
    areas = np.empty(shape=st.site_network.n_sites, dtype=float)

    pbcc = PBCCalculator(st.site_network.structure.cell)

    for site in range(st.site_network.n_sites):
        pos = st.real_positions_for_site(site)
        assert pos.flags['OWNDATA']

        vol = np.inf
        area = None

        for i in range(n_recenterings):
            # Recenter around the i-th sample point; wrapping afterwards keeps
            # the point cloud together across periodic boundaries.
            offset = pbcc.cell_centroid - pos[int(i * (len(pos) / n_recenterings))]
            pos += offset
            pbcc.wrap_points(pos)

            try:
                hull = ConvexHull(pos)
            except QhullError as qhe:
                logger.warning("For site %i, iter %i: %s" % (site, i, qhe))
                continue

            # Keep the minimal-volume hull over all recenterings.
            if hull.volume < vol:
                vol = hull.volume
                area = hull.area

        if area is None:
            # Every recentering failed in QHull; mark the site invalid rather
            # than storing the initial np.inf / None placeholders.
            vols[site] = np.nan
            areas[site] = np.nan
        else:
            vols[site] = vol
            areas[site] = area

    st.site_network.add_site_attribute('accessable_site_volumes', vols)
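# A usage sketch for compute_accessable_volumes, assuming `st` is a
# SiteTrajectory produced by a prior site analysis and `analysis` is a
# placeholder for the object carrying this method.
import numpy as np

analysis.compute_accessable_volumes(st, n_recenterings=8)
acc_vols = st.site_network.accessable_site_volumes   # attribute added above
print("Largest accessible site volume: %.3f" % np.nanmax(acc_vols))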
def run(self, sn):
    assert isinstance(sn, SiteNetwork)
    out = sn.copy()
    pbcc = PBCCalculator(sn.structure.cell)
    # Replicate each site center n times, then jitter every copy with
    # isotropic Gaussian noise of width sigma and wrap back into the cell.
    newcenters = out.centers.repeat(self.n, axis=0)
    assert len(newcenters) == self.n * len(out.centers)
    newcenters += self.sigma * np.random.standard_normal(size=newcenters.shape)
    pbcc.wrap_points(newcenters)
    out.centers = newcenters
    return out
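# A minimal sketch of how this jittering transform might be invoked. The class
# name RandomJitter is a hypothetical stand-in (the constructor and attribute
# names `n` and `sigma` are inferred from the method body above); `sn` is an
# existing SiteNetwork.
jitter = RandomJitter()       # hypothetical class name
jitter.n = 5                  # 5 jittered copies per site center
jitter.sigma = 0.25           # Gaussian width (Angstrom, assumed units)
jittered = jitter.run(sn)
assert len(jittered.centers) == 5 * len(sn.centers)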
def plot_atoms(atoms, positions=None, hide_species=(), wrap=False,
               fig=None, ax=None, i=None):
    mask = [not (e in hide_species) for e in atoms.get_chemical_symbols()]
    if positions is None:
        pts = atoms.get_positions()
    else:
        pts = positions
    pts = pts[mask]
    species = [s for j, s in enumerate(atoms.get_chemical_symbols()) if mask[j]]

    if wrap:
        # Wrap a copy of the already masked points; wrapping a fresh
        # atoms.get_positions() here would discard both the species mask and
        # any caller-supplied positions.
        pbcc = PBCCalculator(atoms.cell)
        pts = pts.copy()
        pbcc.wrap_points(pts)

    ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2],
               c=[color_for_species(s) for s in species],
               s=[20.0 * ase.data.covalent_radii[ase.data.atomic_numbers[s]]
                  for s in species])

    # Draw the twelve edges of the unit cell parallelepiped.
    all_cvecs = []
    whos_left = set(range(len(atoms.cell)))
    for i, cvec1 in enumerate(atoms.cell):
        all_cvecs.append(np.array([[0.0, 0.0, 0.0], cvec1]))
        for cvec2 in atoms.cell[list(whos_left - {i})]:
            all_cvecs.append(np.array([cvec1, cvec1 + cvec2]))
    for i, cvec1 in enumerate(atoms.cell):
        start = np.sum(atoms.cell[list(whos_left - {i})], axis=0)
        all_cvecs.append(np.array([start, start + cvec1]))

    for cvec in all_cvecs:
        ax.plot(cvec[:, 0], cvec[:, 1], cvec[:, 2],
                color="gray", alpha=0.5, linewidth=0.7, linestyle="--")

    set_axes_equal(ax)
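# A usage sketch for plot_atoms, assuming matplotlib's 3D projection, an ASE
# Atoms object, and that the module-level helpers it relies on
# (color_for_species, set_axes_equal, PBCCalculator) are importable. The
# copper cell is purely illustrative.
import matplotlib.pyplot as plt
from ase.build import bulk

atoms = bulk('Cu', 'fcc', a=3.6, cubic=True)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plot_atoms(atoms, wrap=True, fig=fig, ax=ax)
plt.show()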
def plot_site(self, site, **kwargs):
    pbcc = PBCCalculator(self._sn.structure.cell)

    pts = self.real_positions_for_site(site).copy()
    # Recenter everything around an arbitrary sample point (the fourth) so
    # the site's point cloud isn't split across periodic boundaries.
    offset = pbcc.cell_centroid - pts[3]
    pts += offset
    pbcc.wrap_points(pts)

    lattice_pos = self._sn.static_structure.positions.copy()
    lattice_pos += offset
    pbcc.wrap_points(lattice_pos)

    site_pos = self._sn.centers[site:site + 1].copy()
    site_pos += offset
    pbcc.wrap_points(site_pos)

    # Plot point cloud
    plot_points(points=pts, alpha=0.3, marker='.', color='k', **kwargs)

    # Plot site
    plot_points(points=site_pos, color='cyan', **kwargs)

    # Plot everything else
    plot_atoms(self._sn.static_structure, positions=lattice_pos, **kwargs)

    title = "Site %i/%i" % (site, len(self._sn))
    if self._sn.site_types is not None:
        title += " (type %i)" % self._sn.site_types[site]

    kwargs['ax'].set_title(title)
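# A usage sketch for plot_site, assuming `st` is a SiteTrajectory with a real
# trajectory attached (via set_real_traj) so real_positions_for_site works.
# The kwargs are forwarded to plot_points/plot_atoms, so fig and ax must be
# supplied as in the plot_atoms example above.
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
st.plot_site(0, fig=fig, ax=ax)   # point cloud, center, and lattice for site 0
plt.show()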
def run(self, st):
    vols = np.empty(shape=st.site_network.n_sites, dtype=float)
    areas = np.empty(shape=st.site_network.n_sites, dtype=float)

    pbcc = PBCCalculator(st.site_network.structure.cell)

    for site in range(st.site_network.n_sites):
        pos = st.real_positions_for_site(site)
        assert pos.flags['OWNDATA']

        vol = np.inf
        area = None

        for i in range(self.n_recenterings):
            # Recenter around the i-th sample point
            offset = pbcc.cell_centroid - pos[int(i * (len(pos) / self.n_recenterings))]
            pos += offset
            pbcc.wrap_points(pos)

            try:
                hull = ConvexHull(pos)
            except QhullError as qhe:
                # Python 2 `print` statement replaced with the module logger.
                logger.warning("For site %i, iter %i: %s" % (site, i, qhe))
                continue

            if hull.volume < vol:
                vol = hull.volume
                area = hull.area

        if area is None:
            # All recenterings failed in QHull; mark the site invalid instead
            # of storing the initial np.inf / None placeholders.
            vols[site] = np.nan
            areas[site] = np.nan
        else:
            vols[site] = vol
            areas[site] = area

    st.site_network.add_site_attribute('site_volumes', vols)
    st.site_network.add_site_attribute('site_surface_areas', areas)
class LandmarkAnalysis(object):
    """Site analysis of mobile atoms in a static lattice with landmark analysis.

    :param double cutoff_midpoint: The midpoint for the logistic function used
        as the landmark cutoff function. (unitless)
    :param double cutoff_steepness: Steepness of the logistic cutoff function.
    :param double minimum_site_occupancy = 0.01: Minimum occupancy (% of time
        occupied) for a site to qualify as such.
    :param str clustering_algorithm: The landmark clustering algorithm.
        ``sitator`` supplies two:

        - ``"dotprod"``: The method described in our "Unsupervised landmark
          analysis for jump detection in molecular dynamics simulations" paper.
        - ``"mcl"``: A newer method we are developing.
    :param dict clustering_params: Parameters for the chosen ``clustering_algorithm``.
    :param str site_centers_method: The method to use for computing the real
        space positions of the sites. Options:

        - ``SITE_CENTERS_REAL_UNWEIGHTED``: A spatial average of all real-space
          mobile atom positions assigned to the site is taken.
        - ``SITE_CENTERS_REAL_WEIGHTED``: A spatial average of all real-space
          mobile atom positions assigned to the site is taken, weighted by the
          confidences with which they were assigned to the site.
        - ``SITE_CENTERS_REPRESENTATIVE_LANDMARK``: A spatial average over all
          landmarks' centers is taken, weighted by the representative or
          "typical" landmark vector at the site.

        The "real" methods will generally be more faithful to the simulation,
        but the representative landmark method can work better in cases with
        short trajectories, producing a more "ideal" site location.
    :param bool check_for_zero_landmarks: Whether to check for and raise
        exceptions when all-zero landmark vectors are computed.
    :param float static_movement_threshold: (Angstrom) the maximum allowed
        distance between an instantaneous static atom position and its ideal
        position.
    :param bool dynamic_lattice_mapping: Whether to dynamically decide, each
        frame, which static atom represents each average lattice position;
        this allows the LandmarkAnalysis to deal with, say, a rare exchange
        of two static atoms that does not change the structure of the lattice.

        It does NOT allow LandmarkAnalysis to deal with lattices whose
        structures actually change over the course of the trajectory. In
        certain cases this is better dealt with by ``MergeSitesByDynamics``.
    :param int max_mobile_per_site: The maximum number of mobile atoms that
        can be assigned to a single site without throwing an error. Regardless
        of the value, assignments of more than one mobile atom to a single
        site will be recorded and reported.

        Setting this to 2 can be necessary for very diffusive, liquid-like
        materials at high temperatures. Statistics related to this are
        reported in ``self.avg_mobile_per_site`` and
        ``self.n_multiple_assignments``.
    :param bool force_no_memmap: If True, landmark vectors will be stored only
        in memory. Only useful if access to landmark vectors after the
        analysis has run is desired.
    :param bool verbose: Verbosity for the ``clustering_algorithm``. Other
        output is controlled through ``logging``.
""" SITE_CENTERS_REAL_UNWEIGHTED = 'real-unweighted' SITE_CENTERS_REAL_WEIGHTED = 'real-weighted' SITE_CENTERS_REPRESENTATIVE_LANDMARK = 'representative-landmark' CLUSTERING_CLUSTER_SIZE = 'cluster-size' CLUSTERING_LABELS = 'cluster-labels' CLUSTERING_CONFIDENCES = 'cluster-confs' CLUSTERING_LANDMARK_GROUPINGS = 'cluster-landmark-groupings' CLUSTERING_REPRESENTATIVE_LANDMARKS = 'cluster-representative-lvecs' def __init__(self, clustering_algorithm='dotprod', clustering_params={}, cutoff_midpoint=1.5, cutoff_steepness=30, minimum_site_occupancy=0.01, site_centers_method=SITE_CENTERS_REAL_WEIGHTED, check_for_zero_landmarks=True, static_movement_threshold=1.0, dynamic_lattice_mapping=False, relaxed_lattice_checks=False, max_mobile_per_site=1, force_no_memmap=False, verbose=True): self._cutoff_midpoint = cutoff_midpoint self._cutoff_steepness = cutoff_steepness self._minimum_site_occupancy = minimum_site_occupancy self._cluster_algo = clustering_algorithm self._clustering_params = clustering_params self.verbose = verbose self.check_for_zero_landmarks = check_for_zero_landmarks self.site_centers_method = site_centers_method self.dynamic_lattice_mapping = dynamic_lattice_mapping self.relaxed_lattice_checks = relaxed_lattice_checks self._landmark_vectors = None self._landmark_dimension = None self.static_movement_threshold = static_movement_threshold self.max_mobile_per_site = max_mobile_per_site self.force_no_memmap = force_no_memmap self._has_run = False @property def cutoff(self): return self._cutoff @analysis_result def landmark_vectors(self): """Landmark vectors from the last invocation of ``run()``""" view = self._landmark_vectors[:] view.flags.writeable = False return view @analysis_result def landmark_dimension(self): """Number of components in a single landmark vector.""" return self._landmark_dimension def run(self, sn, frames): """Run the landmark analysis. The input ``SiteNetwork`` is a network of predicted sites; it's sites will be used as the "basis" for the landmark vectors. Wraps a copy of ``frames`` into the unit cell. Args: sn (SiteNetwork): The landmark basis. Each site is a landmark defined by its vertex static atoms, as indicated by `sn.vertices`. (Typically from ``VoronoiSiteGenerator``.) frames (ndarray n_frames x n_atoms x 3): A trajectory. Can be unwrapped; a copy will be wrapped before the analysis. """ assert isinstance(sn, SiteNetwork) if self._has_run: raise ValueError("Cannot rerun LandmarkAnalysis!") if frames.shape[1:] != (sn.n_total, 3): raise ValueError("Wrong shape %s for frames." 
        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        logger.info("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 0: Wrap to Unit Cell
        orig_frames = frames  # Keep a reference around
        frames = frames.copy()
        # Flatten to a list of points for wrapping
        orig_frame_shape = frames.shape
        frames.shape = (orig_frame_shape[0] * orig_frame_shape[1], 3)
        self._pbcc.wrap_points(frames)
        # Back to a list of frames
        frames.shape = orig_frame_shape

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        longest_vert_set = np.max([len(v) for v in sn.vertices])
        # Pad ragged vertex lists to a rectangular array with -1 sentinels.
        # (np.int/np.float are deprecated aliases; use the builtins.)
        verts_np = np.array([
            np.concatenate((v, [-1] * (longest_vert_set - len(v))))
            for v in sn.vertices
        ], dtype=int)
        site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
        site_vert_dists.fill(np.nan)

        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = sn.static_structure.get_positions()[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        logger.info("  - computing landmark vectors -")
        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self, sn, verts_np, site_vert_dists, frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm, logger=logger)

            if not self.check_for_zero_landmarks and self.n_all_zero_lvecs > 0:
                logger.warning(
                    "    Had %i all-zero landmark vectors; no error because "
                    "`check_for_zero_landmarks = False`." % self.n_all_zero_lvecs)
            elif self.check_for_zero_landmarks:
                assert self.n_all_zero_lvecs == 0

        # -- Step 3: Cluster landmark vectors
        logger.info("  - clustering landmark vectors -")

        # FIXME: remove reload after development done
        clustermod = importlib.import_module("..cluster." + self._cluster_algo,
                                             package=__name__)
        importlib.reload(clustermod)
        cluster_func = clustermod.do_landmark_clustering

        clustering = \
            cluster_func(self._landmark_vectors,
                         clustering_params=self._clustering_params,
                         min_samples=self._minimum_site_occupancy / float(sn.n_mobile),
                         verbose=self.verbose)

        cluster_counts = clustering[LandmarkAnalysis.CLUSTERING_CLUSTER_SIZE]
        lmk_lbls = clustering[LandmarkAnalysis.CLUSTERING_LABELS]
        lmk_confs = clustering[LandmarkAnalysis.CLUSTERING_CONFIDENCES]

        if LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS in clustering:
            landmark_clusters = clustering[
                LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS]
            assert len(cluster_counts) == len(landmark_clusters)
        else:
            landmark_clusters = None

        if LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS in clustering:
            rep_lvecs = np.asarray(clustering[
                LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS])
            assert rep_lvecs.shape == (len(cluster_counts),
                                       self._landmark_vectors.shape[1])
        else:
            rep_lvecs = None

        # Use the module logger, not the root `logging` module.
        logger.info(
            "    Failed to assign %i%% of mobile particle positions to sites."
            % (100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))
        # Reshape labels and confidences to (n_frames, n_mobile)
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < (sn.n_mobile / self.max_mobile_per_site):
            raise InsufficientSitesError(verb="Landmark analysis",
                                         n_sites=n_sites,
                                         n_mobile=sn.n_mobile)

        logger.info("    Identified %i sites with assignment counts %s"
                    % (n_sites, cluster_counts))

        # -- Do output
        out_sn = sn.copy()

        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)

        if self.site_centers_method in (LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED,
                                        LandmarkAnalysis.SITE_CENTERS_REAL_UNWEIGHTED):
            for site in range(n_sites):
                mask = lmk_lbls == site
                pts = frames[:, sn.mobile_mask][mask]
                if self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED:
                    site_centers[site] = self._pbcc.average(pts,
                                                            weights=lmk_confs[mask])
                else:
                    site_centers[site] = self._pbcc.average(pts)
        elif self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REPRESENTATIVE_LANDMARK:
            if rep_lvecs is None:
                raise ValueError(
                    "Chosen clustering method (with current parameters) didn't "
                    "return representative landmark vectors; can't use "
                    "SITE_CENTERS_REPRESENTATIVE_LANDMARK.")
            for site in range(n_sites):
                weights_nonzero = rep_lvecs[site] > 0
                site_centers[site] = self._pbcc.average(
                    sn.centers[weights_nonzero],
                    weights=rep_lvecs[site, weights_nonzero])
        else:
            raise ValueError("Invalid site centers method '%s'"
                             % self.site_centers_method)

        out_sn.centers = site_centers

        # - If clustering gave us landmark groupings, compute site vertices
        if landmark_clusters is not None:
            vertices = []
            for lclust in landmark_clusters:
                vertices.append(set.union(*[set(sn.vertices[l]) for l in lclust]))
            out_sn.vertices = vertices

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)

        # Check that multiple particles are never assigned to one site at the
        # same time, since that would be wrong.
        self.n_multiple_assignments, self.avg_mobile_per_site = \
            out_st.check_multiple_occupancy(
                max_mobile_per_site=self.max_mobile_per_site)

        out_st.set_real_traj(orig_frames)
        self._has_run = True
        return out_st
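# A usage sketch for LandmarkAnalysis, assuming a SiteNetwork `sn` whose
# vertices came from something like VoronoiSiteGenerator and a trajectory
# `frames` of shape (n_frames, sn.n_total, 3). The parameter values below are
# illustrative, not recommendations; per the class docstring, the landmark
# cutoff is a logistic function of distance, so a smaller cutoff_midpoint and
# larger cutoff_steepness make the cutoff sharper.
la = LandmarkAnalysis(clustering_algorithm='dotprod',
                      cutoff_midpoint=1.5,
                      cutoff_steepness=30,
                      minimum_site_occupancy=0.01)
st = la.run(sn, frames)                  # returns a SiteTrajectory
print("Found %i sites" % st.site_network.n_sites)
print("Multiple-assignment events: %i" % la.n_multiple_assignments)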
def periodic_voronoi(structure, logfile=sys.stdout):
    """Compute a Voronoi decomposition of a periodic structure with qvoronoi.

    :param ase.Atoms structure: The structure whose unit cell to decompose.
    """
    pbcc = PBCCalculator(structure.cell)

    # Make a 3x3x3 supercell so every Voronoi region of the central image
    # is bounded.
    supercell = structure.repeat((3, 3, 3))

    qhull_output = None

    logfile.write("Qvoronoi ---")

    # Run qhull
    with tempfile.NamedTemporaryFile('w', prefix='qvor', suffix='.in', delete=False) as infile, \
         tempfile.NamedTemporaryFile('r', prefix='qvor', suffix='.out', delete=True) as outfile:
        # -- Write input file --
        infile.write("3\n")                    # number of dimensions
        infile.write("%i\n" % len(supercell))  # number of points
        np.savetxt(infile, supercell.get_positions(), fmt='%.16f')
        infile.flush()

        cmdline = ["qvoronoi", "TI", infile.name, "FF", "Fv", "TO", outfile.name]
        process = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        retcode = process.wait()
        # Popen stdout is bytes; decode before writing to a text logfile.
        logfile.write(process.stdout.read().decode())

        if retcode != 0:
            raise RuntimeError("qvoronoi returned exit code %i" % retcode)

        qhull_output = outfile.read()

    facets_regex = re.compile(
        """
        -[ \t](?P<facetkey>f[0-9]+)   [\n]
        [ \t]*-[ ]flags: .* [\n]
        [ \t]*-[ ]normal: .* [\n]
        [ \t]*-[ ]offset: .* [\n]
        [ \t]*-[ ]center:(?P<center>([ ][\-]?[0-9]*[\.]?[0-9]*(e[-]?[0-9]+)?){3}) [ \t] [\n]
        [ \t]*-[ ]vertices:(?P<vertices>([ ]p[0-9]+\(v[0-9]+\))+) [ \t]? [\n]
        [ \t]*-[ ]neighboring[ ]facets:(?P<neighbors>([ ]f[0-9]+)+)
        """, re.X | re.M)

    vertices_re = re.compile('(?<=p)[0-9]+')

    # Allocate stuff
    centers = []
    vertices = []
    facet_indexes_taken = set()
    facet_index_to_our_index = {}
    all_facets_centers = []

    # ---- Read facets
    facet_index = -1
    next_our_index = 0

    for facet_match in facets_regex.finditer(qhull_output):
        # map() returns an iterator in Python 3; materialize it for numpy.
        center = np.asarray([float(x) for x in facet_match.group('center').split()])
        facet_index += 1

        all_facets_centers.append(center)

        if not pbcc.is_in_image_of_cell(center, (1, 1, 1)):
            continue

        verts = [int(v) for v in vertices_re.findall(facet_match.group('vertices'))]
        verts_in_main_cell = tuple(v % len(structure) for v in verts)

        facet_indexes_taken.add(facet_index)
        centers.append(center)
        vertices.append(verts_in_main_cell)
        facet_index_to_our_index[facet_index] = next_our_index

        next_our_index += 1

    end_of_facets = facet_match.end()
    facet_count = facet_index + 1

    logfile.write("  qhull gave %i vertices; kept %i" % (facet_count, len(centers)))

    # ---- Read ridges
    qhull_output_after_facets = qhull_output[end_of_facets:].strip()
    ridge_re = re.compile('^\d+ \d+ \d+(?P<verts>( \d+)+)$', re.M)
    ridges = [[int(v) for v in match.group('verts').split()]
              for match in ridge_re.finditer(qhull_output_after_facets)]
    # Only take ridges with at least one facet in the main unit cell.
    ridges = [r for r in ridges if any(f in facet_indexes_taken for f in r)]

    # Shift centers back into the normal unit cell. `centers` is a list up to
    # this point, so convert it to an array before the vectorized shift.
    centers = np.asarray(centers) - np.sum(structure.cell, axis=0)

    nearest_center = KDTree(centers)

    ridges_in_main_cell = set()
    threw_out = 0

    for r in ridges:
        ridge_centers = np.asarray(
            [all_facets_centers[f] for f in r if f < len(all_facets_centers)])

        if not pbcc.all_in_unit_cell(ridge_centers):
            continue

        pbcc.wrap_points(ridge_centers)

        dists, ridge_centers_in_main = nearest_center.query(ridge_centers,
                                                            return_distance=True)
        if np.any(dists > 0.00001):
            threw_out += 1
            continue

        assert ridge_centers_in_main.shape == (len(ridge_centers), 1), \
            "%s" % ridge_centers_in_main.shape
        ridge_centers_in_main = ridge_centers_in_main[:, 0]

        ridges_in_main_cell.add(frozenset(ridge_centers_in_main))

    logfile.write("  Threw out %i ridges" % threw_out)
    logfile.flush()

    return centers, vertices, ridges_in_main_cell
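# A usage sketch for periodic_voronoi. It assumes the `qvoronoi` executable
# from QHull is on PATH and that `structure` is an ASE Atoms object; the
# diamond-silicon cell here is purely illustrative.
from ase.build import bulk

structure = bulk('Si', 'diamond', a=5.43, cubic=True)
centers, vertices, ridges = periodic_voronoi(structure)
print("Kept %i Voronoi vertices in the main cell" % len(centers))
print("First kept facet's coordinating atoms (mod n_atoms):", vertices[0])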