def replace_with_closer(st, mobile_atom, before_site, start_frame,
                                after_site, end_frame):
            """Assign each frame in a window to the closer of two sites.

            For every frame in ``[start_frame, end_frame)`` the real-space
            position of ``mobile_atom`` is compared against the centers of
            ``before_site`` and ``after_site``; the frame is given whichever
            site is nearer (ties go to ``after_site``).

            Relies on ``pbcc``, ``ptbuf`` and ``distbuf`` being provided by
            the surrounding scope (a calculator-or-``None`` and two scratch
            buffers).

            :param st: the ``SiteTrajectory`` providing ``site_network`` and
                ``real_trajectory``.
            :param mobile_atom: index of the mobile atom in the trajectory.
            :param before_site: site occupied before the window.
            :param start_frame: first frame of the window (inclusive).
            :param after_site: site occupied after the window.
            :param end_frame: end of the window (exclusive).
            :returns: ``SiteTrajectory.SITE_UNKNOWN`` if either site is
                unknown; otherwise an array of length
                ``end_frame - start_frame`` holding the chosen site per frame
                (NumPy's default dtype, since none is requested).
            """
            unknown = SiteTrajectory.SITE_UNKNOWN
            if before_site == unknown or after_site == unknown:
                return unknown

            # Read the outer `pbcc` into a *different* local name. Assigning
            # to `pbcc` itself would make it local to this function and turn
            # the `is None` test into an UnboundLocalError.
            calculator = pbcc
            if calculator is None:
                calculator = PBCCalculator(st.site_network.structure.cell)

            n_frames = end_frame - start_frame
            out = np.empty(shape=n_frames)
            for i in range(n_frames):
                # The buffer is refilled on every iteration: `in_place=True`
                # presumably lets `distances` overwrite it -- confirm against
                # PBCCalculator before hoisting these two assignments.
                ptbuf[0] = st.site_network.centers[before_site]
                ptbuf[1] = st.site_network.centers[after_site]
                calculator.distances(st.real_trajectory[start_frame + i,
                                                        mobile_atom],
                                     ptbuf,
                                     in_place=True,
                                     out=distbuf)
                out[i] = before_site if distbuf[0] < distbuf[1] else after_site
            return out
    def _get_sites_to_merge(self, st):
        """Cluster sites by their jump connectivity.

        Runs a ``JumpAnalysis`` on ``st`` if its site network has no ``n_ij``
        attribute yet, builds a connectivity matrix with
        ``self.connectivity_matrix_generator``, zeroes every edge between
        sites further apart than ``self.distance_threshold`` and hands the
        result to ``markov_clustering`` with ``self.markov_parameters``.

        A warning is logged when unusually strong edges (more than three
        standard deviations above the mean off-diagonal value) are discarded
        by the distance cutoff.

        :param st: the ``SiteTrajectory`` whose sites should be merged.
        :returns: the clusters as returned by ``markov_clustering``.
        """
        # -- Compute jump statistics
        if not st.site_network.has_attribute('n_ij'):
            ja = JumpAnalysis()
            ja.run(st)

        pbcc = PBCCalculator(st.site_network.structure.cell)

        # -- Build connectivity_matrix
        # Work on a copy: edges are zeroed in place below.
        connectivity_matrix = self.connectivity_matrix_generator(
            st.site_network).copy()
        n_sites_before = st.site_network.n_sites
        assert n_sites_before == connectivity_matrix.shape[0]

        centers_before = st.site_network.centers

        # For diagnostic purposes. The builtin `float` is used because the
        # `np.float` alias no longer exists in current NumPy releases.
        no_diag_graph = connectivity_matrix.astype(dtype=float, copy=True)
        np.fill_diagonal(no_diag_graph, np.nan)
        # Rather arbitrary, but this is really just an alarm for if things
        # are really, really wrong
        edge_threshold = np.nanmean(
            no_diag_graph) + 3 * np.nanstd(no_diag_graph)
        n_alarming_ignored_edges = 0

        # Apply distance threshold
        for i in range(n_sites_before):
            # Only look at j > i; the symmetric entry is cleared explicitly.
            dists = pbcc.distances(centers_before[i], centers_before[i + 1:])
            js_too_far = np.where(dists > self.distance_threshold)[0]
            js_too_far += i + 1  # back to absolute site indices

            if np.any(connectivity_matrix[i, js_too_far] > edge_threshold) or \
               np.any(connectivity_matrix[js_too_far, i] > edge_threshold):
                # Counted once per site `i`, hence "at least" in the message.
                n_alarming_ignored_edges += 1

            connectivity_matrix[i, js_too_far] = 0
            connectivity_matrix[js_too_far, i] = 0  # Symmetry

        if n_alarming_ignored_edges > 0:
            logger.warning(
                "  At least %i site pairs with high (z-score > 3) fluxes were over the given distance cutoff.\n"
                "  This may or may not be a problem; but if `distance_threshold` is low, consider raising it.",
                n_alarming_ignored_edges)

        # -- Do Markov Clustering
        return markov_clustering(connectivity_matrix,
                                 **self.markov_parameters)
    def _get_sites_to_merge(self, st, threshold=0):
        sn = st.site_network

        attrmat = getattr(sn, self.attrname)
        assert attrmat.shape == (
            sn.n_sites, sn.n_sites
        ), "`attrname` doesn't seem to indicate an edge property."
        connmat = self.relation(attrmat, threshold)

        # Apply distance threshold
        if self.distance_threshold < np.inf:
            pbcc = PBCCalculator(sn.structure.cell)
            centers = sn.centers
            for i in range(sn.n_sites):
                dists = pbcc.distances(centers[i], centers[i + 1:])
                js_too_far = np.where(dists > self.distance_threshold)[0]
                js_too_far += i + 1

                connmat[i, js_too_far] = False
                connmat[js_too_far, i] = False  # Symmetry

        if self.forbid_multiple_occupancy:
            n_mobile = sn.n_mobile
            for frame in st.traj:
                frame = [s for s in frame if s >= 0]
                for site in frame:  # only known
                    # can't merge occupied site with other simulatanious occupied sites
                    connmat[site, frame] = False

        # Everything is always mergable with itself.
        np.fill_diagonal(connmat, True)

        # Get mergable groups
        n_merged_sites, labels = connected_components(
            connmat, directed=self.directed, connection=self.connection)
        # MergeSites will check pairwise distances; we just need to make it the
        # right format.
        merge_groups = []
        for lbl in range(n_merged_sites):
            merge_groups.append(np.where(labels == lbl)[0])

        return merge_groups
Beispiel #4
0
class LandmarkAnalysis(object):
    """Track a mobile species through a fixed lattice using landmark vectors."""
    def __init__(self,
                 clustering_algorithm='dotprod',
                 clustering_params=None,
                 cutoff=2.0,
                 minimum_site_occupancy=0.1,
                 peak_evening='none',
                 weighted_site_positions=True,
                 check_for_zero_landmarks=True,
                 static_movement_threshold=1.0,
                 dynamic_lattice_mapping=False,
                 relaxed_lattice_checks=False,
                 max_mobile_per_site=1,
                 force_no_memmap=False,
                 verbose=True):
        """
        :param str clustering_algorithm: Name of the module (looked up in the
            sibling ``cluster`` package) whose ``do_landmark_clustering``
            function is used to cluster the landmark vectors.
        :param dict clustering_params: Parameters for the chosen
            clustering_algorithm. ``None`` (the default) means no parameters.
        :param double cutoff: The distance cutoff for the landmark vectors. (unitless)
        :param double minimum_site_occupancy: Minimum occupancy (% of time occupied)
            for a site to qualify as such.
        :param str peak_evening: Whether and what kind of peak "evening" to apply;
            that is, processing that makes all large peaks in the landmark vector
            more similar in magnitude. This can help in site clustering.

            Valid options: 'none', 'clip'
        :param bool weighted_site_positions: When computing site positions, whether
            to weight the average by assignment confidence.
        :param bool check_for_zero_landmarks: Whether to check for and raise exceptions
            when all-zero landmark vectors are computed.
        :param float static_movement_threshold: (Angstrom) the maximum allowed
            distance between an instantaneous static atom position and its ideal position.
        :param bool dynamic_lattice_mapping: Whether to dynamically decide each
            frame which static atom represents each average lattice position;
            this allows the LandmarkAnalysis to deal with, say, a rare exchange of
            two static atoms that does not change the structure of the lattice.

            It does NOT allow LandmarkAnalysis to deal with lattices whose structures
            actually change over the course of the trajectory.

            In certain cases this is better dealt with by MergeSitesByDynamics.
        :param bool relaxed_lattice_checks: Stored on the instance; not read by
            the code in this class (presumably consumed by the landmark-vector
            helper -- confirm).
        :param int max_mobile_per_site: The maximum number of mobile atoms that can
            be assigned to a single site without throwing an error. Regardless of the
            value, assignments of more than one mobile atom to a single site will
            be recorded and reported.

            Setting this to 2 can be necessary for very diffusive, liquid-like
            materials at high temperatures.

            Statistics related to this are reported in self.avg_mobile_per_site
            and self.n_multiple_assignments.
        :param bool force_no_memmap: if True, landmark vectors will be stored only in memory.
            Only useful if access to landmark vectors after the analysis has run is desired.
        :param bool verbose: If `True`, progress bars and messages will be printed to stdout.
        :raises ValueError: if ``peak_evening`` is not a valid option.
        """
        if peak_evening not in ('none', 'clip'):
            raise ValueError("Invalid value `%s` for peak_evening" %
                             peak_evening)

        self._cutoff = cutoff
        self._minimum_site_occupancy = minimum_site_occupancy

        self._cluster_algo = clustering_algorithm
        # A fresh dict per instance instead of a shared mutable default.
        self._clustering_params = \
            {} if clustering_params is None else clustering_params

        self._peak_evening = peak_evening

        self.verbose = verbose
        self.check_for_zero_landmarks = check_for_zero_landmarks
        self.weighted_site_positions = weighted_site_positions
        self.dynamic_lattice_mapping = dynamic_lattice_mapping
        self.relaxed_lattice_checks = relaxed_lattice_checks

        self._landmark_vectors = None
        self._landmark_dimension = None

        self.static_movement_threshold = static_movement_threshold
        self.max_mobile_per_site = max_mobile_per_site

        self.force_no_memmap = force_no_memmap

        self._has_run = False

    @property
    def cutoff(self):
        """The landmark distance cutoff given at construction."""
        return self._cutoff

    @analysis_result
    def landmark_vectors(self):
        """A read-only view of the landmark vectors."""
        view = self._landmark_vectors[:]
        view.flags.writeable = False
        return view

    @analysis_result
    def landmark_dimension(self):
        """Number of components in a single landmark vector."""
        return self._landmark_dimension

    def run(self, sn, frames):
        """Run the landmark analysis.

        The input SiteNetwork is a network of predicted sites; its sites will
        be used as the "basis" for the landmark vectors.

        Takes a SiteNetwork and returns a SiteTrajectory.

        :param sn: the input ``SiteNetwork``; must have vertices.
        :param frames: the trajectory; its trailing shape must be
            ``(sn.n_total, 3)``.
        :raises ValueError: if the analysis was already run, ``frames`` has
            the wrong shape, ``sn`` has no vertices, fewer sites than mobile
            particles were found, or more than ``max_mobile_per_site`` mobile
            particles share a site in some frame.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            # The shape is wrapped in a 1-tuple: formatting with the bare
            # shape tuple would unpack it as several format arguments.
            raise ValueError("Wrong shape %s for frames." % (frames.shape, ))

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        if self.verbose:
            print("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        # Vertex lists are ragged: pad with -1 (indices) / NaN (distances).
        longest_vert_set = np.max([len(v) for v in sn.vertices])
        verts_np = np.array(
            [v + [-1] * (longest_vert_set - len(v)) for v in sn.vertices])
        site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
        site_vert_dists.fill(np.nan)

        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = sn.static_structure.get_positions()[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        if self.verbose:
            print("  - computing landmark vectors -")

        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self,
                sn,
                verts_np,
                site_vert_dists,
                frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm)

            # -- Step 3: Cluster landmark vectors
            if self.verbose:
                print("  - clustering landmark vectors -")
            #  - Preprocess -
            self._do_peak_evening()

            #  - Cluster -
            cluster_func = importlib.import_module(
                "..cluster." + self._cluster_algo,
                package=__name__).do_landmark_clustering

            cluster_counts, lmk_lbls, lmk_confs = \
                cluster_func(self._landmark_vectors,
                             clustering_params=self._clustering_params,
                             min_samples=self._minimum_site_occupancy / float(sn.n_mobile),
                             verbose=self.verbose)

        if self.verbose:
            print("    Failed to assign %i%% of mobile particle positions to sites." % (
                100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < sn.n_mobile:
            raise ValueError(
                "There are %i mobile particles, but only identified %i sites. Check clustering_params."
                % (sn.n_mobile, n_sites))

        if self.verbose:
            print("    Identified %i sites with assignment counts %s" % (
                n_sites, cluster_counts))

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        n_more_than_ones = 0
        avg_mobile_per_site = 0
        divisor = 0
        for frame_i, site_frame in enumerate(lmk_lbls):
            sites, counts = np.unique(site_frame[site_frame >= 0],
                                      return_counts=True)
            count_msk = counts > self.max_mobile_per_site
            if np.any(count_msk):
                # Report the offending site ids themselves, not their
                # positions within the `np.unique` output.
                raise ValueError(
                    "%i mobile particles were assigned to only %i site(s) (%s) at frame %i."
                    % (np.sum(counts[count_msk]), np.sum(count_msk),
                       sites[count_msk], frame_i))
            n_more_than_ones += np.sum(counts > 1)
            avg_mobile_per_site += np.sum(counts)
            divisor += len(counts)

        self.n_multiple_assignments = n_more_than_ones
        self.avg_mobile_per_site = avg_mobile_per_site / float(divisor)

        # -- Do output
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)

        # Loop-invariant: positions of the mobile atoms in every frame.
        mobile_frames = frames[:, sn.mobile_mask]
        for site in range(n_sites):
            mask = lmk_lbls == site
            pts = mobile_frames[mask]
            if self.weighted_site_positions:
                site_centers[site] = self._pbcc.average(
                    pts, weights=lmk_confs[mask])
            else:
                site_centers[site] = self._pbcc.average(pts)

        # Build output objects
        out_sn = sn.copy()

        out_sn.centers = site_centers
        assert out_sn.vertices is None

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)
        out_st.set_real_traj(frames)
        self._has_run = True

        return out_st

    # -------- "private" methods --------

    def _do_peak_evening(self):
        """Apply the configured peak "evening" to the landmark vectors in place."""
        if self._peak_evening == 'none':
            return
        elif self._peak_evening == 'clip':
            lvec_peaks = np.max(self._landmark_vectors, axis=1)
            # Clip all peaks to the lowest "normal" (stdev.) peak
            lvec_clip = np.mean(lvec_peaks) - np.std(lvec_peaks)
            # Do the clipping
            self._landmark_vectors[
                self._landmark_vectors > lvec_clip] = lvec_clip
class LandmarkAnalysis(object):
    """Site analysis of mobile atoms in a static lattice with landmark analysis.

    :param str clustering_algorithm: The landmark clustering algorithm. ``sitator``
        supplies two:
         - ``"dotprod"``: The method described in our "Unsupervised landmark
            analysis for jump detection in molecular dynamics simulations" paper.
         - ``"mcl"``: A newer method we are developing.
    :param dict clustering_params: Parameters for the chosen ``clustering_algorithm``.
        ``None`` (the default) means no parameters.
    :param double cutoff_midpoint: The midpoint for the logistic function used
        as the landmark cutoff function. (unitless)
    :param double cutoff_steepness: Steepness of the logistic cutoff function.
    :param double minimum_site_occupancy: Minimum occupancy (% of time occupied)
        for a site to qualify as such. Defaults to 0.01.
    :param str site_centers_method: The method to use for computing the real
        space positions of the sites. Options:
         - ``SITE_CENTERS_REAL_UNWEIGHTED``: A spatial average of all real-space
            mobile atom positions assigned to the site is taken.
         - ``SITE_CENTERS_REAL_WEIGHTED``: A spatial average of all real-space
            mobile atom positions assigned to the site is taken, weighted
            by the confidences with which they were assigned to the site.
         - ``SITE_CENTERS_REPRESENTATIVE_LANDMARK``: A spatial average over
            all landmarks' centers is taken, weighted by the representative
            or "typical" landmark vector at the site.
        The "real" methods will generally be more faithful to the simulation,
        but the representative landmark method can work better in cases with
        short trajectories, producing a more "ideal" site location.
    :param bool check_for_zero_landmarks: Whether to check for and raise exceptions
        when all-zero landmark vectors are computed.
    :param float static_movement_threshold: (Angstrom) the maximum allowed
        distance between an instantaneous static atom position and its ideal position.
    :param bool dynamic_lattice_mapping: Whether to dynamically decide each
        frame which static atom represents each average lattice position;
        this allows the LandmarkAnalysis to deal with, say, a rare exchange of
        two static atoms that does not change the structure of the lattice.

        It does NOT allow LandmarkAnalysis to deal with lattices whose structures
        actually change over the course of the trajectory.

        In certain cases this is better dealt with by ``MergeSitesByDynamics``.
    :param int max_mobile_per_site: The maximum number of mobile atoms that can
        be assigned to a single site without throwing an error. Regardless of the
        value, assignments of more than one mobile atom to a single site will
        be recorded and reported.

        Setting this to 2 can be necessary for very diffusive, liquid-like
        materials at high temperatures.

        Statistics related to this are reported in ``self.avg_mobile_per_site``
        and ``self.n_multiple_assignments``.
    :param bool force_no_memmap: if True, landmark vectors will be stored only in memory.
        Only useful if access to landmark vectors after the analysis has run is desired.
    :param bool verbose: Verbosity for the ``clustering_algorithm``. Other output
        controlled through ``logging``.
    """

    SITE_CENTERS_REAL_UNWEIGHTED = 'real-unweighted'
    SITE_CENTERS_REAL_WEIGHTED = 'real-weighted'
    SITE_CENTERS_REPRESENTATIVE_LANDMARK = 'representative-landmark'

    # Keys of the dict returned by a clustering module's
    # ``do_landmark_clustering``; the last two are optional.
    CLUSTERING_CLUSTER_SIZE = 'cluster-size'
    CLUSTERING_LABELS = 'cluster-labels'
    CLUSTERING_CONFIDENCES = 'cluster-confs'
    CLUSTERING_LANDMARK_GROUPINGS = 'cluster-landmark-groupings'
    CLUSTERING_REPRESENTATIVE_LANDMARKS = 'cluster-representative-lvecs'

    def __init__(self,
                 clustering_algorithm='dotprod',
                 clustering_params=None,
                 cutoff_midpoint=1.5,
                 cutoff_steepness=30,
                 minimum_site_occupancy=0.01,
                 site_centers_method=SITE_CENTERS_REAL_WEIGHTED,
                 check_for_zero_landmarks=True,
                 static_movement_threshold=1.0,
                 dynamic_lattice_mapping=False,
                 relaxed_lattice_checks=False,
                 max_mobile_per_site=1,
                 force_no_memmap=False,
                 verbose=True):
        self._cutoff_midpoint = cutoff_midpoint
        self._cutoff_steepness = cutoff_steepness
        self._minimum_site_occupancy = minimum_site_occupancy

        self._cluster_algo = clustering_algorithm
        # A fresh dict per instance instead of a shared mutable default.
        self._clustering_params = \
            {} if clustering_params is None else clustering_params

        self.verbose = verbose
        self.check_for_zero_landmarks = check_for_zero_landmarks
        self.site_centers_method = site_centers_method
        self.dynamic_lattice_mapping = dynamic_lattice_mapping
        self.relaxed_lattice_checks = relaxed_lattice_checks

        self._landmark_vectors = None
        self._landmark_dimension = None

        self.static_movement_threshold = static_movement_threshold
        self.max_mobile_per_site = max_mobile_per_site

        self.force_no_memmap = force_no_memmap

        self._has_run = False

    @property
    def cutoff(self):
        """Midpoint of the logistic landmark cutoff function.

        NOTE(review): this used to read ``self._cutoff``, which is never set
        by ``__init__`` and so always raised ``AttributeError``. The logistic
        midpoint is returned as the closest equivalent of a single cutoff
        value -- confirm this is what callers expect.
        """
        return self._cutoff_midpoint

    @analysis_result
    def landmark_vectors(self):
        """Landmark vectors from the last invocation of ``run()``"""
        view = self._landmark_vectors[:]
        view.flags.writeable = False
        return view

    @analysis_result
    def landmark_dimension(self):
        """Number of components in a single landmark vector."""
        return self._landmark_dimension

    def run(self, sn, frames):
        """Run the landmark analysis.

        The input ``SiteNetwork`` is a network of predicted sites; its sites will
        be used as the "basis" for the landmark vectors.

        Wraps a copy of ``frames`` into the unit cell.

        Args:
            sn (SiteNetwork): The landmark basis. Each site is a landmark defined
                by its vertex static atoms, as indicated by `sn.vertices`.
                (Typically from ``VoronoiSiteGenerator``.)
            frames (ndarray n_frames x n_atoms x 3): A trajectory. Can be unwrapped;
                a copy will be wrapped before the analysis.

        Returns:
            A ``SiteTrajectory`` over a copy of ``sn`` with the new site
            centers; its real trajectory is the original (unwrapped) ``frames``.

        Raises:
            ValueError: if the analysis was already run, ``frames`` has the
                wrong shape, ``sn`` has no vertices, the site centers method
                is invalid, or representative landmarks are requested but
                the clustering did not return any.
            InsufficientSitesError: if too few sites were found for the
                number of mobile particles.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            raise ValueError("Wrong shape %s for frames." % (frames.shape, ))

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        logger.info("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 0: Wrap to Unit Cell
        orig_frames = frames  # Keep a reference around
        frames = frames.copy()
        # Flatten to list of points for wrapping
        orig_frame_shape = frames.shape
        frames.shape = (orig_frame_shape[0] * orig_frame_shape[1], 3)
        self._pbcc.wrap_points(frames)
        # Back to list of frames
        frames.shape = orig_frame_shape

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        # Vertex lists are ragged: pad with -1 (indices) / NaN (distances).
        # Builtin `int`/`float` are used as dtypes because the `np.int` and
        # `np.float` aliases no longer exist in current NumPy releases.
        longest_vert_set = np.max([len(v) for v in sn.vertices])
        verts_np = np.array([
            np.concatenate((v, [-1] * (longest_vert_set - len(v))))
            for v in sn.vertices
        ],
                            dtype=int)
        site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
        site_vert_dists.fill(np.nan)

        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = sn.static_structure.get_positions()[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        logger.info("  - computing landmark vectors -")

        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self,
                sn,
                verts_np,
                site_vert_dists,
                frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm,
                logger=logger)

            if not self.check_for_zero_landmarks and self.n_all_zero_lvecs > 0:
                logger.warning(
                    "     Had %i all-zero landmark vectors; no error because `check_for_zero_landmarks = False`."
                    % self.n_all_zero_lvecs)
            elif self.check_for_zero_landmarks:
                assert self.n_all_zero_lvecs == 0

            # -- Step 3: Cluster landmark vectors
            logger.info("  - clustering landmark vectors -")

            #  - Cluster -
            # FIXME: remove reload after development done
            clustermod = importlib.import_module("..cluster." +
                                                 self._cluster_algo,
                                                 package=__name__)
            importlib.reload(clustermod)
            cluster_func = clustermod.do_landmark_clustering

            clustering = \
                cluster_func(self._landmark_vectors,
                             clustering_params=self._clustering_params,
                             min_samples=self._minimum_site_occupancy / float(sn.n_mobile),
                             verbose=self.verbose)

        cluster_counts = clustering[LandmarkAnalysis.CLUSTERING_CLUSTER_SIZE]
        lmk_lbls = clustering[LandmarkAnalysis.CLUSTERING_LABELS]
        lmk_confs = clustering[LandmarkAnalysis.CLUSTERING_CONFIDENCES]
        if LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS in clustering:
            landmark_clusters = clustering[
                LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS]
            assert len(cluster_counts) == len(landmark_clusters)
        else:
            landmark_clusters = None
        if LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS in clustering:
            rep_lvecs = np.asarray(clustering[
                LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS])
            assert rep_lvecs.shape == (len(cluster_counts),
                                       self._landmark_vectors.shape[1])
        else:
            rep_lvecs = None

        # Use the module logger like the rest of this method, not the
        # root logger via `logging.info`.
        logger.info(
            "    Failed to assign %i%% of mobile particle positions to sites."
            % (100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < (sn.n_mobile / self.max_mobile_per_site):
            raise InsufficientSitesError(verb="Landmark analysis",
                                         n_sites=n_sites,
                                         n_mobile=sn.n_mobile)

        logger.info("    Identified %i sites with assignment counts %s" %
                    (n_sites, cluster_counts))

        # -- Do output
        out_sn = sn.copy()
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)
        method = self.site_centers_method
        if method in (LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED,
                      LandmarkAnalysis.SITE_CENTERS_REAL_UNWEIGHTED):
            weighted = method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED
            # Loop-invariant: (wrapped) mobile atom positions in every frame.
            mobile_frames = frames[:, sn.mobile_mask]
            for site in range(n_sites):
                mask = lmk_lbls == site
                pts = mobile_frames[mask]
                if weighted:
                    site_centers[site] = self._pbcc.average(
                        pts, weights=lmk_confs[mask])
                else:
                    site_centers[site] = self._pbcc.average(pts)
        elif method == LandmarkAnalysis.SITE_CENTERS_REPRESENTATIVE_LANDMARK:
            if rep_lvecs is None:
                raise ValueError(
                    "Chosen clustering method (with current parameters) didn't return representative landmark vectors; can't use SITE_CENTERS_REPRESENTATIVE_LANDMARK."
                )
            for site in range(n_sites):
                # Only landmarks with a positive component contribute.
                weights_nonzero = rep_lvecs[site] > 0
                site_centers[site] = self._pbcc.average(
                    sn.centers[weights_nonzero],
                    weights=rep_lvecs[site, weights_nonzero])
        else:
            raise ValueError("Invalid site centers method '%s'" %
                             self.site_centers_method)
        out_sn.centers = site_centers
        # - If clustering gave us that, compute site vertices
        if landmark_clusters is not None:
            # A new site's vertices are the union of the vertices of all
            # landmarks grouped into it. `set().union(...)` also copes with
            # an empty group, where `set.union(*[])` would raise.
            out_sn.vertices = [
                set().union(*[sn.vertices[l] for l in lclust])
                for lclust in landmark_clusters
            ]

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        self.n_multiple_assignments, self.avg_mobile_per_site = out_st.check_multiple_occupancy(
            max_mobile_per_site=self.max_mobile_per_site)

        # Hand back the caller's original, unwrapped positions.
        out_st.set_real_traj(orig_frames)
        self._has_run = True

        return out_st
    def run(self, st):
        """Takes a SiteTrajectory and returns a SiteTrajectory, including a new SiteNetwork.

        Sites are grouped by Markov clustering of the ``p_ij`` matrix of the
        site network (a ``JumpAnalysis`` is run first if that attribute is
        missing). Each cluster becomes one new site whose center is the
        periodic average of the merged centers, and the trajectory is
        relabelled accordingly. Confidences are not carried over.

        :param st: the ``SiteTrajectory`` to merge sites in.
        :returns: a new ``SiteTrajectory`` on a copy of the site network.
        :raises ValueError: if ``check_types`` is set but the site network
            has no type information, or if a site ends up in more than one
            cluster.
        :raises AssertionError: if merged sites are further apart than
            ``self.distance_threshold`` or (with ``check_types``) have
            differing types.
        """

        if self.check_types and st.site_network.site_types is None:
            raise ValueError(
                "Cannot run a check_types=True MergeSitesByDynamics on a SiteTrajectory without type information."
            )

        # Compute jump statistics
        if not st.site_network.has_attribute('p_ij'):
            ja = JumpAnalysis(verbose=self.verbose)
            ja.run(st)

        pbcc = PBCCalculator(st.site_network.structure.cell)
        site_centers = st.site_network.centers
        if self.check_types:
            site_types = st.site_network.site_types

        connectivity_matrix = st.site_network.p_ij
        assert st.site_network.n_sites == connectivity_matrix.shape[0]

        clusters = self._markov_clustering(connectivity_matrix,
                                           **self.markov_parameters)

        new_n_sites = len(clusters)

        if self.verbose:
            print("After merge there will be %i sites" % new_n_sites)

        # Builtin `int` is used as dtype because the `np.int` alias no
        # longer exists in current NumPy releases.
        if self.check_types:
            new_types = np.empty(shape=new_n_sites, dtype=int)

        new_centers = np.empty(shape=(new_n_sites, 3),
                               dtype=st.site_network.centers.dtype)
        # Old site index -> new site index; -1 marks "not assigned yet".
        translation = np.empty(shape=st.site_network.n_sites, dtype=int)
        translation.fill(-1)

        for newsite in range(new_n_sites):
            mask = list(clusters[newsite])
            # Update translation table
            if np.any(translation[mask] != -1):
                # We've assigned a different cluster for this before... weird
                # degeneracy
                raise ValueError(
                    "Markov clustering tried to merge site(s) into more than one new site"
                )
            translation[mask] = newsite

            to_merge = site_centers[mask]

            # Check distances (of every merged site to the first one)
            dists = pbcc.distances(to_merge[0], to_merge[1:])

            assert np.all(
                dists < self.distance_threshold
            ), "Markov clustering tried to merge sites more than %f apart -- this may be valid, and the distance threshold may need to be increased." % self.distance_threshold

            # New site center
            new_centers[newsite] = pbcc.average(to_merge)
            if self.check_types:
                assert np.all(site_types[mask] == site_types[mask][0])
                new_types[newsite] = site_types[mask][0]

        newsn = st.site_network.copy()
        newsn.centers = new_centers
        if self.check_types:
            newsn.site_types = new_types

        # Relabel the trajectory, then restore the "unknown" entries, which
        # the lookup above would otherwise have mapped like a real index.
        newtraj = translation[st._traj]
        newtraj[st._traj ==
                SiteTrajectory.SITE_UNKNOWN] = SiteTrajectory.SITE_UNKNOWN

        # It doesn't make sense to propagate confidence information through a
        # transform that might completely invalidate it
        newst = SiteTrajectory(newsn, newtraj, confidences=None)

        if st.real_trajectory is not None:
            newst.set_real_traj(st.real_trajectory)

        return newst
# Example #7
# 0
    def run(self, st, **kwargs):
        """Takes a ``SiteTrajectory`` and returns a new ``SiteTrajectory``.

        The groups of sites to merge are obtained from
        ``self._get_sites_to_merge(st, **kwargs)``; each group becomes one
        site in the returned trajectory's site network.

        :param st: The ``SiteTrajectory`` whose sites should be merged.
        :param kwargs: Forwarded unchanged to ``_get_sites_to_merge``.
        :returns: A new ``SiteTrajectory`` over the merged site network.
            Confidences are not carried over; the real trajectory is, if
            ``st`` has one.
        :raises ValueError: If ``check_types`` is set but the site network has
            no type information, or if one old site appears in more than one
            merge group.
        :raises InsufficientSitesError: If merging would leave fewer sites
            than there are mobile particles.
        :raises MergedSitesTooDistantError: If ``maximum_merge_distance`` is
            not None and a site in a group is farther than that from the
            group's first site.

        If ``set_merged_into`` is true, the *original* site network of ``st``
        gets a ``"merged_into"`` site attribute mapping each old site to its
        new site index (any existing attribute of that name is replaced).
        """

        if self.check_types and st.site_network.site_types is None:
            raise ValueError(
                "Cannot run a check_types=True MergeSites on a SiteTrajectory without type information."
            )

        # -- Set up periodic-boundary helper and cache pre-merge site data
        pbcc = PBCCalculator(st.site_network.structure.cell)
        site_centers = st.site_network.centers
        if self.check_types:
            site_types = st.site_network.site_types

        clusters = self._get_sites_to_merge(st, **kwargs)
        new_n_sites = len(clusters)

        logger.info(
            "After merging %i sites there will be %i sites for %i mobile particles",
            len(site_centers), new_n_sites, st.site_network.n_mobile)

        if new_n_sites < st.site_network.n_mobile:
            raise InsufficientSitesError(verb="Merging",
                                         n_sites=new_n_sites,
                                         n_mobile=st.site_network.n_mobile)

        # ``np.int`` was removed from NumPy; the builtin ``int`` is the same
        # dtype that alias used to resolve to.
        if self.check_types:
            new_types = np.empty(shape=new_n_sites, dtype=int)
        merge_verts = st.site_network.vertices is not None
        if merge_verts:
            new_verts = []

        # -- Merge Sites
        new_centers = np.empty(shape=(new_n_sites, 3),
                               dtype=st.site_network.centers.dtype)
        # translation[old_site] -> new_site; -1 marks "not yet assigned"
        translation = np.empty(shape=st.site_network.n_sites, dtype=int)
        translation.fill(-1)

        for newsite in range(new_n_sites):
            mask = list(clusters[newsite])
            # Update translation table
            if np.any(translation[mask] != -1):
                # We've assigned a different cluster for this before... weird
                # degeneracy
                raise ValueError(
                    "Site merging tried to merge site(s) into more than one new site. This shouldn't happen."
                )
            translation[mask] = newsite

            to_merge = site_centers[mask]

            # Check distances (measured from the first site of the group)
            if self.maximum_merge_distance is not None:
                dists = pbcc.distances(to_merge[0], to_merge[1:])
                if not np.all(dists <= self.maximum_merge_distance):
                    raise MergedSitesTooDistantError(
                        "Markov clustering tried to merge sites more than %.2f apart. Lower your distance_threshold?"
                        % self.maximum_merge_distance)

            # New site center: weight by occupancy only when asked to
            if self.weighted_spatial_average:
                occs = st.site_network.occupancies[mask]
                new_centers[newsite] = pbcc.average(to_merge, weights=occs)
            else:
                new_centers[newsite] = pbcc.average(to_merge)

            if self.check_types:
                # All sites in a merge group must share one type
                assert np.all(site_types[mask] == site_types[mask][0])
                new_types[newsite] = site_types[mask][0]
            if merge_verts:
                # The merged site's vertices are the union of its members'
                new_verts.append(
                    set().union(
                        *(st.site_network.vertices[i] for i in mask)))

        newsn = st.site_network.copy()
        newsn.centers = new_centers
        if self.check_types:
            newsn.site_types = new_types
        if merge_verts:
            newsn.vertices = new_verts

        # Map every frame's site index through the translation table, then
        # restore the unknown-site marker wherever the input had it.
        newtraj = translation[st._traj]
        newtraj[st._traj ==
                SiteTrajectory.SITE_UNKNOWN] = SiteTrajectory.SITE_UNKNOWN

        # It doesn't make sense to propagate confidence information through a
        # transform that might completely invalidate it
        newst = SiteTrajectory(newsn, newtraj, confidences=None)

        if st.real_trajectory is not None:
            newst.set_real_traj(st.real_trajectory)

        if self.set_merged_into:
            if st.site_network.has_attribute("merged_into"):
                st.site_network.remove_attribute("merged_into")
            st.site_network.add_site_attribute("merged_into", translation)

        return newst