Ejemplo n.º 1
0
    def test_selfdist(self):
        """Check ``self_distance_array`` against a hand-rolled reference.

        Two inputs are exercised: ``self.S_mol1`` with ``box=self.box`` and
        ``self.S_mol2`` with ``box=self.boxV``. In both cases the input
        coordinates are first passed through ``transform_StoR`` with
        ``self.box``, and the reference distances are wrapped using the rows
        of ``self.box``.
        """
        from MDAnalysis.lib.distances import self_distance_array
        from MDAnalysis.lib.distances import transform_StoR

        def reference_distances(coords, n_expected):
            """Return the distances of all pairs ``i < j`` computed by hand."""
            expected = np.zeros(n_expected, dtype=np.float64)
            slot = 0
            for i, Ri in enumerate(coords):
                for Rj in coords[i + 1:]:
                    Rij = Rj - Ri
                    # Remove whole box rows, axis 2 first, then 1, then 0.
                    # NOTE(review): the order is kept exactly as before;
                    # presumably it matters for a non-orthogonal box.
                    for axis in (2, 1, 0):
                        row = self.box[axis]
                        Rij -= round(Rij[axis] / row[axis]) * row
                    expected[slot] = np.linalg.norm(Rij)
                    slot += 1
            return expected

        cases = (
            (1, self.S_mol1, self.box),
            # input 2 has wider separation in points and uses boxV
            (2, self.S_mol2, self.boxV),
        )
        for number, S_coords, box in cases:
            # Transform functions are tested elsewhere so taken as working here
            R_coords = transform_StoR(S_coords, self.box, backend=self.backend)
            dists = self_distance_array(R_coords,
                                        box=box,
                                        backend=self.backend)
            manual = reference_distances(R_coords, len(dists))

            assert_almost_equal(
                dists,
                manual,
                self.prec,
                err_msg="self_distance_array failed with input {}".format(
                    number))
Ejemplo n.º 2
0
    def test_selfdist(self):
        """Check ``self_distance_array`` against a hand-rolled reference.

        Two inputs are exercised: ``self.S_mol1`` with ``box=self.box`` and
        ``self.S_mol2`` with ``box=self.boxV``. In both cases the input
        coordinates are first passed through ``transform_StoR`` with
        ``self.box``, and the reference distances are wrapped using the rows
        of ``self.box``.
        """
        from MDAnalysis.lib.distances import self_distance_array
        from MDAnalysis.lib.distances import transform_StoR

        def reference_distances(coords, n_expected):
            """Return the distances of all pairs ``i < j`` computed by hand."""
            expected = np.zeros(n_expected, dtype=np.float64)
            slot = 0
            for i, Ri in enumerate(coords):
                for Rj in coords[i + 1:]:
                    Rij = Rj - Ri
                    # Remove whole box rows, axis 2 first, then 1, then 0.
                    # NOTE(review): the order is kept exactly as before;
                    # presumably it matters for a non-orthogonal box.
                    for axis in (2, 1, 0):
                        row = self.box[axis]
                        Rij -= round(Rij[axis] / row[axis]) * row
                    expected[slot] = np.linalg.norm(Rij)
                    slot += 1
            return expected

        cases = (
            (1, self.S_mol1, self.box),
            # input 2 has wider separation in points and uses boxV
            (2, self.S_mol2, self.boxV),
        )
        for number, S_coords, box in cases:
            # Transform functions are tested elsewhere so taken as working here
            R_coords = transform_StoR(S_coords, self.box, backend=self.backend)
            dists = self_distance_array(R_coords, box=box, backend=self.backend)
            manual = reference_distances(R_coords, len(dists))

            assert_almost_equal(
                dists, manual, self.prec,
                err_msg="self_distance_array failed with input {}".format(number))
Ejemplo n.º 3
0
def calculate_cluster_per_atom(ag, cutoff, backend='serial'):
    """
    Assign a cluster ID to every atom based on a distance cutoff.

    Vectorised numpy implementation; stated to give the same results as
    ``cluster/atom`` in LAMMPS.

    Reported timings for 100 atoms:
        * this implementation : ``864 µs ± 7.63 µs``
        * original code (written as in C) : ``7.4 s ± 47.6 ms``

    Parameters
    ----------
    ag : MDAnalysis.core.groups.AtomGroup
        Atom group to cluster
    cutoff : float
        Two atoms closer than this distance end up in the same cluster.
    backend : str, optional
        Backend handed to the distance calculation.
        Note: ``serial`` may be faster than ``OpenMP`` for smaller systems.
        Default is ``serial``.

    Returns
    -------
    clusterID : np.ndarray
        Cluster ID per atom, shape ``(ag.n_atoms,)``. IDs start out as
        ``ag.ids + 1`` and every cluster ends up with the smallest of the
        starting IDs of its members.
    """
    n_atoms = ag.n_atoms
    # every atom starts in a cluster of its own
    clusterID = ag.ids + 1

    pair_distances = self_distance_array(ag.positions,
                                         box=ag.ts.dimensions,
                                         backend=backend)
    close_pairs = np.where(pair_distances < cutoff)[0]

    # Atom indices of each close pair.
    # Assumes the distance array is ordered like the upper triangle
    # returned by ``np.triu_indices(n_atoms, k=1)``.
    upper_triangle = np.array(np.triu_indices(n_atoms, k=1))
    mi, mj = upper_triangle[:, close_pairs]

    # Spread the smaller ID over every close pair until no pair disagrees.
    differing = np.where(clusterID[mi] != clusterID[mj])[0]
    while differing.size > 0:
        i, j = mi[differing], mj[differing]
        clusterID[i] = clusterID[j] = np.minimum(clusterID[i], clusterID[j])
        differing = np.where(clusterID[mi] != clusterID[mj])[0]

    return clusterID
Ejemplo n.º 4
0
def get_atom_self_distances(pdb,
                            xtc,
                            selection='all',
                            first_frame=0,
                            last_frame=-1,
                            step=1):
    """
    Load distances between all selected atoms.

    Parameters
    ----------
    pdb : str
        File name for the reference file (PDB or GRO format).
    xtc : str
        File name for the trajectory (xtc format).
    selection : str
        Selection string to choose which atoms to include. Default: all.
    first_frame : int, default=0
        First frame to return of the features. Zero-based.
    last_frame : int, default=-1
        Stop index of the trajectory slice. Zero-based. It is used as a
        regular slice stop, so the frame with this index is itself not
        included; with the default of ``-1`` the final frame is left out.
    step : int, default=1
        Subsampling step width when reading the frames.

    Returns
    -------
    feature_names : list of str
        Names of all distances, one per atom pair ``i < j``.
    features_data : numpy array
        Data for all distances [Å], one row per frame read and one column
        per atom pair.

    """

    u = mda.Universe(pdb, xtc)
    a = u.select_atoms(selection)
    num_at = len(a)

    # Name the atoms
    at_labels = [
        '%s %s %s' % (atom.residue.resname, atom.resid, atom.name)
        for atom in a
    ]

    # Name the distance labels, one per pair i < j
    d_labels = [
        'DIST: %s - %s' % (at_labels[i], at_labels[j])
        for i in range(num_at)
        for j in range(i + 1, num_at)
    ]

    # Calculate the distances
    num_dist = num_at * (num_at - 1) // 2
    frames = u.trajectory[first_frame:last_frame:step]
    data_arr = np.zeros([len(frames), num_dist])
    for frame, _ts in enumerate(frames):
        # Iterating moves the universe to the next frame, which updates
        # ``a.positions``; the distances go straight into this frame's row.
        ld.self_distance_array(a.positions, result=data_arr[frame])

    return d_labels, data_arr
Ejemplo n.º 5
0
def calculate_rdf_intra(ag,
                        bin_range=(0, 15),
                        bins=100,
                        start=0,
                        end=None,
                        step=None,
                        backend='serial',
                        max_memory_usage=None,
                        verbose=False
                        ):
    """
    Calculate the radial distribution function of all atoms in an AtomGroup with themselves.

    Parameters
    ----------
    ag : MDAnalysis.core.groups.AtomGroup
        Atom group
    bin_range : tuple(int,int), optional
        Bin range to use. Default is `(0, 15)`.
    bins : int, optional
        Number of bins used. Default is `100`.
    start : int, optional
        Starting frame. Default is `0`.
    end : int or None, optional
        Final frame. `None` for last frame. Default is `None`.
    step : int or None, optional
        Step size. Default is `None`, which reads every frame.
    backend : str, optional
        Backend to use.  `{'serial', 'OpenMP'}`. Default is `serial`.
    max_memory_usage : int or None, optional
        Maximum memory in bytes to use for buffering distances.
        If it's not `None`, as many frames as fit are buffered before the
        histogram is updated. Default is `None` (one frame at a time).
    verbose : bool, optional
        Turns on verbosity

    Returns
    -------
    bins : numpy.ndarray
        Array of the bin centers.
    rdf : numpy.ndarray
        Radial distribution function.

    Raises
    ------
    ValueError
        If ``max_memory_usage`` is too small to hold the distances of a
        single frame.

    Examples
    --------
    >>> bins, rdf = calculate_rdf_intra(ag, bins=100, bin_range=(0, 15),
    ...                                 start=0, end=1000,
    ...                                 backend='OpenMP',
    ...                                 max_memory_usage=2 * 1024**3, # 2 GB
    ...                                 verbose=True)
    """

    # settings
    rdf_settings = dict(bins=bins,
                        range=bin_range)

    # general constants
    n_frames = len(ag.universe.trajectory[start:end:step])
    n_pairs = ag.n_atoms * (ag.n_atoms - 1) // 2

    # handle maximum memory requirement
    if max_memory_usage is not None:
        # one frame needs n_pairs float64 values, 8 bytes each
        size_distance_array = int(n_pairs * 8)
        n_buffer = max_memory_usage // size_distance_array
        if n_buffer == 0:
            # explicit raise: an ``assert`` would vanish under ``python -O``
            raise ValueError(
                "Not enough memory to calculate the rdf with this implementation. " +
                "Need at least {} bytes".format(size_distance_array))
        # never buffer more frames than will actually be read
        n_buffer = max(1, min(n_buffer, n_frames))
    else:
        n_buffer = 1

    # init storage array
    dummy_storage = np.empty((n_buffer, n_pairs), dtype=np.float64)
    # init histogram from empty data, so that no dummy sample can ever land
    # in a bin; this only serves to get zeroed counts and the bin edges
    count, edges = np.histogram([], **rdf_settings)
    volume = 0  # initialize the volume
    b = 0       # initialize buffer

    if verbose:
        p = ProgressReporter_()
        p.register(n_frames, description="calculate RDF")
    for ts in ag.universe.trajectory[start:end:step]:
        if verbose:
            p.update(1)
        # calculate distances directly into the current buffer row
        self_distance_array(ag.positions, box=ts.dimensions,
                            result=dummy_storage[b], backend=backend)
        b += 1  # go to the next buffer
        if b == n_buffer:
            # buffer is full: bin all buffered frames at once
            tmp_count, _ = np.histogram(dummy_storage, **rdf_settings)
            count += tmp_count
            b = 0  # reset b

        volume += ts.volume
    if b > 0:
        # bin the frames left over in a partially filled buffer
        tmp_count, _ = np.histogram(dummy_storage[:b], **rdf_settings)
        count += tmp_count
    if verbose:
        p.finish()

    # Volume in each radial shell
    vol = np.power(edges[1:], 3) - np.power(edges[:-1], 3)
    vol *= 4 / 3.0 * np.pi

    # Average number density of pairs
    box_vol = volume / n_frames
    density = n_pairs / box_vol

    rdf = count / (density * vol * n_frames)
    bins = (edges[:-1] + edges[1:]) / 2.0

    return bins, rdf
Ejemplo n.º 6
0
def calculate_cluster_around_group(ag_cluster,
                                   ag,
                                   cutoff,
                                   cutoff_cluster=None,
                                   backend='serial'):
    """
    Distance based clustering around an initial atom group

    Atoms of ``ag`` closer than ``cutoff_cluster`` to any atom of
    ``ag_cluster`` are selected first. The selection is then grown by
    repeatedly adding atoms of ``ag`` that are closer than ``cutoff`` to an
    already selected atom of ``ag``, until no more atoms are added.

    Parameters
    ----------
    ag_cluster : MDAnalysis.core.groups.AtomGroup
        Atom group of the initial cluster
    ag : MDAnalysis.core.groups.AtomGroup
        Atom group to cluster
    cutoff : float
        Cutoff distance for the clustering of atoms in ``ag``.
    cutoff_cluster : float or None, optional
        Cutoff distance for the clustering of atoms in ``ag`` with atoms in ``ag_cluster``.
        If it's ``None``, ``cutoff`` will be used. Default is ``None``.
    backend : str, optional
        Which backend to use for the distance calculation.
        Note: ``serial`` may be faster than ``OpenMP`` for smaller systems.
        Default is ``serial``.

    Returns
    -------
    ag : MDAnalysis.core.groups.AtomGroup
        Union of ``ag_cluster`` and the selected atoms of ``ag``
    """
    if cutoff_cluster is None:
        cutoff_cluster = cutoff

    n_atoms = ag.n_atoms
    # builtin ``bool``: the ``np.bool`` alias was removed from NumPy
    clusterID = np.zeros((n_atoms, ), dtype=bool)

    # get connected molecules: atoms of ``ag`` close to the initial group
    distances = distance_array(ag_cluster.positions,
                               ag.positions,
                               box=ag.ts.dimensions,
                               backend=backend)
    # second axis of the distance matrix indexes the atoms of ``ag``
    ids = np.where(distances < cutoff_cluster)[1]
    clusterID[ids] = True
    del distances

    # get molecule - molecules clusters
    distances = self_distance_array(ag.positions,
                                    box=ag.ts.dimensions,
                                    backend=backend)
    ids = np.where(distances < cutoff)[0]

    # Atom indices of each close pair.
    # Assumes the distance array is ordered like the upper triangle
    # returned by ``np.triu_indices(n_atoms, k=1)``.
    mi, mj = np.array(np.triu_indices(n_atoms, k=1))[:, ids]

    while True:
        # close pairs with exactly one selected member
        mask = np.where(clusterID[mi] != clusterID[mj])[0]
        if mask.size == 0:
            break
        # the two flags of such a pair differ, so their maximum is always
        # True: select both members
        clusterID[mi[mask]] = True
        clusterID[mj[mask]] = True

    ag_new = ag_cluster.union(ag.atoms[clusterID])
    return ag_new
Ejemplo n.º 7
0
 def _add_distance(positions, box):
     # Collects, for every position, its smallest pair distances into
     # ``self.samples``.
     # NOTE(review): ``self`` is not a parameter; it has to come from an
     # enclosing scope that is outside this view -- confirm.
     # Pair distances are expanded into a square matrix with one row per
     # position (presumably scipy's ``squareform`` -- verify the
     # ``scidist`` alias).
     dist = scidist.squareform(
         mddist.self_distance_array(positions, box))
     for i, d in enumerate(dist):
         # Sort the row and keep entries 1 .. self.nneigh; entry 0 is
         # skipped (presumably the zero distance of the position to itself).
         self.samples.extend(np.sort(d)[1:self.nneigh + 1])