def test_rmsd_matrix(trajectory_benchmark):
    """Compare the condensed RMSD matrix of three frames to stored values."""
    frame_indexes = (0, 2, -1)  # first, third, and last frames
    ca_atoms = trajectory_benchmark.select_atoms('name CA')
    coordinates = idpd.extract_coordinates(trajectory_benchmark, ca_atoms,
                                           frame_indexes)
    expected = np.array([8.73, 8.92, 8.57])
    observed = idpd.rmsd_matrix(coordinates, condensed=True)
    assert_allclose(observed, expected, atol=0.01)
def trajectory_centroids(a_universe, selection='not name H*',
                         segment_length=1000, n_representatives=1000):
    r"""Cluster a set of consecutive trajectory segments into a set of
    representative structures via structural similarity (RMSD)

    The simulated trajectory is divided into consecutive segments, and
    hierarchical clustering is performed on each segment to yield a
    limited number of representative structures (centroids) per segment.
    Frames left over after the last complete segment are not clustered.
    A short summary of the partitioning is written to standard output.

    Parameters
    ----------
    a_universe : :class:`~MDAnalysis.core.universe.Universe`
        Topology and trajectory.
    selection : str
        atoms for which to calculate RMSD. See the
        `selections page <https://www.mdanalysis.org/docs/documentation_pages/selections.html>`_
        for atom selection syntax.
    segment_length: int
        divide trajectory into segments of this length. When the
        trajectory has fewer frames than this value, the whole trajectory
        is treated as a single segment.
    n_representatives : int
        Desired total number of representative structures. The final
        number may be close but not equal to the desired number.

    Returns
    -------
    rep_ifr : list
        Frame indexes of representative structures (centroids), sorted
        in increasing order

    Raises
    ------
    ValueError
        If the trajectory has no frames or ``segment_length`` is
        smaller than one.
    """  # noqa: E501
    group = a_universe.select_atoms(selection)
    n_frame = len(a_universe.trajectory)
    if n_frame < 1:
        raise ValueError('the trajectory contains no frames to cluster')
    if segment_length < 1:
        raise ValueError('segment_length must be a positive integer')
    # A trajectory shorter than one segment used to give n_segments == 0
    # and a ZeroDivisionError below; cluster it as one segment instead.
    segment_length = min(segment_length, n_frame)
    # Fragmentation of the trajectory
    n_segments = n_frame // segment_length
    nc = max(1, n_representatives // n_segments)  # clusters per segment
    rep_ifr = list()  # frame indexes of representative structures
    info = (
        'Clustering the trajectory: '
        'Creating {} representatives by partitioning {} frames into {} '
        'segments and retrieving {} representatives from each segment. \n'
    ).format(nc * n_segments, n_frame, n_segments, nc)
    sys.stdout.write(info)
    sys.stdout.flush()
    # Hierarchical clustering on each trajectory fragment
    for i_segment in tqdm(range(n_segments)):
        first_frame = i_segment * segment_length
        indexes = range(first_frame, first_frame + segment_length)
        xyz = extract_coordinates(a_universe, group, indexes)
        rmsd = rmsd_matrix(xyz, condensed=True)
        z = hierarchy.linkage(rmsd, method='complete')
        for node in Tree(z=z).nodes_at_depth(nc - 1):
            # The representative's id is relative to the segment start,
            # so shift it to obtain the index within the whole trajectory
            i_frame = first_frame + node.representative(rmsd).id
            rep_ifr.append(i_frame)
    rep_ifr.sort()
    return rep_ifr
def cluster_trajectory(a_universe, selection='not name H*',
                       segment_length=1000, n_representatives=1000):
    r"""Build a hierarchical tree out of representative trajectory frames

    Representative frames are obtained by calling
    :func:`trajectory_centroids` with the same arguments. The pairwise
    RMSD among these frames is then used for a complete-linkage
    hierarchical clustering, and each leaf of the resulting tree is
    tagged with the ``iframe`` property holding its frame index.

    Parameters
    ----------
    a_universe : :class:`~MDAnalysis.core.universe.Universe`
        Topology and trajectory.
    selection : str
        atoms for which to calculate RMSD. See the
        `selections page <https://www.mdanalysis.org/docs/documentation_pages/selections.html>`_
        for atom selection syntax.
    segment_length: int
        divide trajectory into segments of this length
    n_representatives : int
        Desired total number of representative structures. The final
        number may be close but not equal to the desired number.

    Returns
    -------
    :class:`~idpflex.cluster.ClusterTrove`
        clustering results for the representatives
    """  # noqa: E501
    centroid_frames = trajectory_centroids(
        a_universe,
        selection=selection,
        segment_length=segment_length,
        n_representatives=n_representatives)
    atoms = a_universe.select_atoms(selection)
    coordinates = extract_coordinates(a_universe, atoms, centroid_frames)
    rmsd_condensed = rmsd_matrix(coordinates, condensed=True)
    # Cluster the representative structures
    linkage = hierarchy.linkage(rmsd_condensed, method='complete')
    tree = Tree(z=linkage)
    # Tag every leaf with the trajectory frame it stands for
    for position, leaf in enumerate(tree.leafs):
        frame_property = ScalarProperty(name='iframe',
                                        y=centroid_frames[position])
        leaf[frame_property.name] = frame_property
    return ClusterTrove(centroid_frames, rmsd_condensed, tree)
def cluster_trajectory(a_universe, selection='not name H*',
                       segment_length=1000, n_representatives=1000):
    r"""Cluster a set of representative structures

    The simulated trajectory is divided into segments, and hierarchical
    clustering is performed on each segment to yield a limited number of
    representative structures. These are then clustered into the final
    hierarchical tree. Selection of the representatives is delegated to
    :func:`trajectory_centroids`.

    Parameters
    ----------
    a_universe : :class:`~MDAnalysis.core.universe.Universe`
        Topology and trajectory.
    selection : str
        atoms for which to calculate RMSD
    segment_length: int
        divide trajectory into chunks of this length. When the
        trajectory has fewer frames than this value, the whole trajectory
        is treated as a single chunk.
    n_representatives : int
        Target total number of representative structures. The final
        number may be close but not equal to the target number.

    Returns
    -------
    :class:`~idpflex.cluster.ClusterTrove`
        clustering results for the representatives

    Raises
    ------
    ValueError
        If the trajectory has no frames or ``segment_length`` is
        smaller than one.
    """
    n_frame = len(a_universe.trajectory)
    if n_frame < 1:
        raise ValueError('the trajectory contains no frames to cluster')
    if segment_length < 1:
        raise ValueError('segment_length must be a positive integer')
    # Clamp before delegating: a segment longer than the trajectory would
    # produce zero segments and a division by zero when distributing the
    # representatives among segments.
    segment_length = min(segment_length, n_frame)
    rep_ifr = trajectory_centroids(a_universe, selection=selection,
                                   segment_length=segment_length,
                                   n_representatives=n_representatives)
    # Cluster the representative structures
    group = a_universe.select_atoms(selection)
    xyz = extract_coordinates(a_universe, group, rep_ifr)
    rmsd = rmsd_matrix(xyz, condensed=True)
    tree = Tree(z=hierarchy.linkage(rmsd, method='complete'))
    # Record on each leaf the trajectory frame it represents
    for ileaf, leaf in enumerate(tree.leafs):
        leaf.add_property(ScalarProperty(name='iframe', y=rep_ifr[ileaf]))
    return ClusterTrove(rep_ifr, rmsd, tree)