Exemple #1
0
def test_harder_hubscore():
    """Compare tpt.hub_scores with a brute-force reference on a 10-state MSM.

    The reference is assembled from tpt.committors and
    tpt.conditional_committors, so this test depends on both of them.
    """
    n_states = 10

    assignments = np.random.randint(n_states, size=(10, 1000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    hub_scores = tpt.hub_scores(msm)

    expected = np.zeros(n_states)
    for source in xrange(n_states):
        for sink in xrange(n_states):
            committors = tpt.committors(source, sink, msm)
            # Normalization shared by every waypoint of this (source, sink).
            denom = msm.transmat_[source, :].dot(committors)
            for waypoint in xrange(n_states):
                # source, sink and waypoint must be three distinct states
                if len({source, sink, waypoint}) < 3:
                    continue
                cond = tpt.conditional_committors(source, sink, waypoint, msm)

                # One step out of the source, weighted by the conditional
                # committor of the state that step lands in.
                weighted = sum(
                    (cond[i] * msm.transmat_[source, i]
                     for i in xrange(n_states)
                     if i != source and i != sink),
                    0.0)

                expected[waypoint] += weighted / denom

    # average over the 9 * 8 ordered (source, sink) pairs per waypoint
    expected /= (9 * 8)

    npt.assert_array_almost_equal(expected, hub_scores)
Exemple #2
0
def test_load():
    """Check that md.load gives consistent frame counts for files and lists.

    Each file is loaded on its own and as a list of ``num_block`` copies,
    with ``discard_overlapping_frames`` both on and off. The function is a
    generator: it yields zero-argument callables, each of which performs one
    equality check with ``eq``.
    """
    # NOTE(review): "traj.h5" is listed twice, so it is checked twice.
    filenames = [
        "frame0.xtc", "frame0.trr", "frame0.dcd", "frame0.binpos", "traj.h5",
        "frame0.nc", "traj.h5", "frame0.lammpstrj", "frame0.xyz", "frame0.tng"
    ]
    num_block = 3
    for filename in filenames:
        path = get_fn(filename)
        repeated = [path] * num_block

        t0 = md.load(path, top=nat, discard_overlapping_frames=True)
        t1 = md.load(path, top=nat, discard_overlapping_frames=False)
        t2 = md.load(repeated, top=nat, discard_overlapping_frames=False)
        t3 = md.load(repeated, top=nat, discard_overlapping_frames=True)

        # these don't actually overlap, so discard_overlapping_frames should
        # have no effect. the overlap is between the last frame of one and the
        # first frame of the next.
        #
        # The trajectories are bound as default arguments so each callable
        # checks the file it was created for, even if it is invoked after
        # the loop has moved on (closures would otherwise late-bind t0..t3).
        yield lambda t0=t0, t1=t1: eq(t0.n_frames, t1.n_frames)
        yield lambda t0=t0, t2=t2: eq(t0.n_frames * num_block, t2.n_frames)
        yield lambda t2=t2, t3=t3: eq(t3.n_frames, t2.n_frames)
def test_load():
    """Check that md.load gives consistent frame counts for files and lists.

    Each file is loaded on its own and as a list of ``num_block`` copies,
    with ``discard_overlapping_frames`` both on and off. The function is a
    generator: it yields zero-argument callables, each of which performs one
    equality check with ``eq``.
    """
    # NOTE(review): "traj.h5" is listed twice, so it is checked twice.
    filenames = [
        "frame0.xtc",
        "frame0.trr",
        "frame0.dcd",
        "frame0.binpos",
        "traj.h5",
        "frame0.nc",
        "traj.h5",
        "frame0.lammpstrj",
        "frame0.xyz",
    ]
    num_block = 3
    for filename in filenames:
        path = get_fn(filename)
        repeated = [path] * num_block

        t0 = md.load(path, top=nat, discard_overlapping_frames=True)
        t1 = md.load(path, top=nat, discard_overlapping_frames=False)
        t2 = md.load(repeated, top=nat, discard_overlapping_frames=False)
        t3 = md.load(repeated, top=nat, discard_overlapping_frames=True)

        # these don't actually overlap, so discard_overlapping_frames should
        # have no effect. the overlap is between the last frame of one and the
        # first frame of the next.
        #
        # The trajectories are bound as default arguments so each callable
        # checks the file it was created for, even if it is invoked after
        # the loop has moved on (closures would otherwise late-bind t0..t3).
        yield lambda t0=t0, t1=t1: eq(t0.n_frames, t1.n_frames)
        yield lambda t0=t0, t2=t2: eq(t0.n_frames * num_block, t2.n_frames)
        yield lambda t2=t2, t3=t3: eq(t3.n_frames, t2.n_frames)
Exemple #4
0
def test_fluxes():
    """Check tpt.fluxes and tpt.net_fluxes against a direct evaluation.

    The reference values are built from tpt.committors, so this test
    depends on that function.
    """
    n_states = 3

    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(n_states, size=(10, 1000))
    msm.fit(assignments)

    transmat = msm.transmat_
    populations = msm.populations_
    # forward committors from state 0 to state 2
    forward = tpt.committors(0, 2, msm)

    expected_fluxes = np.zeros((n_states, n_states))
    expected_net = np.zeros((n_states, n_states))

    for src in xrange(n_states):
        for dst in xrange(n_states):
            if src == dst:
                continue
            # Eq. 2.24 in Metzner et al. Transition Path Theory.
            # Multiscale Model. Simul. 2009, 7, 1192-1219.
            expected_fluxes[src, dst] = (populations[src] * transmat[src, dst]
                                         * (1 - forward[src]) * forward[dst])

    # Net flux keeps only the positive part of the antisymmetric difference.
    for src in xrange(n_states):
        for dst in xrange(n_states):
            surplus = expected_fluxes[src, dst] - expected_fluxes[dst, src]
            expected_net[src, dst] = np.max([0, surplus])

    fluxes = tpt.fluxes(0, 2, msm)
    net_fluxes = tpt.net_fluxes(0, 2, msm)

    npt.assert_array_almost_equal(expected_fluxes, fluxes)
    npt.assert_array_almost_equal(expected_net, net_fluxes)
Exemple #5
0
def test_harder_hubscore():
    """Validate tpt.hub_scores against an explicit triple loop.

    Depends on tpt.committors and tpt.conditional_committors, which are
    used to build the reference values.
    """
    n_states = 10

    assignments = np.random.randint(n_states, size=(10, 1000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    hub_scores = tpt.hub_scores(msm)

    reference = np.zeros(n_states)
    for start in xrange(n_states):
        for end in xrange(n_states):
            committors = tpt.committors(start, end, msm)
            normalizer = msm.transmat_[start, :].dot(committors)
            for via in xrange(n_states):
                # only score triples of three different states
                if start == end or start == via or end == via:
                    continue
                cond = tpt.conditional_committors(start, end, via, msm)

                # accumulate over every intermediate state except the
                # two endpoints
                intermediates = (k for k in xrange(n_states)
                                 if k not in (start, end))
                partial = sum(
                    (cond[k] * msm.transmat_[start, k]
                     for k in intermediates),
                    0.0)

                reference[via] += partial / normalizer

    # 9 * 8 ordered endpoint pairs contribute to each waypoint
    reference /= (9 * 8)

    npt.assert_array_almost_equal(reference, hub_scores)
Exemple #6
0
def test_load():
    """Check that md.load gives consistent frame counts for files and lists.

    Each file is loaded on its own and as a list of ``num_block`` copies,
    with ``discard_overlapping_frames`` both on and off. The function is a
    generator: it yields zero-argument callables, each of which performs one
    equality check with ``eq``.
    """
    # The last entry repeats 'traj.h5' as a six.u() text string.
    filenames = [
        "frame0.xtc", "frame0.trr", "frame0.dcd", "frame0.binpos", "traj.h5",
        'legacy_msmbuilder_trj0.lh5', 'frame0.nc',
        six.u('traj.h5')
    ]
    num_block = 3
    for filename in filenames:
        path = get_fn(filename)
        repeated = [path] * num_block

        t0 = md.load(path, top=nat, discard_overlapping_frames=True)
        t1 = md.load(path, top=nat, discard_overlapping_frames=False)
        t2 = md.load(repeated, top=nat, discard_overlapping_frames=False)
        t3 = md.load(repeated, top=nat, discard_overlapping_frames=True)

        # these don't actually overlap, so discard_overlapping_frames should
        # have no effect. the overlap is between the last frame of one and
        # the first frame of the next.
        #
        # The trajectories are bound as default arguments so each callable
        # checks the file it was created for, even if it is invoked after
        # the loop has moved on (closures would otherwise late-bind t0..t3).
        yield lambda t0=t0, t1=t1: eq(t0.n_frames, t1.n_frames)
        yield lambda t0=t0, t2=t2: eq(t0.n_frames * num_block, t2.n_frames)
        yield lambda t2=t2, t3=t3: eq(t3.n_frames, t2.n_frames)
Exemple #7
0
def test_fluxes():
    """Cross-check tpt.fluxes / tpt.net_fluxes on a random 3-state MSM.

    Reference fluxes are computed element by element from the transition
    matrix, the populations and tpt.committors (on which this test depends).
    """
    size = 3

    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(size, size=(10, 1000))
    msm.fit(assignments)

    tprob = msm.transmat_
    pop = msm.populations_
    # forward committors
    q_forward = tpt.committors(0, 2, msm)

    ref_fluxes = np.zeros((size, size))
    ref_net_fluxes = np.zeros((size, size))

    off_diagonal = [(row, col)
                    for row in xrange(size)
                    for col in xrange(size)
                    if row != col]
    for row, col in off_diagonal:
        # Eq. 2.24 in Metzner et al. Transition Path Theory.
        # Multiscale Model. Simul. 2009, 7, 1192-1219.
        reactive = pop[row] * tprob[row, col] * (1 - q_forward[row])
        ref_fluxes[row, col] = reactive * q_forward[col]

    for row in xrange(size):
        for col in xrange(size):
            difference = ref_fluxes[row, col] - ref_fluxes[col, row]
            # negative net flux is clipped to zero
            ref_net_fluxes[row, col] = np.max([0, difference])

    fluxes = tpt.fluxes(0, 2, msm)
    net_fluxes = tpt.net_fluxes(0, 2, msm)

    npt.assert_array_almost_equal(ref_fluxes, fluxes)
    npt.assert_array_almost_equal(ref_net_fluxes, net_fluxes)
Exemple #8
0
def get_bond_connectivity(conf):
    """Get a list of all the bonds in a conformation

    Parameters
    ----------
    conf : MDTraj.Trajectory
        An MDTraj trajectory, only the first frame will be used.

    Returns
    -------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond. Each row is ordered ``(i, j)`` with
        ``i < j``. When no bonds are found the array has shape ``(0, 2)``.

    Raises
    ------
    ValueError
        If the element inferred from an atom name is not a key of
        ``COVALENT_RADII`` (this includes names that are empty once digits
        and spaces are stripped).

    Notes
    -----
    Regular bonds are assigned to all pairs of atoms where
    the interatomic distance is less than 1.3 times the
    sum of their respective covalent radii.

    The element of an atom is taken to be the first character of its name
    after stripping leading/trailing digits and spaces.

    References
    ----------
    Bakken and Helgaker, JCP Vol. 117, Num. 20 22 Nov. 2002
    http://folk.uio.no/helgaker/reprints/2002/JCP117b_GeoOpt.pdf
    """
    from scipy.spatial.distance import squareform, pdist

    xyz = conf.xyz[0, :, :]
    n_atoms = xyz.shape[0]

    # Keep element symbols as native strings: a numpy 'S1' array hands back
    # bytes on Python 3, which would never match str keys in COVALENT_RADII.
    elements = []
    for atom in conf.top.atoms():
        # first character of the atom name, after stripping off any digits
        # (including 0) and spaces; '' if nothing is left
        symbol = atom.name.strip('0123456789 ')[:1]
        if symbol not in COVALENT_RADII:
            raise ValueError("I don't know about this AtomName: {}".format(
                atom.name))
        elements.append(symbol)

    distance_mtx = squareform(pdist(xyz))
    radii = [COVALENT_RADII[symbol] for symbol in elements]

    # Regular bonds are assigned to all pairs of atoms where the interatomic
    # distance is less than 1.3 times the sum of their covalent radii.
    connectivity = [(i, j)
                    for i in range(n_atoms)
                    for j in range(i + 1, n_atoms)
                    if distance_mtx[i, j] < 1.3 * (radii[i] + radii[j])]

    # reshape so that "no bonds" is still an (0, 2) integer array
    return np.array(connectivity, dtype=int).reshape(-1, 2)
Exemple #9
0
def get_bond_connectivity(conf):
    """Get a list of all the bonds in a conformation

    Parameters
    ----------
    conf : MDTraj.Trajectory
        An MDTraj trajectory, only the first frame will be used.

    Returns
    -------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond. Each row is ordered ``(i, j)`` with
        ``i < j``. When no bonds are found the array has shape ``(0, 2)``.

    Raises
    ------
    ValueError
        If the element inferred from an atom name is not a key of
        ``COVALENT_RADII`` (this includes names that are empty once digits
        and spaces are stripped).

    Notes
    -----
    Regular bonds are assigned to all pairs of atoms where
    the interatomic distance is less than 1.3 times the
    sum of their respective covalent radii.

    The element of an atom is taken to be the first character of its name
    after stripping leading/trailing digits and spaces.

    References
    ----------
    Bakken and Helgaker, JCP Vol. 117, Num. 20 22 Nov. 2002
    http://folk.uio.no/helgaker/reprints/2002/JCP117b_GeoOpt.pdf
    """
    from scipy.spatial.distance import squareform, pdist

    xyz = conf.xyz[0, :, :]
    n_atoms = xyz.shape[0]

    # Keep element symbols as native strings: a numpy 'S1' array hands back
    # bytes on Python 3, which would never match str keys in COVALENT_RADII.
    elements = []
    for atom in conf.top.atoms():
        # first character of the atom name, after stripping off any digits
        # (including 0) and spaces; '' if nothing is left
        symbol = atom.name.strip('0123456789 ')[:1]
        if symbol not in COVALENT_RADII:
            raise ValueError("I don't know about this AtomName: {}".format(
                atom.name))
        elements.append(symbol)

    distance_mtx = squareform(pdist(xyz))
    radii = [COVALENT_RADII[symbol] for symbol in elements]

    # Regular bonds are assigned to all pairs of atoms where the interatomic
    # distance is less than 1.3 times the sum of their covalent radii.
    connectivity = [(i, j)
                    for i in range(n_atoms)
                    for j in range(i + 1, n_atoms)
                    if distance_mtx[i, j] < 1.3 * (radii[i] + radii[j])]

    # reshape so that "no bonds" is still an (0, 2) integer array
    return np.array(connectivity, dtype=int).reshape(-1, 2)
Exemple #10
0
def hub_scores(msm, waypoints=None):
    """
    Calculate the hub score for one or more waypoints

    The "hub score" is a measure of how well traveled a certain state or
    set of states is in a network. Specifically, it is the fraction of
    times that a walker visits a state en route from some state A to another
    state B, averaged over all combinations of A and B.


    Parameters
    ----------
    msm : msmbuilder.MarkovStateModel
        MSM to analyze
    waypoints : int, list or np.ndarray of int, optional
        The index of the intermediate state (or more than one). Any
        integer type (including numpy integers) is accepted for a single
        waypoint. If None, then all waypoints will be used

    Returns
    -------
    hub_scores : np.ndarray
        One hub score per requested waypoint, in the order given.

    Raises
    ------
    ValueError
        If ``waypoints`` is not an integer, a list, an ndarray or None.

    References
    ----------
    .. [1] Dickson & Brooks (2012), J. Chem. Theory Comput., 8, 3044-3052.
    """
    import numbers

    n_states = msm.n_states_
    # numbers.Integral also matches numpy integer scalars, which are not
    # instances of the builtin int on Python 3.
    if isinstance(waypoints, numbers.Integral):
        waypoints = [waypoints]
    elif waypoints is None:
        waypoints = xrange(n_states)
    elif not isinstance(waypoints, (list, np.ndarray)):
        raise ValueError("waypoints (%s) must be an int, a list, or None" %
                         str(waypoints))

    hub_scores = []
    for waypoint in waypoints:
        other_states = (i for i in xrange(n_states) if i != waypoint)

        # sum over every ordered (source, sink) pair avoiding the waypoint
        hub_score = 0.0
        for (source, sink) in itertools.permutations(other_states, 2):
            hub_score += fraction_visited(source, sink, waypoint, msm)

        # average over the (n - 1) * (n - 2) ordered pairs
        hub_score /= float((n_states - 1) * (n_states - 2))
        hub_scores.append(hub_score)

    return np.array(hub_scores)
Exemple #11
0
def hub_scores(msm, waypoints=None):
    """
    Calculate the hub score for one or more waypoints

    The "hub score" is a measure of how well traveled a certain state or
    set of states is in a network. Specifically, it is the fraction of
    times that a walker visits a state en route from some state A to another
    state B, averaged over all combinations of A and B.


    Parameters
    ----------
    msm : msmbuilder.MarkovStateModel
        MSM to analyze
    waypoints : int, list or np.ndarray of int, optional
        The index of the intermediate state (or more than one). Any
        integer type (including numpy integers) is accepted for a single
        waypoint. If None, then all waypoints will be used

    Returns
    -------
    hub_scores : np.ndarray
        One hub score per requested waypoint, in the order given.

    Raises
    ------
    ValueError
        If ``waypoints`` is not an integer, a list, an ndarray or None.

    References
    ----------
    .. [1] Dickson & Brooks (2012), J. Chem. Theory Comput., 8, 3044-3052.
    """
    import numbers

    n_states = msm.n_states_
    # numbers.Integral also matches numpy integer scalars, which are not
    # instances of the builtin int on Python 3.
    if isinstance(waypoints, numbers.Integral):
        waypoints = [waypoints]
    elif waypoints is None:
        waypoints = xrange(n_states)
    elif not isinstance(waypoints, (list, np.ndarray)):
        raise ValueError("waypoints (%s) must be an int, a list, or None" %
                         str(waypoints))

    hub_scores = []
    for waypoint in waypoints:
        other_states = (i for i in xrange(n_states) if i != waypoint)

        # sum over every ordered (source, sink) pair avoiding the waypoint
        hub_score = 0.0
        for (source, sink) in itertools.permutations(other_states, 2):
            hub_score += fraction_visited(source, sink, waypoint, msm)

        # average over the (n - 1) * (n - 2) ordered pairs
        hub_score /= float((n_states - 1) * (n_states - 2))
        hub_scores.append(hub_score)

    return np.array(hub_scores)
Exemple #12
0
def assign_in_memory(metric, generators, project, atom_indices_to_load=None):
    """
    Assign every frame to its closest generator

    Everything is held in memory: there is no checkpointing and nothing is
    written to disk.

    Parameters
    ----------
    metric : msmbuilder.metrics.AbstractDistanceMetric
        A distance metric used to define "closest"
    generators : msmbuilder.Trajectory
        A trajectory containing the structures of all of the cluster centers
    project : msmbuilder.Project
        Used to load the trajectories
    atom_indices_to_load : {None, list}
        Atom indices forwarded to ``project.load_traj`` for every trajectory.
        The generators are used as passed in and are NOT subsampled here, so
        if they were already restricted to a subset of atoms while the
        trajectories on disk were not, pass that subset here.

    Returns
    -------
    assignments : np.ndarray, shape=(n_trajs, max_traj_length), dtype=int
        Index of the closest generator for each frame; entries past the end
        of a trajectory are left at -1.
    distances : np.ndarray, shape=(n_trajs, max_traj_length), dtype=float32
        Distance to that generator; entries past the end of a trajectory
        are left at -1.

    Raises
    ------
    ValueError
        If a loaded trajectory and the generators differ in number of atoms.

    See Also
    --------
    assign_with_checkpoint
    """

    n_trajs = project.n_trajs
    max_traj_length = np.max(project.traj_lengths)
    table_shape = (n_trajs, max_traj_length)

    # -1 marks slots that do not correspond to a real frame
    assignments = -np.ones(table_shape, dtype='int')
    distances = -np.ones(table_shape, dtype='float32')

    prepared_generators = metric.prepare_trajectory(generators)

    for traj_index in xrange(n_trajs):
        traj = project.load_traj(traj_index, atom_indices=atom_indices_to_load)

        n_traj_atoms = traj['XYZList'].shape[1]
        n_generator_atoms = generators['XYZList'].shape[1]
        if n_traj_atoms != n_generator_atoms:
            message = ("Number of atoms in generators does not match "
                       "traj we're trying to assign! Maybe check atom indices?")
            raise ValueError(message)

        prepared_traj = metric.prepare_trajectory(traj)

        for frame in xrange(len(traj)):
            to_generators = metric.one_to_all(prepared_traj,
                                              prepared_generators, frame)
            closest = np.argmin(to_generators)
            assignments[traj_index, frame] = closest
            distances[traj_index, frame] = to_generators[closest]

    return assignments, distances
Exemple #13
0
def assign_in_memory(metric, generators, project, atom_indices_to_load=None):
    """
    Assign every frame to its closest generator

    Works entirely in memory: no checkpoints are taken and no results are
    saved to disk.

    Parameters
    ----------
    metric : msmbuilder.metrics.AbstractDistanceMetric
        A distance metric used to define "closest"
    generators : msmbuilder.Trajectory
        A trajectory containing the structures of all of the cluster centers
    project : msmbuilder.Project
        Used to load the trajectories
    atom_indices_to_load : {None, list}
        Passed to ``project.load_traj`` as ``atom_indices`` for each
        trajectory. Only the project trajectories are subsampled with it;
        the generators are taken exactly as given. If the generators are
        already restricted to a set of atoms but the trajectories on disk
        are not, pass that set here to reconcile the two.

    Returns
    -------
    assignments : np.ndarray, shape=(n_trajs, max_traj_length), dtype=int
        Closest generator index per frame, -1 where a trajectory has no
        frame.
    distances : np.ndarray, shape=(n_trajs, max_traj_length), dtype=float32
        Distance to the assigned generator, -1 where a trajectory has no
        frame.

    Raises
    ------
    ValueError
        If the atom count of a trajectory differs from the generators'.

    See Also
    --------
    assign_with_checkpoint
    """

    n_trajs = project.n_trajs
    longest = np.max(project.traj_lengths)

    # pre-fill with -1 so unused trailing slots are recognizable
    assignments = -np.ones((n_trajs, longest), dtype='int')
    distances = -np.ones((n_trajs, longest), dtype='float32')

    pgens = metric.prepare_trajectory(generators)

    for row in xrange(n_trajs):
        traj = project.load_traj(row, atom_indices=atom_indices_to_load)

        same_atom_count = (traj['XYZList'].shape[1] ==
                           generators['XYZList'].shape[1])
        if not same_atom_count:
            raise ValueError(
                "Number of atoms in generators does not match "
                "traj we're trying to assign! Maybe check atom indices?")

        ptraj = metric.prepare_trajectory(traj)

        for col in xrange(len(traj)):
            frame_distances = metric.one_to_all(ptraj, pgens, col)
            nearest = np.argmin(frame_distances)
            assignments[row, col] = nearest
            distances[row, col] = frame_distances[nearest]

    return assignments, distances
Exemple #14
0
def test_paths():
    """Check tpt.paths on a small hand-built 6-state net-flux network.

    Both ``remove_path`` modes ('bottleneck' and 'subtract') must return the
    same three paths and fluxes for this network.
    """
    # (from_state, to_state, net flux) for every non-zero entry
    edges = [(0, 1, 0.5), (0, 2, 0.5),
             (1, 3, 0.3), (1, 5, 0.2),
             (2, 4, 0.5),
             (3, 5, 0.3)]
    net_flux = np.zeros((6, 6))
    for origin, target, flux in edges:
        net_flux[origin, target] = flux

    sources = np.array([0])
    sinks = np.array([4, 5])

    expected_paths = [[0, 2, 4],
                      [0, 1, 3, 5],
                      [0, 1, 5]]
    expected_fluxes = np.array([0.5, 0.3, 0.2])

    results = [tpt.paths(sources, sinks, net_flux, remove_path=mode)
               for mode in ('bottleneck', 'subtract')]

    for paths, fluxes in results:
        npt.assert_array_almost_equal(fluxes, expected_fluxes)
        assert len(paths) == len(expected_paths)

        for found, expected in zip(paths, expected_paths):
            npt.assert_array_equal(found, expected)
Exemple #15
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        r"""Read data from a lammpstrj file.

        Parameters
        ----------
        n_frames : int, None
            The number of frames you would like to read from the file.
            If None, all of the remaining frames will be loaded.
        stride : int, optional
            Read only every stride-th frame. Defaults to 1 (every frame).
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
            Coordinates of the frames that were kept, as returned by
            ``self._read()`` and stacked with ``np.array``.
        cell_lengths : np.ndarray, dtype=np.float32
            The lengths (a,b,c) of the unit cell for each frame.
        cell_angles : np.ndarray, dtype=np.float32
            The angles (\alpha, \beta, \gamma) defining the unit cell for
            each frame.

        Raises
        ------
        ValueError
            If the file was not opened in mode="r".
        """
        if not self._mode == 'r':
            raise ValueError('read() is only available when file is opened '
                             'in mode="r"')

        if n_frames is None:
            # no limit: keep going until the end of the file
            frame_counter = itertools.count()
        else:
            frame_counter = xrange(n_frames)

        if stride is None:
            stride = 1

        all_coords, all_lengths, all_angles = [], [], []
        for _ in frame_counter:
            try:
                frame_coords, frame_lengths, frame_angles = self._read()
            except _EOF:
                break
            if atom_indices is not None:
                frame_coords = frame_coords[atom_indices, :]

            all_coords.append(frame_coords)
            all_lengths.append(frame_lengths)
            all_angles.append(frame_angles)

            # Throw away the next stride - 1 frames. Running out of file
            # while skipping ends the whole read right away instead of
            # relying on the next _read() call to signal EOF a second time.
            try:
                for _skipped in range(stride - 1):
                    self._read()
            except _EOF:
                break

        all_coords = np.array(all_coords)
        all_lengths = np.array(all_lengths, dtype=np.float32)
        all_angles = np.array(all_angles, dtype=np.float32)
        return all_coords, all_lengths, all_angles
Exemple #16
0
    def _read(self):
        """Read a single frame.

        Reads the atom-count line, the comment line and then one line per
        atom from ``self._fh``. Updates ``self._n_atoms``,
        ``self._line_counter`` and ``self._frame_index``.

        Returns
        -------
        xyz : np.ndarray, shape=(n_atoms, 3)
            Columns 2-4 of every atom line, parsed as floats. The first
            column (the atom type) is not returned.

        Raises
        ------
        _EOF
            If the file ends at the start of a frame or part-way through one.
        IOError
            If an atom line does not hold three parseable coordinates.
        """

        first = self._fh.readline()  # Number of atoms.
        if first == '':
            raise _EOF()
        self._n_atoms = int(first)
        self._fh.readline()  # Comment line.
        self._line_counter += 2

        xyz = np.empty(shape=(self._n_atoms, 3))

        for i in xrange(self._n_atoms):
            line = self._fh.readline()
            if line == '':
                raise _EOF()
            split_line = line.split()
            try:
                # Unpack into exactly three values: assigning a shorter list
                # straight into xyz[i] would let numpy broadcast a single
                # coordinate into x, y and z without complaint.
                x, y, z = [float(value) for value in split_line[1:4]]
            except ValueError:
                raise IOError('xyz parse error on line {0:d} of "{1:s}". '
                              'This file does not appear to be a valid '
                              'xyz file.'.format(
                        self._line_counter,  self._filename))
            xyz[i] = (x, y, z)
            self._line_counter += 1
        # --- end body ---

        self._frame_index += 1
        return xyz
Exemple #17
0
def test_paths():
    """Verify tpt.paths for both path-removal strategies.

    A fixed 6-state net-flux matrix with one source and two sinks has three
    known pathways; 'bottleneck' and 'subtract' must both recover them.
    """
    rows = [
        [0.0, 0.5, 0.5, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.3, 0.0, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.5, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.3],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    ]
    net_flux = np.array(rows)

    sources = np.array([0])
    sinks = np.array([4, 5])

    ref_paths = [[0, 2, 4], [0, 1, 3, 5], [0, 1, 5]]
    ref_fluxes = np.array([0.5, 0.3, 0.2])

    by_bottleneck = tpt.paths(sources, sinks, net_flux,
                              remove_path='bottleneck')
    by_subtraction = tpt.paths(sources, sinks, net_flux,
                               remove_path='subtract')

    for result in (by_bottleneck, by_subtraction):
        paths, fluxes = result
        npt.assert_array_almost_equal(fluxes, ref_fluxes)
        assert len(paths) == len(ref_paths)

        # lengths match (asserted above), so pairwise comparison covers all
        for got, want in zip(paths, ref_paths):
            npt.assert_array_equal(got, want)
Exemple #18
0
def uneven_zip(*args):
    '''Zip the arguments together like the builtin function, except that
    when one argument runs out (because its shorter), you keep filling it in
    with its last value

    Arguments without a length (e.g. plain numbers) are treated as
    one-element sequences. Called with no arguments, an empty list is
    returned.

    i.e.

    uneven_zip([1,2,3], 'a', [10,11]) = [[1, 'a', 10], [2, 'a', 11], [3, 'a', 11]]

    Returns a list with one row (itself a list) per index of the longest
    argument.
    '''
    # wrap anything that has no length so it can be indexed like a sequence
    sequences = [arg if hasattr(arg, '__len__') else (arg,) for arg in args]
    if not sequences:
        # max() below would fail on an empty list of lengths
        return []
    lengths = [len(seq) for seq in sequences]

    def get(i):
        row = []
        for seq, length in zip(sequences, lengths):
            try:
                row.append(seq[i])
            except (IndexError, KeyError):
                # this argument ran out: repeat its last value
                row.append(seq[length - 1])
        return row

    return [get(i) for i in range(max(lengths))]
def save(confs_by_state, states, style, format, outdir):
    """Save the results to disk

    ``style`` selects the layout of the output under ``outdir``:

    - 'sep': one file per conformation, named State<state>-<index>.<format>
    - 'tps': one file per state, named State<state>.<format>, holding the
      concatenation of that state's conformations
    - 'one': a single file Confs.<format> holding the concatenation of
      ``confs_by_state``

    Every target path is passed to ``arglib.die_if_path_exists`` before
    saving. Any other ``style`` raises ValueError.
    """
    if style == 'sep':
        for state_pos, trj in enumerate(confs_by_state):
            for conf_pos in xrange(len(trj)):
                name = 'State%d-%d.%s' % (states[state_pos], conf_pos, format)
                fn = os.path.join(outdir, name)
                arglib.die_if_path_exists(fn)

                logger.info("Saving file: %s" % fn)
                trj[conf_pos].save(fn)
        return

    if style == 'tps':
        for state_pos, trj in enumerate(confs_by_state):
            name = 'State%d.%s' % (states[state_pos], format)
            fn = os.path.join(outdir, name)
            arglib.die_if_path_exists(fn)

            logger.info("Saving file: %s" % fn)
            merged = concatenate_trajectories(trj)
            merged.save(fn)
        return

    if style == 'one':
        fn = os.path.join(outdir, 'Confs.%s' % format)
        arglib.die_if_path_exists(fn)

        logger.info("Saving file: %s" % fn)
        merged = concatenate_trajectories(confs_by_state)
        merged.save(fn)
        return

    raise ValueError('Invalid style: %s' % style)
def save(confs_by_state, states, style, format, outdir):
    """Save the results to disk

    ``style`` selects the layout of the output under ``outdir``:

    - "sep": one file per conformation, named State<state>-<index>.<format>
    - "tps": one file per state, named State<state>.<format>; each entry of
      ``confs_by_state`` is saved directly
    - "one": a single file Confs.<format> holding the concatenation of
      ``confs_by_state``

    Every target path is passed to ``arglib.die_if_path_exists`` before
    saving. Any other ``style`` raises ValueError.
    """
    if style == "sep":
        for state_pos, trj in enumerate(confs_by_state):
            for conf_pos in xrange(len(trj)):
                name = "State%d-%d.%s" % (states[state_pos], conf_pos, format)
                fn = os.path.join(outdir, name)
                arglib.die_if_path_exists(fn)

                logger.info("Saving file: %s" % fn)
                trj[conf_pos].save(fn)
        return

    if style == "tps":
        for state_pos, trj in enumerate(confs_by_state):
            name = "State%d.%s" % (states[state_pos], format)
            fn = os.path.join(outdir, name)
            arglib.die_if_path_exists(fn)

            logger.info("Saving file: %s" % fn)
            trj.save(fn)
        return

    if style == "one":
        fn = os.path.join(outdir, "Confs.%s" % format)
        arglib.die_if_path_exists(fn)

        logger.info("Saving file: %s" % fn)
        merged = concatenate_trajectories(confs_by_state)
        merged.save(fn)
        return

    raise ValueError("Invalid style: %s" % style)
Exemple #21
0
    def write(self, xyz, types=None):
        """Write one or more frames of data to a xyz file.

        Parameters
        ----------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms to write. A single frame
            of shape (n_atoms, 3) is also accepted.
        types : sequence of str, shape=(n_atoms, ), optional
            The type of each particle. If None or empty, every particle is
            written with type 'X'.

        Raises
        ------
        ValueError
            If the file was not opened in mode="w".
        """

        if not self._mode == 'w':
            raise ValueError('write() is only available when file is opened '
                             'in mode="w"')

        # Normalize first so that shape[1] really is the atom count, even
        # when a single (n_atoms, 3) frame or a nested list was passed.
        xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                        shape=(None, None, 3), warn_on_cast=False,
                        add_newaxis_on_deficient_ndim=True)
        n_atoms = xyz.shape[1]

        # Explicit None/length test: `not types` raises for numpy arrays
        # holding more than one element.
        if types is None or len(types) == 0:
            # Make all particles the same type.
            types = ['X'] * n_atoms

        # NOTE(review): inplace=True may modify the caller's array if
        # ensure_type returned it without copying -- confirm.
        in_units_of(xyz, 'nanometers', self.distance_unit, inplace=True)

        for i in range(xyz.shape[0]):
            # two header lines per frame: atom count, then a comment
            self._fh.write('{0}\n'.format(n_atoms))
            self._fh.write("Created with MDTraj {0}, {1}\n".format(version, str(date.today())))

            for j, coord in enumerate(xyz[i]):
                self._fh.write('{0} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(
                    types[j], coord[0], coord[1], coord[2]))
Exemple #22
0
    def write(self, xyz, types=None):
        """Write one or more frames of data to a xyz file.

        Parameters
        ----------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms to write. By convention for
            this trajectory format, the lengths should be in units of angstroms.
            A single frame of shape (n_atoms, 3) is also accepted.
        types : sequence of str, shape=(n_atoms, ), optional
            The type of each particle. If None or empty, every particle is
            written with type 'X'.

        Raises
        ------
        ValueError
            If the file was not opened in mode="w".
        """

        if not self._mode == 'w':
            raise ValueError('write() is only available when file is opened '
                             'in mode="w"')

        # Normalize first so that shape[1] really is the atom count, even
        # when a single (n_atoms, 3) frame or a nested list was passed.
        xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                        shape=(None, None, 3), warn_on_cast=False,
                        add_newaxis_on_deficient_ndim=True)
        n_atoms = xyz.shape[1]

        # Explicit None/length test: `not types` raises for numpy arrays
        # holding more than one element.
        if types is None or len(types) == 0:
            # Make all particles the same type.
            types = ['X'] * n_atoms

        for i in range(xyz.shape[0]):
            # two header lines per frame: atom count, then a comment
            self._fh.write('{0}\n'.format(n_atoms))
            self._fh.write("Created with MDTraj {0}, {1}\n".format(version, str(date.today())))

            for j, coord in enumerate(xyz[i]):
                self._fh.write('{0} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(
                    types[j], coord[0], coord[1], coord[2]))
Exemple #23
0
def get_angle_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the bond
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    iangles : np.ndarray, shape[n_angles, 3], dtype=int
        n_angles x 3 array of indices, where each row is the index of three
        atoms m,n,o such that n is bonded to both m and o. When there are
        no angles the array has shape ``(0, 3)``.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    iangles = []

    # Walk the atoms that actually appear in a bond. Counting from 0 up to
    # number_of_nodes() would assume the bonded indices are contiguous,
    # which is not the case when some atom index has no bonds at all.
    for i in sorted(graph.nodes()):
        for (m, n) in combinations(graph.neighbors(i), 2):
            # so now the there is a bond angle m-i-n
            iangles.append((m, i, n))

    # reshape so that "no angles" is still an (0, 3) integer array
    return np.array(iangles, dtype=int).reshape(-1, 3)
Exemple #24
0
    def _read(self):
        """Read a single frame.

        Reads the atom-count line, the comment line and then one line per
        atom from ``self._fh``. Updates ``self._n_atoms``,
        ``self._line_counter`` and ``self._frame_index``.

        Returns
        -------
        xyz : np.ndarray, shape=(n_atoms, 3)
            Columns 2-4 of every atom line, parsed as floats. The first
            column (the atom type) is not returned.

        Raises
        ------
        _EOF
            If the file ends at the start of a frame or part-way through one.
        IOError
            If an atom line does not hold three parseable coordinates.
        """

        first = self._fh.readline()  # Number of atoms.
        if first == '':
            raise _EOF()
        self._n_atoms = int(first)
        self._fh.readline()  # Comment line.
        self._line_counter += 2

        xyz = np.empty(shape=(self._n_atoms, 3))

        for i in xrange(self._n_atoms):
            line = self._fh.readline()
            if line == '':
                raise _EOF()
            split_line = line.split()
            try:
                # Unpack into exactly three values: assigning a shorter list
                # straight into xyz[i] would let numpy broadcast a single
                # coordinate into x, y and z without complaint.
                x, y, z = [float(value) for value in split_line[1:4]]
            except ValueError:
                raise IOError('xyz parse error on line {0:d} of "{1:s}". '
                              'This file does not appear to be a valid '
                              'xyz file.'.format(
                        self._line_counter,  self._filename))
            xyz[i] = (x, y, z)
            self._line_counter += 1
        # --- end body ---

        self._frame_index += 1
        return xyz
Exemple #25
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        r"""Read data from a lammpstrj file.

        Parameters
        ----------
        n_frames : int, None
            The number of frames you would like to read from the file.
            If None, all of the remaining frames will be loaded.
        stride : int, optional
            Read only every stride-th frame. Defaults to 1 (every frame).
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
            Coordinates of the frames that were kept, as returned by
            ``self._read()`` and stacked with ``np.array``.
        cell_lengths : np.ndarray, dtype=np.float32
            The lengths (a,b,c) of the unit cell for each frame.
        cell_angles : np.ndarray, dtype=np.float32
            The angles (\alpha, \beta, \gamma) defining the unit cell for
            each frame.

        Raises
        ------
        ValueError
            If the file was not opened in mode="r".
        """
        if not self._mode == 'r':
            raise ValueError('read() is only available when file is opened '
                             'in mode="r"')

        if n_frames is None:
            # no limit: keep going until the end of the file
            frame_counter = itertools.count()
        else:
            frame_counter = xrange(n_frames)

        if stride is None:
            stride = 1

        all_coords, all_lengths, all_angles = [], [], []
        for _ in frame_counter:
            try:
                frame_coords, frame_lengths, frame_angles = self._read()
            except _EOF:
                break
            if atom_indices is not None:
                frame_coords = frame_coords[atom_indices, :]

            all_coords.append(frame_coords)
            all_lengths.append(frame_lengths)
            all_angles.append(frame_angles)

            # Throw away the next stride - 1 frames. Running out of file
            # while skipping ends the whole read right away instead of
            # relying on the next _read() call to signal EOF a second time.
            try:
                for _skipped in range(stride - 1):
                    self._read()
            except _EOF:
                break

        all_coords = np.array(all_coords)
        all_lengths = np.array(all_lengths, dtype=np.float32)
        all_angles = np.array(all_angles, dtype=np.float32)
        return all_coords, all_lengths, all_angles
Exemple #26
0
def get_dihedral_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the dihedral
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    idihedrals : np.ndarray, shape[n_dihedrals, 4], dtype=int
        All sets of 4 atoms A,B,C,D such that A is bonded to B, B is bonded
        to C, and C is bonded to D. Every chain shows up once per direction
        (A,B,C,D and D,C,B,A). When no such chain exists an empty array of
        shape (0, 4) is returned.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    idihedrals = []

    # TODO: CHECK FOR DIHEDRAL ANGLES THAT ARE 180 and recover
    # conf : msmbuilder.Trajectory
    #    An msmbuilder trajectory, only the first frame will be used. This
    #    is used purely to make the check for angle(ABC) != 180.

    # Visit the atoms that actually occur in a bond. Counting 0..n_nodes-1
    # instead would ask the graph about labels it does not contain as soon
    # as one atom index is missing from `ibonds`. Sorting keeps the visiting
    # order identical to the counting loop when the labels are contiguous.
    for a in sorted(graph.nodes()):
        for b in graph.neighbors(a):
            for c in graph.neighbors(b):
                if c in (a, b):
                    continue
                for d in graph.neighbors(c):
                    if d in (a, b, c):
                        continue
                    idihedrals.append((a, b, c, d))

    if not idihedrals:
        # np.array([]) would be 1-D; keep the documented 2-D layout
        return np.empty((0, 4), dtype=int)
    return np.array(idihedrals)
Exemple #27
0
def get_dihedral_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the dihedral
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    idihedrals : np.ndarray, shape[n_dihedrals, 4], dtype=int
        All sets of 4 atoms A,B,C,D such that A is bonded to B, B is bonded
        to C, and C is bonded to D. Both orientations of a chain are listed.
        If there are no dihedrals the result is an empty (0, 4) array.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    idihedrals = []

    # TODO: CHECK FOR DIHEDRAL ANGLES THAT ARE 180 and recover
    # conf : msmbuilder.Trajectory
    #    An msmbuilder trajectory, only the first frame will be used. This
    #    is used purely to make the check for angle(ABC) != 180.

    # Iterate over the graph's own node labels (sorted, so contiguous labels
    # are visited as 0, 1, 2, ...) rather than assuming every integer below
    # the node count is a node; a gap in the atom indices would otherwise
    # make graph.neighbors() fail.
    for a in sorted(graph.nodes()):
        for b in graph.neighbors(a):
            seen_ab = (a, b)
            for c in (x for x in graph.neighbors(b) if x not in seen_ab):
                seen_abc = (a, b, c)
                idihedrals.extend(
                    (a, b, c, d)
                    for d in graph.neighbors(c) if d not in seen_abc)

    if len(idihedrals) == 0:
        # preserve the (n_dihedrals, 4) layout even when n_dihedrals == 0
        return np.empty((0, 4), dtype=int)
    return np.array(idihedrals)
Exemple #28
0
def get_angle_connectivity(ibonds):
    """Given the bonds, get the indices of the atoms defining all the bond
    angles

    Parameters
    ----------
    ibonds : np.ndarray, shape=[n_bonds, 2], dtype=int
        n_bonds x 2 array of indices, where each row is the index of two
        atom who participate in a bond.

    Returns
    -------
    iangles : np.ndarray, shape[n_angles, 3], dtype=int
        n_angles x 3 array of indices, where each row is the index of three
        atoms m,n,o such that n is bonded to both m and o. When there are no
        angles an empty array of shape (0, 3) is returned.
    """
    nx = import_('networkx')
    graph = nx.from_edgelist(ibonds)
    iangles = []

    # Use the node labels present in the graph instead of 0..n_nodes-1, so
    # that a gap in the atom indices does not trigger a lookup of a node
    # that does not exist. Sorted order matches the old counting order when
    # the labels are contiguous.
    for i in sorted(graph.nodes()):
        for (m, n) in combinations(graph.neighbors(i), 2):
            # so now the there is a bond angle m-i-n
            iangles.append((m, i, n))

    if not iangles:
        # keep the documented 2-D layout for the empty case
        return np.empty((0, 3), dtype=int)
    return np.array(iangles)
def save(confs_by_state, states, style, format, outdir):
    """Write sampled conformations to `outdir`, laid out according to `style`.

    Parameters
    ----------
    confs_by_state : sequence
        One entry per state. With ``'sep'`` each entry is indexed frame by
        frame, with ``'tps'`` each entry is saved as a whole, and with
        ``'one'`` the full sequence is passed to
        ``concatenate_trajectories``.
    states : sequence
        ``states[i]`` is the label used in the file name for entry ``i``.
    style : str
        ``'sep'`` (one file per frame), ``'tps'`` (one file per state) or
        ``'one'`` (a single file).
    format : str
        File extension, without the leading dot.
    outdir : str
        Directory the files are written into.

    Raises
    ------
    ValueError
        If `style` is none of the three supported values.
    """
    if style == 'one':
        fn = os.path.join(outdir, 'Confs.%s' % format)
        arglib.die_if_path_exists(fn)

        logger.info("Saving file: %s" % fn)
        concatenate_trajectories(confs_by_state).save(fn)
        return

    if style not in ('sep', 'tps'):
        raise ValueError('Invalid style: %s' % style)

    one_file_per_frame = (style == 'sep')
    for i, trj in enumerate(confs_by_state):
        if not one_file_per_frame:
            # 'tps': the whole entry goes into a single file
            fn = os.path.join(outdir, 'State%d.%s' % (states[i], format))
            arglib.die_if_path_exists(fn)

            logger.info("Saving file: %s" % fn)
            trj.save(fn)
            continue

        # 'sep': every frame of the entry gets its own file
        for j in xrange(len(trj)):
            fn = os.path.join(outdir,
                              'State%d-%d.%s' % (states[i], j, format))
            arglib.die_if_path_exists(fn)

            logger.info("Saving file: %s" % fn)
            trj[j].save(fn)
def uneven_zip(*args):
    '''Zip the arguments together like the builtin function, except that
    when one argument runs out (because its shorter), you keep filling it in
    with its last value

    i.e.

    uneven_zip([1,2,3], 'a', [10,11]) = [[1, 'a', 10], [2, 'a', 11], [3, 'a', 11]]

    Arguments without a ``__len__`` are treated as one-element sequences.
    The result has as many rows as the longest argument; calling with no
    arguments returns an empty list.
    '''
    if not args:
        return []

    # wrap scalars so that everything below can be indexed uniformly
    seqs = [arg if hasattr(arg, '__len__') else (arg, ) for arg in args]
    lengths = [len(seq) for seq in seqs]

    # Clamp the index to the last valid position instead of catching the
    # failed lookup: same result for sequences, but unrelated errors (and
    # KeyboardInterrupt) are no longer swallowed by a bare except.
    return [[seq[min(i, n - 1)] for seq, n in zip(seqs, lengths)]
            for i in range(max(lengths))]
Exemple #31
0
    def save_pdb(self, filename, force_overwrite=True):
        """Write every frame of the trajectory to a PDB file, one model each.

        Coordinates (and unit cell lengths, when the trajectory has a unit
        cell) are passed through ``convert`` from ``Trajectory._distance_unit``
        to the output file's ``distance_unit`` before being written.

        Parameters
        ----------
        filename : str
            filesystem path in which to save the trajectory
        force_overwrite : bool, default=True
            Overwrite anything that exists at filename, if its already there
        """
        self._check_valid_unitcell()

        with PDBTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
            for i in xrange(self.n_frames):
                xyz = convert(self._xyz[i], Trajectory._distance_unit, f.distance_unit)

                # unit cell keywords are only supplied when there is a cell
                cell_kwargs = {}
                if self._have_unitcell:
                    cell_kwargs['unitcell_lengths'] = convert(
                        self.unitcell_lengths[i], Trajectory._distance_unit, f.distance_unit)
                    cell_kwargs['unitcell_angles'] = self.unitcell_angles[i]

                f.write(xyz, self.topology, modelIndex=i, **cell_kwargs)
Exemple #32
0
    def load_from(cls, filename):
        """
        Load project from disk

        Parameters
        ----------
        filename : string
            Path to a legacy .h5 or current .yaml project file. The format
            is chosen from the file name suffix.

        Returns
        -------
        project : the loaded project object, built as
            ``cls(records, validate=False, project_dir=<directory of filename>)``

        Raises
        ------
        ValueError
            If `filename` ends in neither '.yaml' nor '.h5'.
        """

        rootdir = os.path.abspath(os.path.dirname(filename))

        if filename.endswith('.yaml'):
            with open(filename) as f:
                # NOTE(review): yaml.load can construct arbitrary objects
                # depending on which Loader is imported at file level;
                # confirm it is a safe loader if project files are untrusted.
                ondisk = yaml.load(f, Loader=Loader)

            trajs = ondisk['trajs']
            records = {
                'conf_filename': ondisk['conf_filename'],
                'traj_lengths': [trj['length'] for trj in trajs],
                'traj_paths': [trj['path'] for trj in trajs],
                'traj_converted_from': [trj['converted_from'] for trj in trajs],
                'traj_errors': [trj['errors'] for trj in trajs]
            }

        elif filename.endswith('.h5'):
            ondisk = io.loadh(filename, deferred=False)
            n_trajs = len(ondisk['TrajLengths'])
            records = {
                'conf_filename': str(ondisk['ConfFilename'][0]),
                'traj_lengths': ondisk['TrajLengths'],
                'traj_paths': [],
                # one fresh list per trajectory: `[[None]] * n` would make
                # every entry the same list object, so editing one
                # trajectory's provenance would edit all of them
                'traj_converted_from': [[None] for _ in xrange(n_trajs)],
                'traj_errors': [None] * n_trajs
            }

            for i in xrange(n_trajs):
                # this is the convention used in the hdf project format to get the traj paths
                path = os.path.join(
                    ondisk['TrajFilePath'][0], ondisk['TrajFileBaseName'][0] +
                    str(i) + ondisk['TrajFileType'][0])
                records['traj_paths'].append(path)

        else:
            raise ValueError('Sorry, I can only open files in .yaml'
                             ' or .h5 format: %s' % filename)

        return cls(records, validate=False, project_dir=rootdir)
 def test_prepare(self):
     """Smoke test: prepare_trajectory must accept self.traj for an RMSD
     metric built with no atom indices and with the full index set given
     as a range, an xrange and a numpy array."""
     rmsds = [metrics.RMSD()]  # all atom indices
     for atom_indices in (range(self.n_atoms),
                          xrange(self.n_atoms),
                          np.arange(self.n_atoms)):
         rmsds.append(metrics.RMSD(atom_indices))

     for metric in rmsds:
         # only checks that the call succeeds; the result is not inspected
         metric.prepare_trajectory(self.traj)
Exemple #34
0
 def _square_all_pairwise(self, prepared_traj):
     """Reference implementation of all_pairwise

     Builds the full n_frames x n_frames matrix one row at a time, where
     row i is ``self.one_to_all(prepared_traj, prepared_traj, i)``. A
     warning about the cost is issued on every call.
     """
     warnings.warn(
         'This is HORRIBLY inefficient. This operation really needs to be done directly in C')
     n_frames = prepared_traj.n_frames
     pairwise = np.empty((n_frames, n_frames))
     for frame in xrange(n_frames):
         pairwise[frame, :] = self.one_to_all(prepared_traj, prepared_traj, frame)
     return pairwise
Exemple #35
0
 def _eval_traj_shapes(self):
     """Measure every trajectory file of the project.

     Returns
     -------
     lengths : np.ndarray, shape=(n_trajs,)
         ``len()`` of each trajectory file as opened with ``md.open``.
     n_atoms : np.ndarray, shape=(n_trajs,)
         ``n_atoms`` of frame 0 of each trajectory, via ``md.load_frame``.
     """
     n_trajs = self.n_trajs
     lengths, n_atoms = np.zeros(n_trajs), np.zeros(n_trajs)
     for index in xrange(n_trajs):
         path = self.traj_filename(index)
         with md.open(path) as handle:
             lengths[index] = len(handle)
         n_atoms[index] = md.load_frame(path, 0).n_atoms
     return lengths, n_atoms
def propagate_model(transition_matrix,
                    n_steps,
                    initial_populations,
                    observable_vector=None):
    """Propagate the time evolution of a population vector.

    Parameters
    ----------
    transition_matrix : ndarray or sparse matrix
        A transition matrix
    n_steps : int
        How many timesteps to iterate
    initial_populations : ndarray
        The initial population vector
    observable_vector : ndarray
        Vector containing the state-wise averaged property of some observable.
        Can be used to propagate properties such as fraction folded, ensemble
        average RMSD, etc.  Default: None

    Returns
    -------
    X : ndarray
        Final population vector, after propagation
    obslist : list
        list of floats of length equal to the number of steps, giving the mean value
        of the observable (dot product of `observable_vector` and populations) at
        each timestep. Empty when `observable_vector` is None.

    See Also
    --------
    sample
    scipy.sparse.linalg.aslinearoperator

    """
    check_transition(transition_matrix)

    # Identity test on purpose: `observable_vector == None` is evaluated
    # elementwise for an ndarray, and the resulting array cannot be used as
    # an `if` condition.
    if observable_vector is None:
        check_dimensions(transition_matrix, initial_populations)
    else:
        check_dimensions(transition_matrix, initial_populations,
                         observable_vector)

    X = initial_populations.copy()
    obslist = []
    if scipy.sparse.issparse(transition_matrix):
        TC = transition_matrix.tocsr()
    else:
        TC = transition_matrix

    Tl = scipy.sparse.linalg.aslinearoperator(TC)

    for _ in xrange(n_steps):
        # one propagation step, applied through the operator's rmatvec
        X = Tl.rmatvec(X)
        if observable_vector is not None:
            obslist.append(sum(observable_vector * X))

    return X, obslist
Exemple #37
0
 def _eval_traj_shapes(self):
     """Collect the frame count and atom count of each trajectory file.

     For trajectory ``i`` the file named by ``self.traj_filename(i)`` is
     opened with ``md.open`` to take its ``len()``, and its first frame is
     loaded with ``md.load_frame`` to read ``n_atoms``.

     Returns
     -------
     lengths, n_atoms : tuple of np.ndarray, each of shape (n_trajs,)
     """
     lengths = np.zeros(self.n_trajs)
     n_atoms = np.zeros(self.n_trajs)
     for i, fn in ((k, self.traj_filename(k)) for k in xrange(self.n_trajs)):
         with md.open(fn) as f:
             lengths[i] = len(f)
         first_frame = md.load_frame(fn, 0)
         n_atoms[i] = first_frame.n_atoms
     return lengths, n_atoms
 def _square_all_pairwise(self, prepared_traj):
     """Reference implementation of all_pairwise

     Fills a square (n_frames, n_frames) array by calling ``one_to_all``
     once per frame of `prepared_traj` against itself, after warning that
     this path is slow.
     """
     warnings.warn(
         'This is HORRIBLY inefficient. This operation really needs to be done directly in C'
     )
     size = prepared_traj.n_frames
     output = np.empty((size, size))
     row = 0
     while row < size:
         output[row] = self.one_to_all(prepared_traj, prepared_traj, row)
         row += 1
     return output
Exemple #39
0
    def save(self, filename_or_file):
        """Write the project records to a YAML file.

        Parameters
        ----------
        filename_or_file : str or file object
            Either a path ('.yaml' is appended when missing and the parent
            directory is created if needed) or an already open, writable
            file object that exposes a ``name`` attribute. A file object
            passed in is left open.

        Returns
        -------
        filename_or_file : the path that was written (including any added
            suffix), or the file object that was passed in.

        Raises
        ------
        TypeError
            If the argument is neither a string nor a named writable file
            object.
        """
        if isinstance(filename_or_file, string_types):
            if not filename_or_file.endswith('.yaml'):
                filename_or_file += '.yaml'
            dirname = os.path.abspath(os.path.dirname(filename_or_file))
            if not os.path.exists(dirname):
                logger.info("Creating directory: %s" % dirname)
                os.makedirs(dirname)
            handle = open(filename_or_file, 'w')
            own_fid = True
        elif hasattr(filename_or_file, 'write') and hasattr(filename_or_file, 'name'):
            # duck-typed instead of isinstance(..., file): the `file`
            # builtin only exists on Python 2
            dirname = os.path.abspath(os.path.dirname(filename_or_file.name))
            handle = filename_or_file
            own_fid = False
        else:
            raise TypeError('filename_or_file must be a path or a named, '
                            'writable file object')

        try:
            # somewhat complicated logic if the directory you're
            # saving in is different than the directory this
            # project references its paths from

            # the point is that the when the file lists paths, those
            # paths are going to be interpreted as being with respect to
            # the directory that the file is in. So when the Project file
            # is being resaved (but the Trajectorys are not being moved)
            # then the paths need to change to compensate

            # `dirname` is valid for both branches above; taking
            # os.path.dirname() of a file object would fail
            relative = os.path.relpath(self._project_dir, dirname)

            records = {'trajs': []}
            records['conf_filename'] = os.path.join(relative,
                                                    self._conf_filename)
            for i, path in enumerate(self._traj_paths):
                # yaml doesn't like numpy types, so we have to sanitize them
                records['trajs'].append({
                    'id': i,
                    'path': str(os.path.join(relative, path)),
                    'converted_from': list(self._traj_converted_from[i]),
                    'length': int(self._traj_lengths[i]),
                    'errors': self._traj_errors[i]
                })

            yaml.dump(records, handle, Dumper=Dumper)
        finally:
            # only close what this method opened, even if the dump failed
            if own_fid:
                handle.close()

        return filename_or_file
def propagate_model(transition_matrix, n_steps, initial_populations, observable_vector=None):
    """Propagate the time evolution of a population vector.

    Parameters
    ----------
    transition_matrix : ndarray or sparse matrix
        A transition matrix
    n_steps : int
        How many timesteps to iterate
    initial_populations : ndarray
        The initial population vector
    observable_vector : ndarray
        Vector containing the state-wise averaged property of some observable.
        Can be used to propagate properties such as fraction folded, ensemble
        average RMSD, etc.  Default: None

    Returns
    -------
    X : ndarray
        Final population vector, after propagation
    obslist : list
        list of floats of length equal to the number of steps, giving the mean value
        of the observable (dot product of `observable_vector` and populations) at
        each timestep. Stays empty if no `observable_vector` is given.

    See Also
    --------
    sample
    scipy.sparse.linalg.aslinearoperator

    """
    check_transition(transition_matrix)

    # `is None`, not `== None`: comparing an ndarray with `==` produces an
    # elementwise result whose truth value is ambiguous.
    track_observable = observable_vector is not None
    if track_observable:
        check_dimensions(transition_matrix, initial_populations, observable_vector)
    else:
        check_dimensions(transition_matrix, initial_populations)

    X = initial_populations.copy()
    obslist = []
    if scipy.sparse.issparse(transition_matrix):
        TC = transition_matrix.tocsr()
    else:
        TC = transition_matrix

    Tl = scipy.sparse.linalg.aslinearoperator(TC)

    for _ in xrange(n_steps):
        # advance the populations by one step via the operator's rmatvec
        X = Tl.rmatvec(X)
        if track_observable:
            obslist.append(sum(observable_vector * X))

    return X, obslist
Exemple #41
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read data from a TINKER .arc file.  Note that only the
        Cartesian coordinates are read in.  The .arc file also
        contains TINKER-specific numeric atom types and some bonding
        information, which we do not read in.

        Parameters
        ----------
        n_frames : int, None
            The number of frames you would like to read from the file.
            If None, all of the remaining frames will be loaded.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3), dtype=np.float32
            The cartesian coordinates, in angstroms

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r'.
        """
        if not self._mode == 'r':
            raise ValueError('read() is only available when file is opened '
                             'in mode="r"')

        if n_frames is None:
            frame_counter = itertools.count()
        else:
            frame_counter = xrange(n_frames)

        if stride is None:
            stride = 1

        coords = []
        for _ in frame_counter:
            try:
                coord = self._read()
                if atom_indices is not None:
                    coord = coord[atom_indices, :]
            except _EOF:
                break

            coords.append(coord)

            for _ in range(stride - 1):
                # throw away these frames
                try:
                    self._read()
                except _EOF:
                    # The file ended inside the skipped stretch. Stop
                    # skipping and keep what was read, instead of letting
                    # _EOF escape to the caller; the next _read() in the
                    # outer loop then ends the read.
                    break

        coords = np.array(coords)

        return coords
Exemple #42
0
    def load_from(cls, filename):
        """
        Load project from disk

        Parameters
        ----------
        filename : string
            Path to a legacy .h5 or current .yaml project file; the suffix
            decides how the file is parsed.

        Returns
        -------
        project : the loaded project object, constructed with
            ``validate=False`` and ``project_dir`` set to the directory
            that contains `filename`.

        Raises
        ------
        ValueError
            If `filename` has neither a '.yaml' nor a '.h5' suffix.
        """

        rootdir = os.path.abspath(os.path.dirname(filename))

        if filename.endswith('.yaml'):
            with open(filename) as f:
                # NOTE(review): whether this is safe for untrusted files
                # depends on the Loader imported at file level -- confirm.
                ondisk = yaml.load(f, Loader=Loader)

            records = {'conf_filename': ondisk['conf_filename'],
                       'traj_lengths': [],
                       'traj_paths': [],
                       'traj_converted_from': [],
                       'traj_errors': []}

            # map each per-trajectory yaml key onto the record list it feeds
            field_for = (('traj_lengths', 'length'),
                         ('traj_paths', 'path'),
                         ('traj_errors', 'errors'),
                         ('traj_converted_from', 'converted_from'))
            for trj in ondisk['trajs']:
                for record_key, yaml_key in field_for:
                    records[record_key].append(trj[yaml_key])

        elif filename.endswith('.h5'):
            ondisk = io.loadh(filename, deferred=False)
            n_trajs = len(ondisk['TrajLengths'])
            records = {'conf_filename': str(ondisk['ConfFilename'][0]),
                       'traj_lengths': ondisk['TrajLengths'],
                       'traj_paths': [],
                       # build a separate [None] per trajectory; multiplying
                       # a nested list would repeat one shared inner list
                       'traj_converted_from': [[None] for _ in xrange(n_trajs)],
                       'traj_errors': [None] * n_trajs}

            for i in xrange(n_trajs):
                # this is the convention used in the hdf project format to get the traj paths
                basename = (ondisk['TrajFileBaseName'][0] + str(i) +
                            ondisk['TrajFileType'][0])
                path = os.path.join(ondisk['TrajFilePath'][0], basename)
                records['traj_paths'].append(path)

        else:
            raise ValueError('Sorry, I can only open files in .yaml'
                             ' or .h5 format: %s' % filename)

        return cls(records, validate=False, project_dir=rootdir)
Exemple #43
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read data from a TINKER .arc file.  Note that only the
        Cartesian coordinates are read in.  The .arc file also
        contains TINKER-specific numeric atom types and some bonding
        information, which we do not read in.

        Parameters
        ----------
        n_frames : int, None
            The number of frames you would like to read from the file.
            If None, all of the remaining frames will be loaded.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3), dtype=np.float32
            The cartesian coordinates, in angstroms

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r'.
        """
        if not self._mode == 'r':
            raise ValueError('read() is only available when file is opened '
                             'in mode="r"')

        if n_frames is None:
            frame_counter = itertools.count()
        else:
            frame_counter = xrange(n_frames)

        if stride is None:
            stride = 1

        coords = []
        for _ in frame_counter:
            try:
                coord = self._read()
                if atom_indices is not None:
                    coord = coord[atom_indices, :]
            except _EOF:
                break

            coords.append(coord)

            # Throw away the stride - 1 frames that follow. Hitting the end
            # of the file while skipping is not an error: stop skipping and
            # let the next _read() above terminate the outer loop, so the
            # frames gathered so far are still returned.
            try:
                for _ in range(stride - 1):
                    self._read()
            except _EOF:
                pass

        coords = np.array(coords)

        return coords
Exemple #44
0
def test_mfpt_match():
    """tpt.mfpts called once per state index must agree with the matrix
    returned by a single tpt.mfpts(msm) call."""
    n_states = 10
    assignments = np.random.randint(n_states, size=(10, 2000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    # these two do different things
    # (per-index results are stacked as rows, then transposed)
    per_index = [tpt.mfpts(msm, i) for i in xrange(n_states)]
    mfpts0 = np.vstack(per_index).T
    mfpts1 = tpt.mfpts(msm)

    npt.assert_array_almost_equal(mfpts0, mfpts1)
Exemple #45
0
def test_mfpt_match():
    """Compare the two calling conventions of tpt.mfpts on a model fitted
    to random assignments: one call per index versus one call for all."""
    msm = MarkovStateModel(lag_time=1)
    msm.fit(np.random.randint(10, size=(10, 2000)))

    # these two do different things
    rows = []
    for i in xrange(10):
        rows.append(tpt.mfpts(msm, i))
    mfpts0 = np.vstack(rows).T
    mfpts1 = tpt.mfpts(msm)

    npt.assert_array_almost_equal(mfpts0, mfpts1)
Exemple #46
0
    def _read(self):
        """Read a single frame.

        Consumes the frame header from ``self._fh``, lets ``parse_box``
        handle the box section, then reads one line per atom and stores the
        coordinates at row ``atom id - 1``.

        Returns
        -------
        xyz, lengths, angles
            The (n_atoms, 3) coordinate array and whatever ``parse_box``
            returned for this frame.

        Raises
        ------
        _EOF
            If the file ends at the start of a frame or inside the atom
            section.
        IOError
            If the box header has an unexpected number of fields or an atom
            line cannot be parsed.
        """
        fh = self._fh

        # --- begin header ---
        if fh.readline() == '':  # ITEM: TIMESTEP
            raise _EOF()
        fh.readline()  # timestep
        fh.readline()  # ITEM: NUMBER OF ATOMS
        self._n_atoms = int(fh.readline())  # num atoms

        n_box_fields = len(fh.readline().split())  # ITEM: BOX BOUNDS
        self._line_counter += 5

        # the number of fields on the box header tells the two layouts apart
        box_style = {9: 'triclinic', 6: 'orthogonal'}.get(n_box_fields)
        if box_style is None:
            raise IOError('lammpstrj parse error on line {0:d} of "{1:s}". '
                          'This file does not appear to be a valid '
                          'lammpstrj file.'.format(self._line_counter,
                                                   self._filename))
        lengths, angles = self.parse_box(box_style)

        fh.readline()  # ITEM: ATOMS ...
        self._line_counter += 4
        # --- end header ---

        xyz = np.empty(shape=(self._n_atoms, 3))
        types = np.empty(shape=(self._n_atoms), dtype='int')

        # --- begin body ---
        for _ in xrange(self._n_atoms):
            line = fh.readline()
            if line == '':
                raise _EOF()
            fields = line.split()
            try:
                # atom ids in the file are 1-based
                row = int(fields[0]) - 1
                types[row] = int(fields[1])
                xyz[row] = [float(value) for value in fields[2:5]]
            except Exception:
                raise IOError(
                    'lammpstrj parse error on line {0:d} of "{1:s}". '
                    'This file does not appear to be a valid '
                    'lammpstrj file.'.format(self._line_counter,
                                             self._filename))
            self._line_counter += 1
        # --- end body ---

        self._frame_index += 1
        return xyz, lengths, angles
Exemple #47
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read data from a xyz file.

        Frames are pulled one at a time with ``self._read()`` until
        `n_frames` have been collected or the file is exhausted; after each
        kept frame the following ``stride - 1`` frames are read and dropped.

        Parameters
        ----------
        n_frames : int, None
            The number of frames you would like to read from the file.
            If None, all of the remaining frames will be loaded.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3), dtype=np.float32

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r'.
        """
        if self._mode != 'r':
            raise ValueError('read() is only available when file is opened '
                             'in mode="r"')

        frame_counter = itertools.count() if n_frames is None else xrange(n_frames)
        n_skip = (1 if stride is None else stride) - 1

        kept = []
        for _ in frame_counter:
            try:
                xyz = self._read()
                if atom_indices is not None:
                    xyz = xyz[atom_indices, :]
            except _EOF:
                break
            kept.append(xyz)

            # throw away these frames; running out of file here just ends
            # the skipping
            try:
                for _ in range(n_skip):
                    self._read()
            except _EOF:
                pass

        return np.array(kept)
Exemple #48
0
def load_trajectories(project, stride, atom_indices):
    """Load every trajectory of `project` into a list.

    Parameters
    ----------
    project : object
        Must provide ``n_trajs`` and
        ``load_traj(i, stride=..., atom_indices=...)``.
    stride : int
        Passed through to ``project.load_traj``.
    atom_indices : array_like or None
        Passed through to ``project.load_traj``. When given, each loaded
        trajectory is checked to have exactly ``len(atom_indices)`` atoms.

    Returns
    -------
    list_of_trajs : list
        The loaded trajectories, in index order.
    """
    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride, atom_indices=atom_indices)

        # identity test: `atom_indices != None` is elementwise when the
        # indices are an ndarray and cannot be used as a condition
        if atom_indices is not None:
            assert len(atom_indices) == traj.n_atoms

        list_of_trajs.append(traj)

    return list_of_trajs
Exemple #49
0
def load_trajectories(project, stride, atom_indices):
    """Return all trajectories of `project`, loaded with the given stride
    and atom subset.

    Parameters
    ----------
    project : object
        Supplies ``n_trajs`` and ``load_traj(i, stride=..., atom_indices=...)``.
    stride : int
        Forwarded to ``project.load_traj``.
    atom_indices : array_like or None
        Forwarded to ``project.load_traj``. If not None, the atom count of
        every loaded trajectory is asserted to equal ``len(atom_indices)``.

    Returns
    -------
    list_of_trajs : list
        One loaded trajectory per project index, in order.
    """
    # Decide once, by identity: comparing an ndarray of indices to None with
    # `!=` yields an elementwise array whose truth value is ambiguous.
    check_atoms = atom_indices is not None

    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride, atom_indices=atom_indices)

        if check_atoms:
            assert len(atom_indices) == traj.n_atoms

        list_of_trajs.append(traj)

    return list_of_trajs
Exemple #50
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read data from a xyz file.

        Parameters
        ----------
        n_frames : int, None
            The number of frames you would like to read from the file.
            If None, all of the remaining frames will be loaded.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3), dtype=np.float32
            Fewer than `n_frames` frames are returned if the file ends
            first.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r'.
        """
        if not self._mode == 'r':
            raise ValueError('read() is only available when file is opened '
                             'in mode="r"')

        if stride is None:
            stride = 1
        if n_frames is None:
            frame_counter = itertools.count()
        else:
            frame_counter = xrange(n_frames)

        collected = []
        for _ in frame_counter:
            try:
                frame = self._read()
                if atom_indices is not None:
                    frame = frame[atom_indices, :]
            except _EOF:
                break
            collected.append(frame)

            # discard the frames between two kept ones
            skipped = 0
            while skipped < stride - 1:
                try:
                    self._read()
                except _EOF:
                    break
                skipped += 1

        return np.array(collected)
Exemple #51
0
    def _read(self):
        """Read a single frame.

        Returns the coordinates of the frame (rows ordered by the atom id
        given on each line) together with the two values produced by
        ``parse_box``. Raises ``_EOF`` when the file is exhausted and
        ``IOError`` when the box header or an atom line is malformed.
        """

        def parse_error():
            # built lazily so the line counter is read at failure time
            return IOError('lammpstrj parse error on line {0:d} of "{1:s}". '
                           'This file does not appear to be a valid '
                           'lammpstrj file.'.format(
                               self._line_counter, self._filename))

        # --- begin header ---
        first = self._fh.readline()  # ITEM: TIMESTEP
        if first == '':
            raise _EOF()
        self._fh.readline()  # timestep
        self._fh.readline()  # ITEM: NUMBER OF ATOMS
        self._n_atoms = int(self._fh.readline())  # num atoms

        box_header = self._fh.readline().split()  # ITEM: BOX BOUNDS
        self._line_counter += 5
        n_fields = len(box_header)
        if n_fields != 9 and n_fields != 6:
            raise parse_error()
        lengths, angles = self.parse_box(
            'triclinic' if n_fields == 9 else 'orthogonal')

        self._fh.readline()  # ITEM: ATOMS ...
        self._line_counter += 4
        # --- end header ---

        n_atoms = self._n_atoms
        xyz = np.empty(shape=(n_atoms, 3))
        types = np.empty(shape=n_atoms, dtype='int')

        # --- begin body ---
        for _ in xrange(n_atoms):
            line = self._fh.readline()
            if line == '':
                raise _EOF()
            tokens = line.split()
            try:
                # the id column is 1-based; it selects the output row
                slot = int(tokens[0]) - 1
                types[slot] = int(tokens[1])
                xyz[slot] = [float(tok) for tok in tokens[2:5]]
            except Exception:
                raise parse_error()
            self._line_counter += 1
        # --- end body ---

        self._frame_index += 1
        return xyz, lengths, angles
def run(prep_metric,
        project,
        delta_time,
        atom_indices=None,
        output='tICAData.h5',
        min_length=0,
        stride=1):
    """Train a tICA object on the trajectories of a project and save it.

    Parameters
    ----------
    prep_metric
        Passed through to ``tICA`` as ``prep_metric``.
    project
        Object providing ``n_trajs``, ``traj_lengths`` and
        ``traj_filename(i)``.
    delta_time : int
        Lag time in unstrided frames; must be a multiple of ``stride``.
    atom_indices : optional
        Forwarded to ``md.load``.
    output : str
        Destination handed to ``tica_obj.save``.
    min_length : int
        Trajectories shorter than this are skipped.
    stride : int
        Stride used when loading each trajectory.

    Returns
    -------
    tica_obj
        The trained, solved and saved tICA object.

    Raises
    ------
    ValueError
        If ``stride`` does not evenly divide ``delta_time``.
    """
    # We will load the trajectories at the stride, so we need to find
    # what dt should be once we've strided by some amount.  divmod keeps
    # the lag integral and makes the divisibility check independent of
    # whether ``/`` performs floor or true division.
    lag, remainder = divmod(delta_time, stride)
    if remainder != 0:
        raise ValueError("Stride must be a divisor of delta_time.")

    # lag > 0 means we're doing tICA; if lag is zero, this is equivalent
    # to regular PCA, so calc_cov_mat is switched off.
    tica_obj = tICA(lag=lag,
                    calc_cov_mat=bool(lag > 0),
                    prep_metric=prep_metric)

    for i in xrange(project.n_trajs):
        logger.info("Working on trajectory %d", i)
        traj_length = project.traj_lengths[i]

        if traj_length <= lag:
            logger.info(
                "Trajectory is not long enough for this lag "
                "(%d vs %d)", traj_length, lag)
            continue

        if traj_length < min_length:
            logger.info(
                "Trajectory is not longer than min_length "
                "(%d vs %d)", traj_length, min_length)
            continue

        # it would be more memory efficient if we trained incrementally
        # at least for long trajectories
        traj_chunk = md.load(project.traj_filename(i),
                             stride=stride,
                             atom_indices=atom_indices)
        tica_obj.train(trajectory=traj_chunk)

    tica_obj.solve()
    tica_obj.save(output)
    logger.info("Saved output to %s", output)

    return tica_obj
Exemple #53
0
    def _validate(self):
        """Check that this project is consistent with its files.

        Raises
        ------
        ValueError
            If the conf file or any trajectory file does not exist, if
            ``self.traj_lengths`` differs from the lengths reported by
            ``self._eval_traj_shapes()``, or if the reported atom counts
            are not all equal.
        """
        if not os.path.exists(self.conf_filename):
            raise ValueError('conf does not exist: %s' % self.conf_filename)

        for traj_index in xrange(self.n_trajs):
            traj_path = self.traj_filename(traj_index)
            if not os.path.exists(traj_path):
                raise ValueError("%s does not exist" % traj_path)

        disk_lengths, atom_counts = self._eval_traj_shapes()
        lengths_agree = np.all(self.traj_lengths == disk_lengths)
        if not lengths_agree:
            raise ValueError("Trajs length don't match what's on disk")

        # It is possible that there are no valid trajectories, in which
        # case there are no atom counts to compare.
        if len(atom_counts) == 0:
            return

        # every trajectory must have the same number of atoms as the first
        if not np.all(atom_counts == atom_counts[0]):
            raise ValueError('Not all trajs have the same number of atoms')
Exemple #54
0
    def _validate(self):
        """Run consistency checks on this project; raise ValueError on failure.

        The checks, in order: the conf file exists, every trajectory file
        exists, the stored trajectory lengths equal those returned by
        ``self._eval_traj_shapes()``, and all returned atom counts match.
        """
        conf_missing = not os.path.exists(self.conf_filename)
        if conf_missing:
            raise ValueError('conf does not exist: %s' % self.conf_filename)

        for index in xrange(self.n_trajs):
            if os.path.exists(self.traj_filename(index)):
                continue
            raise ValueError("%s does not exist" % self.traj_filename(index))

        lengths, n_atoms = self._eval_traj_shapes()
        if not np.all(self.traj_lengths == lengths):
            raise ValueError("Trajs length don't match what's on disk")

        # n_atoms can be empty when there are no valid trajectories; only
        # compare atom counts when there is something to compare.
        has_trajs = len(n_atoms) > 0
        if has_trajs and not np.all(n_atoms == n_atoms[0]):
            raise ValueError('Not all trajs have the same number of atoms')
Exemple #55
0
    def save(self, filename_or_file):
        """Write this project's records to a yaml file.

        Parameters
        ----------
        filename_or_file : str or file
            Either a path or an open file object. For a path, '.yaml' is
            appended when missing, the parent directory is created if it
            does not exist, and the file is opened and closed here. A file
            object is written to and left open.

        Returns
        -------
        filename_or_file
            The path that was written (including any appended '.yaml'), or
            the file object that was passed in.

        Raises
        ------
        TypeError
            If ``filename_or_file`` is neither a string nor a file object.
        """
        if isinstance(filename_or_file, string_types):
            if not filename_or_file.endswith('.yaml'):
                filename_or_file += '.yaml'
            dirname = os.path.abspath(os.path.dirname(filename_or_file))
            if not os.path.exists(dirname):
                logger.info("Creating directory: %s", dirname)
                os.makedirs(dirname)
            own_fid = True
        elif isinstance(filename_or_file, file):
            dirname = os.path.abspath(os.path.dirname(filename_or_file.name))
            own_fid = False
        else:
            raise TypeError('filename_or_file must be a path or an open '
                            'file, got %r' % type(filename_or_file))

        # somewhat complicated logic if the directory you're
        # saving in is different than the directory this
        # project references its paths from

        # the point is that when the file lists paths, those
        # paths are going to be interpreted as being with respect to
        # the directory that the file is in. So when the Project file
        # is being resaved (but the Trajectorys are not being moved)
        # then the paths need to change to compensate

        # Use the directory resolved above so that this also works when a
        # file object (rather than a path) was passed in.
        relative = os.path.relpath(self._project_dir, dirname)

        records = {'trajs': []}
        records['conf_filename'] = os.path.join(relative, self._conf_filename)
        for i, path in enumerate(self._traj_paths):
            # yaml doesn't like numpy types, so we have to sanitize them
            records['trajs'].append({
                'id': i,
                'path': str(os.path.join(relative, path)),
                'converted_from': list(self._traj_converted_from[i]),
                'length': int(self._traj_lengths[i]),
                'errors': self._traj_errors[i]})

        # Open only once the records are built, and always release a handle
        # we opened ourselves, even if the dump fails.
        handle = open(filename_or_file, 'w') if own_fid else filename_or_file
        try:
            yaml.dump(records, handle, Dumper=Dumper)
        finally:
            if own_fid:
                handle.close()

        return filename_or_file
Exemple #56
0
def normalize_left_eigenvectors(left_eigenvectors):
    """Rescale the left eigenvectors in place.

    After the call, column 0 sums to one and every other column ``v``
    satisfies ``|<v / populations, v>| = 1``, where ``populations`` is the
    rescaled column 0.

    Parameters
    ----------
    left_eigenvectors : ndarray
        Two-dimensional array whose kth eigenvector is
        ``left_eigenvectors[:, k]``.

    Notes
    -----
    Modifies the input array and returns nothing. Assumes that column 0 is
    the equilibrium vector and that detailed balance holds.
    """
    # Column 0 is a view, so dividing it in place updates the input array.
    pi = left_eigenvectors[:, 0]
    pi /= pi.sum()

    # Each row of the transpose is a view of one column of the input, so
    # the in-place division below writes straight back into it.
    for vec in left_eigenvectors.T[1:]:
        weighted_sq_norm = np.dot(vec / pi, vec)
        vec /= abs(weighted_sq_norm) ** .5
Exemple #57
0
def load_prep_trajectories(project, stride, atom_indices, metric):
    """Load each trajectory of ``project`` in chunks, preparing as we go.

    Every chunk yielded by ``md.iterload`` is passed through
    ``metric.prepare_trajectory`` immediately. This is helpful for metrics
    that use dimensionality reduction, so you can use more frames without
    a MemoryError.

    Returns
    -------
    list_of_ptrajs : list
        One concatenated prepared array per trajectory.
    which : np.ndarray
        One ``(trajectory index, frame index)`` pair per strided frame.
    """
    prepared_trajs = []
    frame_index = []
    for traj_id in xrange(project.n_trajs):
        # frames of this trajectory that survive the stride
        kept_frames = np.arange(0, project.traj_lengths[traj_id], stride)
        frame_index.extend((traj_id, frame) for frame in kept_frames)

        prepared_chunks = [
            metric.prepare_trajectory(chunk)
            for chunk in md.iterload(project.traj_filename(traj_id),
                                     stride=stride,
                                     atom_indices=atom_indices)
        ]
        prepared_trajs.append(np.concatenate(prepared_chunks))

    return prepared_trajs, np.array(frame_index)
Exemple #58
0
def test_mfpt2():
    """In a two-state model the MFPT out of a state is its escape time."""
    tprob = np.array([[0.90, 0.10], [0.22, 0.78]])

    # Populations follow from the balance condition
    #   pi1 T[1, 0] = pi0 T[0, 1]
    # and are then normalized to sum to one.
    pi0 = 1
    pi1 = pi0 * tprob[0, 1] / tprob[1, 0]
    pops = np.array([pi0, pi1]) / (pi0 + pi1)

    msm = MarkovStateModel(lag_time=1)
    msm.transmat_ = tprob
    msm.n_states_ = 2
    msm.populations_ = pops

    # column j holds the result of tpt.mfpts(msm, j)
    mfpts = np.vstack([tpt.mfpts(msm, sink) for sink in (0, 1)]).T

    # since it's a 2x2 the mfpt from one state to the other is the
    # same as the escape time of the starting state
    for src, dst in ((0, 1), (1, 0)):
        escape_time = 1 / (1 - tprob[src, src])
        npt.assert_almost_equal(escape_time, mfpts[src, dst])
Exemple #59
0
def load_prep_trajectories(project, stride, atom_indices, metric):
    """Prepare the project's trajectories while they are being loaded.

    Each trajectory is read with ``md.iterload`` and every chunk is run
    through ``metric.prepare_trajectory`` before the next one is read.
    This is helpful for metrics that use dimensionality reduction, so you
    can use more frames without a MemoryError.

    Returns a list with one concatenated prepared array per trajectory,
    and an array of ``(trajectory index, frame index)`` pairs covering the
    strided frames.
    """
    def prepared_chunks(path):
        # yield the prepared form of each chunk read from ``path``
        for raw in md.iterload(path,
                               stride=stride,
                               atom_indices=atom_indices):
            yield metric.prepare_trajectory(raw)

    ptrajs, pairs = [], []
    for n in xrange(project.n_trajs):
        frames = np.arange(0, project.traj_lengths[n], stride)
        pairs += zip([n] * len(frames), frames)

        pieces = list(prepared_chunks(project.traj_filename(n)))
        ptrajs.append(np.concatenate(pieces))

    return ptrajs, np.array(pairs)