Exemple #1
0
def load_Trajs_generator(trajfiles_list, prmtop_file, stride, chunk):
    """
    Iteratively loads a list of NetCDF files and yields them
    as mdtraj.Trajectory chunks.

    Parameters
    ----------
    trajfiles_list: list of str, or str
            List with the names of trajectory files. A single file name
            given as a plain string is accepted as well.
    prmtop_file:  str
            Name of the prmtop file
    stride: int
            Frames to be used when loading the trajectories
    chunk:  int
            Number of frames to load at once from disk per iteration.
            If 0, load all.

    Yields
    ------
    frag: mdtraj.Trajectory
    """
    # A bare string is one trajectory, not a sequence of one-character file
    # names. Checking the type up front (rather than catching the OSError
    # raised by the first bogus "file") means a genuine I/O error on a real
    # list is reported as-is instead of being retried against the whole list.
    if isinstance(trajfiles_list, str):
        trajfiles_list = [trajfiles_list]

    for traj in trajfiles_list:
        for frag in mdtraj.iterload(traj, chunk=chunk, top=prmtop_file,
                                    stride=stride):
            yield frag
    def onJoinTrajectories(self):
        """Join two trajectories chunk by chunk and write the result to an H5 file.

        The target file name is requested through a save-file dialog. Both
        source trajectories (``self.trajectory_filename_1`` and
        ``self.trajectory_filename_2``) are read in chunks of
        ``self.chunk_size`` frames. Each chunk is optionally reversed
        (``self.reverse_traj_1`` / ``self.reverse_traj_2``; the reversal is
        applied per chunk, not to the whole trajectory) and the two chunks are
        concatenated along the frame axis (``self.join_mode == 'time'``) or
        the atom axis (``self.join_mode == 'atoms'``).

        Raises
        ------
        ValueError
            If ``self.join_mode`` is neither ``'time'`` nor ``'atoms'``.
        """
        # getSaveFileName returns a (filename, selected_filter) tuple: index
        # the tuple first and only then convert to str. Converting first
        # would pick the first character of the tuple's repr.
        target_filename = str(QtWidgets.QFileDialog.getSaveFileName(None, 'Save H5-Model file', '', 'H5-files (*.h5)')[0])
        if not target_filename:
            # Empty file name: the dialog was cancelled, nothing to write.
            return

        fn1 = self.trajectory_filename_1
        fn2 = self.trajectory_filename_2

        r1 = self.reverse_traj_1
        r2 = self.reverse_traj_2

        traj_1 = md.load_frame(fn1, index=0)
        traj_2 = md.load_frame(fn2, index=0)

        # Join the first frames only to obtain the topology and atom count of
        # the output trajectory.
        join_mode = self.join_mode
        if join_mode == 'time':
            traj_join = traj_1.join(traj_2)
            axis = 0
        elif join_mode == 'atoms':
            traj_join = traj_1.stack(traj_2)
            axis = 1
        else:
            raise ValueError(
                "Unknown join mode %r (expected 'time' or 'atoms')" % (join_mode,))

        # Create an empty trajectory file that the chunks are appended to
        target_traj = md.Trajectory(xyz=np.empty((0, traj_join.n_atoms, 3)), topology=traj_join.topology)
        target_traj.save(target_filename)

        chunk_size = self.chunk_size
        n_written = 0
        # The context manager closes the file even if a chunk fails.
        with tables.open_file(target_filename, 'a') as table:
            for c1, c2 in izip(md.iterload(fn1, chunk=chunk_size), md.iterload(fn2, chunk=chunk_size)):
                xyz_1 = c1.xyz[::-1] if r1 else c1.xyz
                xyz_2 = c2.xyz[::-1] if r2 else c2.xyz
                xyz = np.concatenate((xyz_1, xyz_2), axis=axis)

                table.root.coordinates.append(xyz)
                # Number frames from a running counter: in 'time' mode every
                # append holds the frames of two chunks, so deriving the
                # offset from the chunk index would produce overlapping times.
                table.root.time.append(np.arange(n_written, n_written + xyz.shape[0], dtype=np.float32))
                n_written += xyz.shape[0]
Exemple #3
0
    def iterload(self, i, chunk):
        """Return an ``md.iterload`` iterator over the ``i``-th file.

        The file name comes from ``self.filename(i)``. ``self.stride`` and
        ``self.atom_indices`` are always forwarded; ``top`` is only passed
        when ``self._topology`` is set.
        """
        if self.verbose:
            print('[MDTraj dataset] iterloading %s' % self.filename(i))

        options = dict(chunk=chunk, stride=self.stride,
                       atom_indices=self.atom_indices)
        if self._topology is not None:
            options['top'] = self._topology
        return md.iterload(self.filename(i), **options)
Exemple #4
0
    def iterload(self, i, chunk):
        """Iterate over trajectory number ``i`` in pieces of ``chunk`` frames.

        Delegates to ``md.iterload`` with the dataset's ``stride`` and
        ``atom_indices``. The ``top`` keyword is added only if the dataset
        carries a topology (``self._topology`` is not None).
        """
        if self.verbose:
            print('[MDTraj dataset] iterloading %s' % self.filename(i))

        has_topology = self._topology is not None
        extra = {'top': self._topology} if has_topology else {}
        return md.iterload(
            self.filename(i),
            chunk=chunk,
            stride=self.stride,
            atom_indices=self.atom_indices,
            **extra)
def run(project, atom_indices=None, traj_fn='all'):
    """Compute per-frame values with ``md.shrake_rupley`` / ``asa.calculate_asa``.

    Parameters
    ----------
    project : object
        Must provide ``load_conf()``, ``n_trajs``, ``traj_lengths`` and
        ``traj_filename(index)``.
    atom_indices : optional
        Forwarded to the trajectory readers.
    traj_fn : str
        ``'all'`` (case-insensitive) processes every trajectory of the
        project; any other value is treated as a single trajectory file
        read with ``Trajectory.enum_chunks_from_lhdf``.

    Returns
    -------
    SASA : np.ndarray
        For ``'all'``: array of shape (n_trajs, max trajectory length,
        n_atoms) pre-filled with -1, where the first ``traj_lengths[i]``
        rows of trajectory ``i`` hold the computed values. Otherwise the
        array built from the per-frame results of the single file.
    """
    n_atoms = project.load_conf().n_atoms

    if traj_fn.lower() != 'all':
        # Single trajectory file
        frame_values = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                traj_fn, AtomIndices=atom_indices):
            frame_values.extend(asa.calculate_asa(traj_chunk))
        return np.array(frame_values)

    # -1 marks frames beyond the end of shorter trajectories
    shape = (project.n_trajs, np.max(project.traj_lengths), n_atoms)
    SASA = np.ones(shape) * -1

    for traj_ind in xrange(project.n_trajs):
        logger.info("Working on Trajectory %d", traj_ind)
        traj_path = project.traj_filename(traj_ind)
        frame_values = []
        for traj_chunk in md.iterload(traj_path,
                                      atom_indices=atom_indices,
                                      chunk=1000):
            frame_values.extend(md.shrake_rupley(traj_chunk))
        SASA[traj_ind, 0:project.traj_lengths[traj_ind]] = frame_values

    return SASA
Exemple #6
0
def PCA_mem(traj, top):
    """Memory-saving PCA of the Cartesian coordinates of a trajectory.

    The trajectory is streamed in chunks. Only the running sum of the
    shifted coordinates and the running sum of their outer products are
    kept, and the covariance matrix is assembled from these two at the end.

    Parameters
    ----------
    traj : str
        Trajectory file, read with ``md.iterload``.
    top : str
        Structure/topology file. It is passed as ``top`` to ``md.iterload``
        and also loaded with ``md.load``; its coordinates are the reference
        subtracted from every frame before accumulation.

    Returns
    -------
    eig_vals : np.ndarray
        Eigenvalues of the covariance matrix in descending order.
    eig_vecs : np.ndarray
        Eigenvectors exactly as returned by ``np.linalg.eigh``.
    cov_mat : np.ndarray
        Covariance matrix of shape (3 * n_atoms, 3 * n_atoms).

    Raises
    ------
    ValueError
        If the trajectory yields no frames.
    """
    print("The PCA is performed while saving memory, it may take some time.")
    mean_str = md.load(top)
    mean_vec = mean_str.xyz.astype(np.float64).reshape(1, mean_str.n_atoms * 3)
    n_dim = mean_vec.shape[1]

    # Running sums, allocated directly as arrays (no intermediate nested
    # Python lists of n_dim * n_dim floats).
    N = 0
    X_1 = np.zeros(n_dim, dtype=np.float64)
    X_X = np.zeros((n_dim, n_dim), dtype=np.float64)
    for frame in md.iterload(traj, top=top, chunk=100000):
        N += frame.n_frames
        X = frame.xyz.astype(np.float64).reshape(frame.n_frames,
                                                 frame.n_atoms * 3) - mean_vec
        X_1 += X.sum(axis=0)
        X_X += np.tensordot(X, X, axes=(0, 0))

    if N == 0:
        raise ValueError("Trajectory %r contains no frames" % (traj,))

    # Sample covariance from the first and second moments of shifted data
    cov_mat = (X_X - np.outer(X_1, X_1) / N) / (N - 1)
    print("Covariance matrix calculated (%s,%s)" % cov_mat.shape)
    trace = np.trace(cov_mat)
    print('Trace of the covariance matrix: %s' % trace)
    eig_vals, eig_vecs = np.linalg.eigh(cov_mat)
    # NOTE(review): eigh returns eigenvalues in ascending order with matching
    # eigenvector columns. Only the eigenvalues are reversed here, so
    # eig_vecs[:, k] pairs with the k-th *smallest* eigenvalue. Left as-is
    # because callers may already compensate -- confirm before changing.
    return eig_vals[::-1], eig_vecs, cov_mat
Exemple #7
0
def bead_tetrahedrality(fn_traj, fn_top, fn_save, ibead, len_chunk=100, select_A ='name O', select_B='name O'):
    """Evaluate ``extract_Q`` for every frame of a trajectory and save the result.

    Parameters
    ----------
    fn_traj : str
        Trajectory file, read in chunks with ``md.iterload``.
    fn_top : str
        File loaded with ``md.load`` to obtain the topology.
    fn_save : str
        Output text file written with ``np.savetxt``.
    ibead : int
        Bead index; only used in the progress messages.
    len_chunk : int
        Number of frames per chunk.
    select_A, select_B : str
        Selection strings passed to ``topology.select``. Every (B, A) index
        pair is handed to ``extract_neighbors``.
    """
    t0 = time.time()
    print('Processing bead %d...' % ibead)
    print('')
    top = md.load(fn_top).topology
    trj = md.iterload(fn_traj, top=top, chunk=len_chunk)
    # Prepare index pairs
    idx_A = top.select(select_A)
    idx_B = top.select(select_B)
    n_A = len(idx_A)
    n_B = len(idx_B)
    pairs = np.array([(iB, iA) for iB in idx_B for iA in idx_A], dtype=int)

    q_parts = []
    for chunk in trj:
        # presumably 4 is the number of neighbours kept per atom -- TODO confirm
        neighbors = extract_neighbors(chunk, pairs, 4, n_A, n_B)

        # Iteration over the chunk is necessary because neighbors are not
        # fixed over the trajectory. Use the actual chunk length: the final
        # chunk may hold fewer than len_chunk frames.
        for i in range(chunk.n_frames):
            q_parts.append(np.ravel(extract_Q(chunk[i], neighbors[i], idx_A)))

    # Single concatenation instead of growing the array with np.append for
    # every frame; the leading empty array keeps the no-frame case valid.
    Qs = np.concatenate([np.array([])] + q_parts)

    np.savetxt(fn_save, Qs)
    t1 = time.time()
    print('Processing bead %d took %.2f minutes.' % (ibead, (t1-t0)/60.0))
    print('')
    def onSaveTrajectory(self, target_filename=None):
        """Write a rotated and translated copy of the trajectory to an H5 file.

        Parameters
        ----------
        target_filename : str, optional
            Output file. When None, the name is requested through a
            save-file dialog; cancelling the dialog aborts without writing.

        The source trajectory ``self.trajectory_filename`` is read with
        ``self.stride`` in chunks of 1000 frames. ``rotate`` and
        ``translate`` are applied to a copy of each chunk's coordinates
        using ``self.rotation_matrix`` and ``self.translation_vector``.
        """
        if target_filename is None:
            # getSaveFileName returns a (filename, selected_filter) tuple:
            # index the tuple first, then convert to str.
            target_filename = str(QtWidgets.QFileDialog.getSaveFileName(None, 'Save H5-Model file', '', 'H5-files (*.h5)')[0])
            if not target_filename:
                return

        translation_vector = self.translation_vector
        rotation_matrix = self.rotation_matrix
        stride = self.stride

        if self.verbose:
            print("Stride: %s" % stride)
            print("\nRotation Matrix")
            print(rotation_matrix)
            print("\nTranslation vector")
            print(translation_vector)

        # Start from a zero-frame trajectory so the output holds only the
        # transformed frames (a one-frame np.empty array would leave an
        # uninitialised frame at the start of the file).
        first_frame = md.load_frame(self.trajectory_filename, 0)
        traj_new = md.Trajectory(xyz=np.empty((0, first_frame.n_atoms, 3)), topology=first_frame.topology)
        traj_new.save(target_filename)

        chunk_size = 1000
        # The context manager closes the file even if a chunk fails.
        with tables.open_file(target_filename, 'a') as table:
            for i, chunk in enumerate(md.iterload(self.trajectory_filename, chunk=chunk_size, stride=stride)):
                xyz = chunk.xyz.copy()
                # presumably both helpers modify xyz in place (their return
                # values are not used) -- TODO confirm
                rotate(xyz, rotation_matrix)
                translate(xyz, translation_vector)
                # MDTraj HDF5 files store positions in the 'coordinates' node
                table.root.coordinates.append(xyz)
                table.root.time.append(np.arange(i * chunk_size, i * chunk_size + xyz.shape[0], dtype=np.float32))
    def onProcessTrajectory(self):
        """Evaluate the universe's energies for every frame and write them to a file.

        The output file is chosen with ``chisurf.widgets.save_file``. A
        tab-separated header (``FrameNbr`` followed by the potential names)
        is written first; afterwards one tab-separated line per frame is
        appended and echoed to stdout.
        """
        print("onProcessTrajectory")
        energy_file = chisurf.widgets.save_file(
            description='Save energies', file_type='CSV-name file (*.txt)')

        header = 'FrameNbr\t'
        header += ''.join('%s\t' % p.name for p in self.universe.potentials)
        header += '\n'
        header_fp = chisurf.fio.zipped.open_maybe_zipped(filename=energy_file,
                                                         mode='w')
        try:
            header_fp.write(header)
        finally:
            # Close explicitly so the header is on disk before appending
            header_fp.close()

        self.structure = chisurf.structure.TrajectoryFile(
            mdtraj.load_frame(self.trajectory_file, 0))[0]
        i = 0
        # Open the output once for the whole run instead of re-opening (and
        # never explicitly closing) it for every frame.
        with open(energy_file, 'a') as energy_fp:
            for chunk in mdtraj.iterload(self.trajectory_file):
                for frame in chunk:
                    # presumably converts nm to Angstrom -- TODO confirm
                    self.structure.xyz = frame.xyz * 10.0
                    self.structure.update_dist()
                    s = '%i\t' % (i * self.stride + 1)
                    s += ''.join(
                        '%.3f\t' % e
                        for e in self.universe.getEnergies(self.structure))
                    print(s)
                    energy_fp.write(s + '\n')
                    i += 1
Exemple #10
0
def calc_diffusion(trajfile, topfile, beta, s_frames, s, n_dim, n_frames_tot):
    """Estimate diffusion matrices from lagged coordinate displacements.

    The trajectory is read in chunks of 1000 frames. Within each chunk the
    displacement ``dx = x(t + s_frames) - x(t)`` of the flattened
    coordinates is formed (displacements never span a chunk boundary), and
    the sums of ``dx`` and of ``dx dx^T`` are accumulated.

    Parameters
    ----------
    trajfile, topfile : str
        Trajectory and topology passed to ``md.iterload``.
    beta : float
        Prefactor applied to both results.
    s_frames : int
        Lag in frames; must be at least 1.
    s : float
        Lag used in the normalisation ``beta / (2 * s * N)``.
    n_dim : int
        Length of one flattened frame (atoms times 3).
    n_frames_tot : number
        Total number of frames; only used for the progress message.

    Returns
    -------
    D : np.ndarray
        ``beta / (2 s) * <dx dx^T>``.
    D_stock : np.ndarray
        ``beta / (2 s) * (<dx dx^T> - <dx><dx>^T)``.

    Raises
    ------
    ValueError
        If ``s_frames`` is smaller than 1.
    """
    if s_frames < 1:
        raise ValueError("s_frames must be >= 1, got %r" % (s_frames,))

    sum_dxi_dxj = np.zeros((n_dim, n_dim), float)
    sum_dxi = np.zeros(n_dim, float)

    total_n_iters = int(np.round(n_frames_tot / 1000))
    N = 0
    # enumerate supplies the chunk counter; a manual counter that is never
    # incremented would suppress the progress output entirely.
    for iteration_idx, chunk in enumerate(
            md.iterload(trajfile, top=topfile, chunk=1000)):
        if ((iteration_idx + 1) % 10) == 0:
            print("  ({}/{})".format(iteration_idx + 1, total_n_iters))
            sys.stdout.flush()

        xyz_flat = np.reshape(chunk.xyz, (chunk.n_frames, n_dim))
        dx = xyz_flat[s_frames:] - xyz_flat[:-s_frames]
        sum_dxi_dxj += np.dot(dx.T, dx)
        # Sum over frames: dx is (n_pairs, n_dim), the accumulator (n_dim,)
        sum_dxi += dx.sum(axis=0)
        # Count the displacements actually formed; a chunk shorter than the
        # lag contributes none (and must not contribute a negative count).
        N += dx.shape[0]

    norm = beta / (2 * s * float(N))
    D = norm * sum_dxi_dxj
    # <dx><dx>^T = outer(sum, sum) / N**2; one factor 1/N is already in norm
    D_stock = norm * (sum_dxi_dxj - np.outer(sum_dxi, sum_dxi) / float(N))

    return D, D_stock
Exemple #11
0
def read_xtc(xtc, top, chunk=100, stride=1):
    """Read a Gromacs XTC trajectory chunk by chunk with mdtraj.iterload.

    Parameters
    ----------
    xtc : str
        Input xtc file name.
    top : str
        Input topology information file, a pdb.
    chunk : int
        Number of frames per chunk.
    stride : int
        Keep one frame every ``stride`` frames.

    Returns
    -------
    trajs : list
        The chunks yielded by ``mt.iterload``, in order.
    """
    trajs = list(mt.iterload(xtc, chunk=chunk, top=top, stride=stride))

    print("Number of chunks: ", len(trajs))

    return trajs
def s_order(job):
    """Plot and save the mean and standard deviation of the ``compute_s`` profile.

    ``nvt.dcd`` in the job workspace is read in chunks of 9000 frames after
    skipping the first 2001 frames. For every chunk ``compute_s`` is
    evaluated along dimension 1 around the pore centre. The mean and the
    standard deviation over chunks are plotted and written to
    ``s_order.pdf`` / ``s_order.txt`` inside ``with job:`` and to a file
    under ``data/`` named after ``job.sp.nwater``.
    """
    dim = 1
    lower, upper = 0.167, 1.167
    pore_center = (upper - lower) / 2 + lower
    fig, ax = plt.subplots()

    trajectory = os.path.join(job.ws, 'nvt.dcd')
    topology = os.path.join(job.ws, 'init.mol2')
    per_chunk = []
    for trj in md.iterload(trajectory, top=topology, chunk=9000, skip=2001):
        water_bonds = get_bond_array(trj)
        bins, s_values = compute_s(trj,
                                   dim,
                                   pore_center=pore_center,
                                   bond_array=water_bonds)
        per_chunk.append(s_values)

    s_mean = np.mean(per_chunk, axis=0)
    s_std = np.std(per_chunk, axis=0)

    plt.plot(bins, s_mean)
    plt.fill_between(bins, s_mean + s_std, s_mean - s_std, alpha=0.2)
    plt.xlabel('z-position (nm)')
    plt.ylabel('S')

    # Same three-column table for both text outputs
    columns = np.transpose(np.vstack([bins, s_mean, s_std]))
    header = 'Bins\tS_mean\tS_std'

    with job:
        plt.savefig('s_order.pdf')

        np.savetxt('s_order.txt', columns, header=header)
    np.savetxt(f'data/{job.sp.nwater}_mol_s_order.txt', columns, header=header)
Exemple #13
0
def load_Trajs(trajfiles_list, prmtop_file, stride=1, chunk=1000):
    """
    Iteratively loads a list of NetCDF files and returns them
    as a list of mdtraj.Trajectory objects

    Parameters
    ----------
    trajfiles_list: list of str
            List with the names of trajectory files
    prmtop_file:  str
            Name of the prmtop file
    stride: int
            Frames to be used when loading the trajectories
    chunk:  int
            Number of frames to load at once from disk per iteration.
            If 0, load all.

    Returns
    -------
    list_chunks: list
            The mdtraj.Trajectory chunks of all files, in file order
    """
    return [
        frag
        for traj in trajfiles_list
        for frag in mdtraj.iterload(traj, chunk=chunk, top=prmtop_file,
                                    stride=stride)
    ]
Exemple #14
0
def xtcs2mindists(xtcs, top,
                  stride=1,
                  chunksize=1000, **COM_kwargs):
    """Collect, per trajectory, the minimum value seen for every pair index.

    Each trajectory is read in chunks and passed to
    ``igeom2mindist_COMdist_truncation``. For every pair index reported by
    that function the smallest value over all chunks is kept.

    Parameters
    ----------
    xtcs : iterable of str
        Trajectory files.
    top :
        Topology passed to ``_md.iterload``.
    stride : int
        Stride passed to ``_md.iterload``.
    chunksize : int
        The chunk handed to ``_md.iterload`` is ``round(chunksize / stride)``.
    **COM_kwargs
        Forwarded to ``igeom2mindist_COMdist_truncation``.

    Returns
    -------
    ctc_mins : list of np.ndarray
        One array per trajectory with the minima, ordered by sorted pair index.
    ctc_pairs : list of np.ndarray
        One array per trajectory with the matching pairs, in the same order.
    """
    #TODO avoid code repetition with xtcs2ctcs
    def inform(ixtc, ii, running_f):
        # Progress line, overwritten in place thanks to the carriage return
        print(
            "Analysing %20s in chunks of %3u frames. chunks read %4u. frames read %8u" % (ixtc, chunksize, ii, running_f),
            end="\r", flush=True)

    # _np.round returns a float; iterload needs an integer frame count
    chunk = int(_np.round(chunksize / stride))

    ctc_mins, ctc_pairs = [], []
    for ixtc in xtcs:
        running_f = 0
        inform(ixtc, 0, running_f)
        ires = {}
        for jj, igeom in enumerate(_md.iterload(ixtc, top=top, stride=stride, chunk=chunk)):
            running_f += igeom.n_frames
            inform(ixtc, jj, running_f)
            mins, pairs, pair_idxs = igeom2mindist_COMdist_truncation(igeom, **COM_kwargs)
            for imin, ipair, idx in zip(mins, pairs, pair_idxs):
                # Explicit first-seen test instead of a bare ``except:`` that
                # would also hide unrelated errors.
                if idx in ires:
                    ires[idx]["val"] = _np.min((ires[idx]["val"], imin))
                else:
                    ires[idx] = {"val": imin,
                                 "pair": ipair}

        pair_idxs = sorted(ires.keys())
        ctc_mins.append(_np.array([ires[idx]["val"] for idx in pair_idxs]))
        ctc_pairs.append(_np.array([ires[idx]["pair"] for idx in pair_idxs]))
    print()
    return ctc_mins, ctc_pairs
Exemple #15
0
def load_trajchunks(traj,
                    parm,
                    start=1,
                    stride=1,
                    standard_names=True,
                    **kwargs):
    """Load a file as a generator of MDTraj trajectory chunks.

       Useful for large/memory intensive trajectory files.
       Usage: load_trajchunks(traj, parm, [start=1, stride=1, **kwargs])

       ``start`` is the 1-based number of the first frame to read.
       Standard kwargs include chunk (size of the trajectory chunks
       to load per iteration) and atom_indices (an array of 0-indexed
       atoms to keep).

       'standard_names=False' (not the default here, or in MDTraj)
       may also be useful for PDB topologies, otherwise amide H might
       be renamed from the atom names provided to the standard PDB
       identifiers (e.g. 'H', 'H2', 'H3' for the terminal NH3 group).

       Returns the iterator produced by ``md.iterload``."""
    try:
        topology = md.load_topology(parm, standard_names=standard_names)
    except TypeError:
        # MDTraj only accepts the standard_names kwarg for certain filetypes
        topology = md.load_topology(parm)

    # iterload's skip counts frames to drop, so convert the 1-based start
    frames_to_skip = start - 1
    return md.iterload(traj,
                       top=topology,
                       skip=frames_to_skip,
                       stride=stride,
                       **kwargs)
    def onRemoveClashes(self):
        """Copy the trajectory to an H5 file, keeping only frames that pass the clash test.

        The source ``self.trajectory_filename`` is read with ``self.stride``
        in chunks of 1000 frames. ``below_min_distance`` is evaluated for
        the atoms selected by ``self.atom_list`` with ``self.min_distance``;
        frames whose result is smaller than 1 are appended to the target
        file together with consecutive time stamps.
        """
        target_filename = chisurf.widgets.save_file('H5-Trajectory file',
                                                    'H5-File (*.h5)')
        source_filename = self.trajectory_filename
        stride = self.stride
        min_distance = self.min_distance

        # Create an empty trajectory file with the source topology
        frame_0 = md.load_frame(source_filename, 0)
        no_frames = np.empty((0, frame_0.n_atoms, 3))
        target_traj = md.Trajectory(xyz=no_frames, topology=frame_0.topology)
        atom_list = target_traj.top.select(self.atom_list)
        target_traj.save(target_filename)

        for chunk in md.iterload(source_filename, chunk=1000, stride=stride):
            xyz = chunk.xyz.copy()
            frames_below = below_min_distance(xyz,
                                              min_distance,
                                              atom_list=atom_list)
            keep = np.where(frames_below < 1)[0]
            xyz_clash_free = np.take(xyz, keep, axis=0)
            with tables.open_file(target_filename, 'a') as table:
                table.root.coordinates.append(xyz_clash_free)
                # Continue the time axis from the frames already stored
                first = table.root.time.shape[0]
                times = np.arange(first,
                                  first + xyz_clash_free.shape[0],
                                  dtype=np.float32)
                table.root.time.append(times)
Exemple #17
0
def get_n_frames(trajfile, topfile):
    """Count the frames of a trajectory and report its coordinate dimension.

    Parameters
    ----------
    trajfile, topfile : str
        Trajectory and topology passed to ``md.iterload``.

    Returns
    -------
    n_frames_tot : float
        Total number of frames (returned as a float).
    n_dim : int
        Three times the number of atoms of the last chunk read.

    Raises
    ------
    ValueError
        If the trajectory yields no chunks, so the dimension is unknown.
    """
    n_frames_tot = 0
    n_atoms = None
    for chunk in md.iterload(trajfile, top=topfile):
        n_frames_tot += chunk.n_frames
        n_atoms = chunk.xyz.shape[1]

    if n_atoms is None:
        # Explicit error instead of an unbound loop variable further down
        raise ValueError("Trajectory %r contains no frames" % (trajfile,))

    return float(n_frames_tot), 3 * n_atoms
def evaluating_contacts_chunk(pdb_file, xtc_file, pairs_indexes, r_initial,
                              threshold=1.5, chunk=10000):
    """
    Count the contacts formed in every frame of a trajectory.

    Input:
     pdb_file - File with your structure (PDB or GRO files for instance).
     xtc_file - Trajectory.
     pairs_indexes - Numpy array Nx2 with the pairs used to evaluate the
      contacts (the first two columns of the pairs section in the TPR file,
      without the header).
     r_initial - Reference distance for each given pair.
     threshold - Factor applied to r_initial; a pair counts as a contact
      when its distance is at most r_initial * threshold.
     chunk - Number of frames per chunk in which the trajectory is analyzed.
    Output: 1-D numpy array with the total number of contacts for each
     frame.
    """
    counts_per_chunk = []
    for chunk_trajectory in md.iterload(xtc_file, top=pdb_file, chunk=chunk):
        distances = md.compute_distances(chunk_trajectory, pairs_indexes)
        print((chunk_trajectory))
        cutoffs = np.multiply(r_initial, threshold)
        in_contact = np.less_equal(distances, cutoffs)
        counts_per_chunk.append(np.sum(in_contact, axis=1))

    return np.concatenate(counts_per_chunk)
Exemple #19
0
def calc_chunkwise_noavg(func,
                         traj_list,
                         top_file,
                         chunk_size=1,
                         dim=1,
                         stride=1,
                         skip=0):
    """Evaluate ``func`` on every chunk of several trajectories and concatenate.

    The trajectories are read chunk by chunk so that not too much memory is
    used. ``func`` is called with the chunk as its only argument (wrap extra
    arguments in a lambda). The results are concatenated along axis 0, not
    averaged.

    Parameters
    ----------
    func : callable
        Observable evaluated on each chunk.
    traj_list : sequence of str
        Trajectory files.
    top_file :
        Topology passed to ``md.iterload``.
    chunk_size, stride, skip : int
        Passed to ``md.iterload``.
    dim : int
        1 when ``func`` returns a single array; otherwise the number of
        components, each concatenated separately (this branch is untested).

    Returns
    -------
    func_ret : np.ndarray
        Concatenated results.
    count : int
        Number of chunks processed.
    """
    count = 0
    for traj_file in traj_list:
        chunks = md.iterload(traj_file,
                             chunk=chunk_size,
                             top=top_file,
                             stride=stride,
                             skip=skip)
        for chunk in chunks:
            result = func(chunk)
            if count == 0:
                # First chunk seeds the output
                func_ret = np.array(result)
            elif dim == 1:
                func_ret = np.concatenate((func_ret, np.array(result)),
                                          axis=0)
            else:  # this is not yet tested!!
                for j in range(dim):
                    func_ret[j] = np.concatenate(
                        (func_ret[j], np.array(result)[j]), axis=0)
            count += 1
    return func_ret, count
Exemple #20
0
def load_Trajs(trajfiles_list, prmtop_file, stride, chunk):
    """
    Iteratively loads a list of NetCDF files and returns them
    as a list of mdtraj.Trajectory objects

    Parameters
    ----------
    trajfiles_list: list of str
            List with the names of trajectory files
    prmtop_file:  str
            Name of the prmtop file
    stride: int
            Frames to be used when loading the trajectories
    chunk:  int
            Number of frames to load at once from disk per iteration.
            If 0, load all.

    Returns
    -------
    list_chunks: list
            The mdtraj.Trajectory chunks of all files, in file order
    """
    list_chunks = []
    for traj in trajfiles_list:
        # extend() drains the chunk iterator of this file
        list_chunks.extend(
            md.iterload(traj, chunk=chunk, top=prmtop_file, stride=stride))
    return list_chunks
Exemple #21
0
def get_rg_for_run(name, ply_idxs, pdb, use_cent, recalc):
    """Load or compute the radius of gyration of every trajectory of a run.

    For trajectory number ``idx`` (1-based) the cached file
    ``rg_<idx>.npy`` is loaded when it exists and ``recalc`` is false.
    Otherwise the radius of gyration is computed from
    ``<name>_traj_<idx>.dcd`` (or ``<name>_traj_cent_<idx>.dcd`` when
    ``use_cent`` is set) for the atoms ``ply_idxs`` and cached, but only if
    the trajectory file was last modified more than five minutes ago.

    Returns
    -------
    rg_for_run : list of np.ndarray
        One array per trajectory for which a result was available.

    Raises
    ------
    IOError
        If a trajectory that has to be processed does not exist.
    """
    _topfile, trajnames = get_trajnames(name, use_cent)
    infix = "_traj_cent_" if use_cent else "_traj_"

    rg_for_run = []
    for idx in range(1, len(trajnames) + 1):
        tname = name + infix + str(idx) + ".dcd"
        rg_name = "rg_{}.npy".format(idx)

        if os.path.exists(rg_name) and not recalc:
            Rg = np.load(rg_name)
        else:
            if not os.path.exists(tname):
                raise IOError(tname + " does not exist!")

            minutes_since_change = np.abs(
                os.path.getmtime(tname) - time.time()) / 60.
            if minutes_since_change > 5:
                # Only process files untouched for more than five minutes;
                # this is meant to check if the trajectory is still running.
                pieces = []
                for chunk in md.iterload(tname, top=pdb,
                                         atom_indices=ply_idxs):
                    pieces.append(md.compute_rg(chunk))
                Rg = np.concatenate(pieces)
                print("  " + rg_name)
                np.save(rg_name, Rg)
            else:
                Rg = None

        if Rg is not None:
            rg_for_run.append(Rg)
    return rg_for_run
    def _extract_dipoles_and_volumes(self):
        """Extract the system's dipole moments and volumes.

        The trajectory ``self.trajectory_path`` is read in chunks of 50
        frames with ``self.input_coordinate_file`` as topology.

        Returns
        -------
        numpy.ndarray
            The dipole moments of the trajectory, one entry per frame
        numpy.ndarray
            The unit cell volumes of the trajectory, one entry per frame
        """
        import mdtraj

        per_frame_dipoles = []
        per_frame_volumes = []
        charge_list = self._extract_charges()

        chunks = mdtraj.iterload(self.trajectory_path,
                                 top=self.input_coordinate_file,
                                 chunk=50)
        for chunk in chunks:
            chunk_dipoles = mdtraj.geometry.dipole_moments(chunk, charge_list)
            per_frame_dipoles.extend(chunk_dipoles)
            per_frame_volumes.extend(chunk.unitcell_volumes)

        return np.array(per_frame_dipoles), np.array(per_frame_volumes)
Exemple #23
0
def test_iterload_skip():
    """Check that ``md.iterload(..., skip=n)`` matches a full load sliced at ``n``.

    Every file format is tested for several chunk sizes and skip values;
    coordinates, times and topology of the joined chunks are compared with
    the reference trajectory.
    """
    files = [
        'frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr', 'frame0.dcd',
        'frame0.binpos', 'frame0.xyz', 'frame0.lammpstrj'
    ]
    if not on_win or not on_py3:
        files.append('legacy_msmbuilder_trj0.lh5')

    err_msg = "failed for file %s with chunksize %i and skip %i"

    for fname in files:
        for chunk_size in [0, 1, 11, 100]:
            for skip in [0, 1, 20, 101]:
                print("testing file %s with skip=%i" % (fname, skip))
                t_ref = md.load(get_fn(fname), top=get_fn('native.pdb'))
                chunks = md.iterload(get_fn(fname),
                                     skip=skip,
                                     top=get_fn('native.pdb'),
                                     chunk=chunk_size)
                # Stitch the chunks back into one trajectory
                t = functools.reduce(lambda a, b: a.join(b), chunks)
                message = err_msg % (fname, chunk_size, skip)
                eq(t_ref.xyz[skip:], t.xyz, err_msg=message)
                eq(t_ref.time[skip:], t.time, err_msg=message)
                eq(t_ref.topology, t.topology, err_msg=message)
Exemple #24
0
 def Readtraj(self, filename=None, topfile=None, chunk=0, stride=1, atom_indices=None, skip=0, name=''):
     '''
     Create the MDTraj trajectory iterator and store it on the instance.

     Input:
     filename: Path to the trajectory file on disk (with file extension)
     topfile: Topology file for the trajectory (for example a .gro or .pdb file of starting structure).
     For SIFT calculation, topology must be read using .pdb format as MDTraj doesn't provide bonds information for .gro file
     chunk: Number of frames to load at once from disk per iteration. If 0, load all.
     stride: Read every nth-frame, default: 1
     atom_indices: read only a subset of atom coordinates if not None.
     skip: Skip first n frames.
     name: stored as self.name

     Raises IOError when the trajectory file, or a given topology file, does not exist.
     '''
     # Check that the input files exist
     paths_to_check = [filename] if topfile is None else [filename, topfile]
     for path in paths_to_check:
         if not os.path.isfile(path):
             raise IOError("Cannot locate file: %s" % path)

     read_options = dict(filename=filename, chunk=chunk, stride=stride,
                         atom_indices=atom_indices, skip=skip)
     # Call MDTraj iterload function
     self.traj_iter = md.iterload(top=topfile, **read_options)
     # Save the read state for reloading the trajectory iterator
     self.Save_ReadState(topfile=topfile, **read_options)
     # Take the topology from the first chunk only
     for first_chunk in self.traj_iter:
         self.topology = first_chunk.topology
         break
     # Find and set small_mols if any small molecule in trajectory
     self.Sense_SmallMol()
     # Reload the instance traj_iter variable (one chunk was consumed above)
     self.Reload()
     self.name = name
Exemple #25
0
def bin_covariance_multiple_coordinates_for_traj(trajfile,covar_by_bin,count_by_bin,
        observable1,observable2,obs1_bin_avg,obs2_bin_avg,
        binning_coord,bin_edges,topology,chunksize):
    """Loop over chunks of a trajectory to bin the covariance of two observables along a 1D coordinate.

    Parameters
    ----------
    trajfile : str
        Trajectory read with ``md.iterload``.
    covar_by_bin : np.ndarray
        Accumulator indexed as ``[bin, :, :]``; updated in place with the
        sum over frames of the products of the observables' deviations.
    count_by_bin : np.ndarray
        Accumulator with the number of frames per bin; updated in place.
    observable1, observable2 : callable
        Called with a trajectory chunk; the result is indexed by frame.
    obs1_bin_avg, obs2_bin_avg : sequence
        Per-bin averages subtracted from the observables.
    binning_coord : np.ndarray
        Value of the binning coordinate for every frame of the trajectory.
    bin_edges : np.ndarray
        Array whose row ``n`` holds the lower and upper edge of bin ``n``.
    topology :
        Topology passed to ``md.iterload``.
    chunksize : int
        Number of frames per chunk.

    Returns
    -------
    covar_by_bin, count_by_bin
        The updated accumulators (the same objects that were passed in).
    """
    ## TODO test cases:
    # - Two vector-valued observables
    # - One single-valued observable and one vector-valued observable.
    # - Two single-valued observables

    # In order to save memory we loop over trajectories in chunks.
    start_idx = 0
    for trajchunk in md.iterload(trajfile,top=topology,chunk=chunksize):
        # Calculate observable for trajectory chunk
        obs1_temp = observable1(trajchunk)
        obs2_temp = observable2(trajchunk)
        chunk_size = trajchunk.n_frames
        coord = binning_coord[start_idx:start_idx + chunk_size]
        # Sort frames into bins along binning coordinate.
        for n in range(bin_edges.shape[0]):
            frames_in_this_bin = (coord >= bin_edges[n][0]) & (coord < bin_edges[n][1])
            if frames_in_this_bin.any():
                # Compute the covariance contribution of this chunk
                delta_obs1 = obs1_temp[frames_in_this_bin] - obs1_bin_avg[n]
                delta_obs2 = obs2_temp[frames_in_this_bin] - obs2_bin_avg[n]

                # Accumulate: a plain assignment would discard all earlier
                # chunks while count_by_bin keeps counting their frames.
                covar_by_bin[n,:,:] += np.dot(delta_obs1.T,delta_obs2)
                count_by_bin[n] += float(np.count_nonzero(frames_in_this_bin))
        start_idx += chunk_size
    return covar_by_bin,count_by_bin
Exemple #26
0
    def _execute(self, directory, available_resources):
        """Compute the dipole moments of the trajectory and their parameter gradients.

        The trajectory ``self.trajectory_path`` is read in chunks of 50
        frames. For every chunk the coordinates are contracted with the
        charges, and with the charge derivative of every entry of
        ``self.gradient_parameters``. The result is stored in
        ``self.dipole_moments`` as an ``ObservableArray``.

        ``directory`` and ``available_resources`` are not used here.
        """
        import mdtraj

        charges = self._extract_charges(self.parameterized_system.system)
        charge_derivatives = self._compute_charge_derivatives(len(charges))

        moment_rows = []
        gradient_rows = {}
        for key in self.gradient_parameters:
            gradient_rows[key] = []

        chunks = mdtraj.iterload(
            self.trajectory_path,
            top=self.parameterized_system.topology_path,
            chunk=50,
        )
        for chunk in chunks:
            # Move the Cartesian axis in front of the atom axis so that
            # ``dot`` contracts over atoms
            xyz = chunk.xyz.transpose(0, 2, 1) * unit.nanometers

            moment_rows.extend(xyz.dot(charges))

            for key in self.gradient_parameters:
                gradient_rows[key].extend(xyz.dot(charge_derivatives[key]))

        stacked_moments = np.vstack(moment_rows)
        gradients = []
        for key in self.gradient_parameters:
            stacked_gradient = np.vstack(gradient_rows[key])
            gradients.append(ParameterGradient(key=key, value=stacked_gradient))

        self.dipole_moments = ObservableArray(
            value=stacked_moments,
            gradients=gradients,
        )
Exemple #27
0
def test_iterload_skip():
    """Verify ``skip`` handling of ``md.iterload`` against a sliced full load.

    Runs over all file formats, chunk sizes and skip values and compares
    coordinates, times and topology of the re-joined chunks with the
    reference.
    """
    files = [
        "frame0.nc",
        "frame0.h5",
        "frame0.xtc",
        "frame0.trr",
        "frame0.dcd",
        "frame0.binpos",
        "frame0.xyz",
        "frame0.lammpstrj",
    ]
    skip_legacy_format = on_win and on_py3
    if not skip_legacy_format:
        files.append("legacy_msmbuilder_trj0.lh5")

    err_msg = "failed for file %s with chunksize %i and skip %i"

    def join_pair(left, right):
        return left.join(right)

    for file_name in files:
        for cs in (0, 1, 11, 100):
            for skip in (0, 1, 20, 101):
                print("testing file %s with skip=%i" % (file_name, skip))
                t_ref = md.load(get_fn(file_name), top=get_fn("native.pdb"))
                t = functools.reduce(
                    join_pair,
                    md.iterload(get_fn(file_name), skip=skip, top=get_fn("native.pdb"), chunk=cs),
                )
                case = (file_name, cs, skip)
                eq(t_ref.xyz[skip:], t.xyz, err_msg=err_msg % case)
                eq(t_ref.time[skip:], t.time, err_msg=err_msg % case)
                eq(t_ref.topology, t.topology, err_msg=err_msg % case)
def run(project, atom_indices=None, traj_fn = 'all'):
    """Collect per-frame results of ``md.shrake_rupley`` / ``asa.calculate_asa``.

    With ``traj_fn == 'all'`` (case-insensitive) every trajectory of
    ``project`` is processed in chunks of 1000 frames, and the results are
    stored in an array of shape (n_trajs, max trajectory length, n_atoms)
    that is pre-filled with -1. Any other ``traj_fn`` is read as a single
    file with ``Trajectory.enum_chunks_from_lhdf``, and the per-frame
    results are returned as one array.

    ``atom_indices`` is forwarded to the trajectory readers.
    """
    n_atoms = project.load_conf().n_atoms
    process_all = traj_fn.lower() == 'all'

    if process_all:
        longest = np.max(project.traj_lengths)
        # -1 marks entries past the end of shorter trajectories
        SASA = np.ones((project.n_trajs, longest, n_atoms)) * -1

        for traj_ind in xrange(project.n_trajs):
            logger.info("Working on Trajectory %d", traj_ind)
            current_file = project.traj_filename(traj_ind)
            collected = []
            for traj_chunk in md.iterload(current_file, atom_indices=atom_indices, chunk=1000):
                collected.extend(md.shrake_rupley(traj_chunk))
            n_valid = project.traj_lengths[traj_ind]
            SASA[traj_ind, 0:n_valid] = collected
    else:
        collected = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, AtomIndices=atom_indices):
            collected.extend(asa.calculate_asa(traj_chunk))
        SASA = np.array(collected)

    return SASA
Exemple #29
0
def calculate_contacts(dirs, contact_function, native_pairs, nonnative_pairs,
                       r0_native, r0_nonnative):
    """Calculate native and non-native contacts for trajectories.

    Parameters
    ----------
    dirs : sequence of str
        Directories; ``<dir>/Q.dat`` is used to count frames and
        ``<dirs[0]>/Native.pdb`` is the topology for every trajectory.
    contact_function : callable
        Called as ``contact_function(distances, r0)``.
    native_pairs, nonnative_pairs : np.ndarray
        Atom index pairs passed to ``md.compute_distances``.
    r0_native, r0_nonnative :
        Reference distances passed to ``contact_function``.

    Returns
    -------
    Qi_contacts : np.ndarray
        Native contacts, shape (n_frames, number of native pairs).
    Ai_contacts : np.ndarray
        Non-native contacts, shape (n_frames, number of non-native pairs).
    A : np.ndarray
        Sum of the non-native contacts per frame.
    """
    n_frames = np.sum([file_len("%s/Q.dat" % d) for d in dirs])

    Qi_contacts = np.zeros((n_frames, native_pairs.shape[0]), float)
    Ai_contacts = np.zeros((n_frames, nonnative_pairs.shape[0]), float)

    logging.info("calculating native/nonnative contacts")
    offset = 0
    # NOTE(review): `trajfiles` is not a parameter of this function; it has
    # to exist at module level -- confirm where it is defined.
    # Loop over trajectory subdirectories.
    for trajfile in trajfiles:
        # Loop over chunks of each trajectory.
        for chunk in md.iterload(trajfile, top="%s/Native.pdb" % dirs[0]):
            stop = offset + chunk.n_frames

            native_r = md.compute_distances(chunk, native_pairs,
                                            periodic=False)
            Qi_contacts[offset:stop, :] = contact_function(native_r,
                                                           r0_native)

            nonnative_r = md.compute_distances(chunk,
                                               nonnative_pairs,
                                               periodic=False)
            Ai_contacts[offset:stop, :] = contact_function(nonnative_r,
                                                           r0_nonnative)

            offset = stop

    A = np.sum(Ai_contacts, axis=1)

    return Qi_contacts, Ai_contacts, A
def number_density(job):
    """Plot and save the O and H number-density profiles for one job.

    The position trajectory in the job workspace is read in chunks of 5000
    frames after skipping the first 6000. For every chunk a density profile
    is computed separately for atoms named ``O`` and ``H``; the mean and
    standard deviation over chunks are plotted and written to text files
    under ``<project root>/data/<nwater>water_data/``.
    """
    dim = 1
    box_range = [0.5, 1.5]
    pore_center = (box_range[1] - box_range[0]) / 2 + box_range[0]
    o_densities = list()
    h_densities = list()
    fig, ax = plt.subplots()

    chunks = md.iterload(
        os.path.join(job.ws, "carbon_water-pos-1.pdb"),
        top=os.path.join(job.ws, "init.mol2"),
        chunk=5000,
        skip=6000,
    )
    for trj in chunks:
        topology = trj.topology
        water_o = trj.atom_slice(topology.select("name O"))
        water_h = trj.atom_slice(topology.select("name H"))
        box = trj.unitcell_lengths[0]
        area = box[0] * box[2]

        for water_trj in (water_o, water_h):
            bins, density = compute_density(water_trj,
                                            area,
                                            dim,
                                            pore_center=pore_center,
                                            bin_width=0.01)
            # Route the profile by the atom name present in the slice.
            label_name = list({atom.name for atom in water_trj.topology.atoms})
            target = o_densities if label_name[0] == "O" else h_densities
            target.append(density)

    o_mean = np.mean(o_densities, axis=0)
    h_mean = np.mean(h_densities, axis=0)
    o_std = np.std(o_densities, axis=0)
    h_std = np.std(h_densities, axis=0)

    for mean, std, label in ((o_mean, o_std, "O"), (h_mean, h_std, "H")):
        plt.plot(bins, mean, label=label)
        plt.fill_between(bins, mean + std, mean - std, alpha=0.2)
    plt.xlabel("z-position (nm)")
    plt.ylabel("Number Density ($nm^-3$)")

    plt.legend()

    def _out_path(filename):
        # Output location is derived from the job's nwater state point.
        return project.root_directory() + "/data/{}/{}".format(
            str(job.sp.nwater) + "water_data", filename)

    with job:
        np.savetxt(
            _out_path("o_density.txt"),
            np.vstack([bins, o_mean, o_std]).T,
            header="Bins\tDensity_mean\tDensity_std",
        )

        np.savetxt(
            _out_path("h_density.txt"),
            np.vstack([bins, h_mean, h_std]).T,
            header="Bins\tDensity_mean\tDensity_std",
        )
        plt.savefig(_out_path("numberdensity.pdf"))
Exemple #31
0
def junk():
    """Compute collective variables chunk by chunk and concatenate them.

    Each chunk's coordinates are flattened to rows of 75 values before being
    passed to ``Ucg.calculate_cv``. The result is only bound to a local name;
    nothing is returned.
    """
    per_chunk = [
        Ucg.calculate_cv(np.reshape(piece.xyz, (-1, 75)))
        for piece in md.iterload(tname, top=topfile)
    ]
    cv_traj = np.concatenate(per_chunk, axis=0)
Exemple #32
0
def calc_coordinate_for_traj(trajfile,observable_fun,topology,chunksize):
    """Evaluate an observable over a trajectory that is read in chunks.

    The trajectory is streamed ``chunksize`` frames at a time to limit memory
    use; the values ``observable_fun`` yields for each chunk are collected in
    order and returned as one array.
    """
    values = [
        value
        for piece in md.iterload(trajfile, top=topology, chunk=chunksize)
        for value in observable_fun(piece)
    ]
    return np.array(values)
Exemple #33
0
def test_md_join():
    """md.join over iterload chunks must equal a one-shot strided load."""
    reference = md.load(get_fn('frame0.h5'))[:20]
    whole = md.load(fn, top=reference, stride=2)
    pieces = md.iterload(fn, top=reference, stride=2, chunk=6)
    joined = md.join(pieces)
    for attr in ('xyz', 'time', 'unitcell_angles', 'unitcell_lengths'):
        eq(getattr(whole, attr), getattr(joined, attr))
Exemple #34
0
def test_md_join():
    """md.join over iterload chunks must equal a one-shot strided load."""
    reference = md.load(get_fn('frame0.h5'))[:20]
    whole = md.load(fn, top=reference, stride=2)
    pieces = md.iterload(fn, top=reference, stride=2, chunk=6)
    joined = md.join(pieces)
    for attr in ('xyz', 'time', 'unitcell_angles', 'unitcell_lengths'):
        eq(getattr(whole, attr), getattr(joined, attr))
Exemple #35
0
 def test():
     """Joined iterload chunks must match a direct load for several strides."""
     compared = ('xyz', 'time', 'unitcell_angles', 'unitcell_lengths')
     for stride in (1, 2, 3):
         loaded = md.load(fn, top=t_ref, stride=stride)
         pieces = md.iterload(fn, top=t_ref, stride=stride, chunk=6)
         iterloaded = functools.reduce(lambda left, right: left.join(right),
                                       pieces)
         for attr in compared:
             eq(getattr(loaded, attr), getattr(iterloaded, attr))
Exemple #36
0
def compute_rmsd(fname, topname, sel="name CA", step=1):
    """Compute the RMSD of a trajectory to frame 0 of its topology file.

    Parameters
    ----------
    fname : str
        Trajectory file, streamed with ``md.iterload``.
    topname : str
        Structure file used both as the topology and as the RMSD reference.
    sel : str
        Atom selection string used to pick the atoms entering the RMSD.
    step : int
        Stride passed to ``md.iterload``.

    Returns
    -------
    numpy.ndarray
        Concatenated RMSD values of all loaded frames.
    """
    # Load the reference once; it is both the topology and the RMSD target.
    top = md.load(topname)
    atom_indices = top.topology.select(sel)
    per_chunk = [
        md.rmsd(chunk, top, 0, atom_indices=atom_indices)
        for chunk in md.iterload(fname, top=top, stride=step)
    ]
    return np.concatenate(per_chunk)
Exemple #37
0
def test_chunk0_iterload():
    """With chunk=0 the last yielded piece must hold every frame."""
    path = get_fn('frame0.h5')
    expected = md.load(path)

    # Exhaust the iterator; only the last yielded trajectory is inspected.
    for last_piece in md.iterload(path, chunk=0):
        continue

    eq(expected.n_frames, last_piece.n_frames)
Exemple #38
0
def plot_rmsd(trajectories,
              topology=None,
              subset=None,
              output='rmsd.dat',
              chunksize=100,
              reimage=False):
    """Compute, save and plot the RMSD of trajectories to their first frame.

    Parameters
    ----------
    trajectories : iterable of str
        Trajectory files; processed in the order given by
        ``sort_key_for_numeric_suffixes``.
    topology : str, optional
        Topology file, loaded with ``mdtraj.load_topology`` when given.
    subset : str, optional
        Atom selection evaluated on the loaded topology; restricts the atoms
        used for the RMSD.
    output : str
        Text file receiving one RMSD value per line.
    chunksize : int
        Number of frames read per iteration.
    reimage : bool
        When true, ``image_molecules`` is applied to the reference frame and
        to every chunk.
    """
    import mdtraj
    import numpy as np
    from tqdm import tqdm
    if topology:
        topology = mdtraj.load_topology(topology)
    if subset:
        subset = topology.select(subset)
    trajectories = sorted(trajectories, key=sort_key_for_numeric_suffixes)
    first_frame = mdtraj.load_frame(trajectories[0], 0, top=topology)
    frame_size = first_frame.xyz[0].nbytes
    if reimage:
        first_frame.image_molecules(inplace=True)
    rmsds = []
    for trajectory in tqdm(trajectories, unit='file'):
        _, ext = os.path.splitext(trajectory)
        total, unit_scale = None, None
        if ext.lower() == '.dcd':
            # Estimate the frame count from the file size so the inner
            # progress bar can show a total.
            n_frames = round(
                os.path.getsize(trajectory) / frame_size,
                -1 * len(str(chunksize)[1:]))
            total = int(n_frames / chunksize)
            unit_scale = chunksize
        itertraj = mdtraj.iterload(trajectory, top=topology, chunk=chunksize)
        tqdm_kwargs = {
            'total': total,
            'unit': 'frames',
            'unit_scale': unit_scale,
            'postfix': {
                'traj': trajectory
            }
        }
        for chunk in tqdm(itertraj, **tqdm_kwargs):
            if reimage:
                chunk.image_molecules(inplace=True)
            rmsd = mdtraj.rmsd(chunk, first_frame,
                               atom_indices=subset) * 10.0  # nm->A
            rmsds.append(rmsd)

    rmsds = np.concatenate(rmsds)
    with open(output, 'w') as f:
        f.write('\n'.join(map(str, rmsds)))
    print('\nWrote RMSD values to', output)
    print('Plotting results...')
    plt.plot(rmsds)
    ax = plt.gca()
    # Only mention additional files when there are any; previously a single
    # trajectory produced the title "<name> and  more".
    n_extra = len(trajectories) - 1
    suffix = ' and {} more'.format(n_extra) if n_extra > 0 else ''
    ax.set_title('{}{}'.format(trajectories[0], suffix))
    ax.set_xlabel('Frames')
    ax.set_ylabel('RMSD (A)')
    plt.show()
Exemple #39
0
    def __init__(self, traj_file, top, chunk=100, stride=1):
        """Set up chunked reading of ``traj_file``.

        An ``md.iterload`` iterator is created; no chunk is held yet
        (``trajectory`` is ``None``) and ``index`` starts at ``chunk - 1``.
        """
        load_options = dict(top=top, chunk=chunk, stride=stride)
        self.iterator = md.iterload(traj_file, **load_options)
        self.trajectory = None

        self.chunk = chunk
        self.index = chunk - 1
Exemple #40
0
 def load_data(self):
     """Return dihedral angles for every chunk of every input trajectory.

     Files whose name ends in ``h5`` are loaded without an explicit topology;
     all others use ``self.top``. One float64 array is produced per chunk.
     """
     load_time_start = time.time()
     data = []
     for path in self.filenames:
         if path.endswith('h5'):
             load_kwargs = {}
         else:
             load_kwargs = {'top': self.top}
         pieces = md.iterload(path, chunk=self.args.split, **load_kwargs)
         data.extend(
             np.asarray(md.compute_dihedrals(piece, self.indices), np.double)
             for piece in pieces)
     return data
Exemple #41
0
def bin_observable(trajfiles, observable, binning_coord, bin_edges, chunksize=10000):
    """Average an observable within bins of a reaction coordinate.

    Parameters
    ----------
    trajfiles : list
        Trajectory file names (full paths allowed).

    observable : object
        Provides ``top``, ``dimension`` and ``map(trajectory_chunk)``; the
        mapped result is indexed per frame.

    binning_coord : list
        One 1D timeseries per trajectory; its values decide which bin each
        frame of the corresponding trajectory falls into.

    bin_edges : np.ndarray (n_bins,2)
        Lower (inclusive) and upper (exclusive) edge of every bin.

    chunksize : int, opt.
        Number of frames loaded per chunk. Default: 10000

    Returns
    -------
    obs_bin_avg : np.ndarray (n_bins, observable.dimension)
        Per-bin average of the observable; bins without frames stay zero.
    """

    assert len(binning_coord[0].shape) == 1
    assert bin_edges.shape[1] == 2

    n_bins = bin_edges.shape[0]
    obs_by_bin = np.zeros((n_bins,observable.dimension),float)
    count_by_bin = np.zeros(n_bins,float)
    for traj_idx, trajfile in enumerate(trajfiles):
        offset = 0
        for piece in mdtraj.iterload(trajfile,top=observable.top,chunk=chunksize):
            obs_values = observable.map(piece)
            n_frames = piece.n_frames
            coord = binning_coord[traj_idx][offset:offset + n_frames]
            # Accumulate this chunk's frames into their histogram bins.
            for b in range(n_bins):
                lower, upper = bin_edges[b][0], bin_edges[b][1]
                in_bin = (coord >= lower) & (coord < upper)
                if not np.any(in_bin):
                    continue
                obs_by_bin[b,:] += np.sum(obs_values[in_bin],axis=0)
                count_by_bin[b] += float(np.count_nonzero(in_bin))
            offset += n_frames

    # Divide only where at least one frame was counted.
    obs_bin_avg = np.zeros((n_bins,observable.dimension),float)
    filled = count_by_bin > 0
    obs_bin_avg[filled] = obs_by_bin[filled] / count_by_bin[filled][:, np.newaxis]
    return obs_bin_avg
def test_iterload():
    """Joined iterload chunks must equal md.load across formats and strides."""
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', 'frame0.binpos', 'legacy_msmbuilder_trj0.lh5']
    chunk = 100
    for stride in (1, 2, 5, 10):
        for name in files:
            t_ref = md.load(get_fn(name), stride=stride, top=get_fn('native.pdb'))
            pieces = md.iterload(get_fn(name), stride=stride,
                                 top=get_fn('native.pdb'), chunk=chunk)
            t = functools.reduce(lambda left, right: left.join(right), pieces)
            for attr in ('xyz', 'time', 'topology'):
                eq(getattr(t_ref, attr), getattr(t, attr))
Exemple #43
0
def _fluctuation_matrix(reference_frame, trajectories_path, atom_subset, topology, chunk, first_frame):
    """
    Accumulate, per atom, the summed squared deviation of a trajectory
    from a reference conformation.

    Input:
        reference_frame:
            numpy.array
            coordinates of the reference / average / native conformation,
            indexed by atom position within the selection
        trajectories_path:
            str
            path of the trajectory file of interest
        atom_subset:
            numpy.array
            atom indices of the selection
        topology:
            mdtraj.core.topology.Topology
        chunk:
            int
            number of frames loaded at a time
        first_frame:
            mdtraj.core.trajectory.Trajectory
            trajectory of the first frame (not used in this function)

    Output:
        residual_sum_squares:
            numpy.array, one entry per selected atom
        number_of_frames:
            int, total number of frames read
        trajectory_time:
            list with the time array of every chunk
    """
    n_atoms = len(atom_subset)
    residual_sum_squares = np.zeros((n_atoms))

    number_of_frames = 0
    trajectory_time = []
    pieces = md.iterload(trajectories_path, chunk = chunk, top=topology, atom_indices = atom_subset)
    for chunk_i in pieces:
        trajectory_time.append(chunk_i.time)
        # coords has shape (frames, atoms, 3)
        coords = chunk_i.xyz
        for atom in range(n_atoms):
            # Coordinates are scaled by 10 before comparing to the reference
            # (presumably nm -> Angstrom; confirm the reference's units).
            diff = coords[:, atom, :] * 10 - reference_frame[atom]
            residual_sum_squares[atom] += (diff ** 2).sum(axis = 1).sum(axis=0)
        number_of_frames += coords.shape[0]

    return residual_sum_squares, number_of_frames, trajectory_time
Exemple #44
0
def test_hashing():
    """Frame hashes must be unique and must change with the topology."""
    frames = list(md.iterload(get_fn("frame0.xtc"), chunk=1, top=get_fn("native.pdb")))
    hashes = list(map(hash, frames))
    # every frame must hash to a distinct value
    assert len(set(hashes)) == len(hashes)

    # adding a bond to the topology must alter the hash
    first = frames[0]
    topology = first.topology
    topology.add_bond(topology.atom(0), topology.atom(1))

    rehashed = hash(first)
    assert rehashed != hashes[-1]
Exemple #45
0
def regroup_DISK(trajs, topology_file, disctrajs, path, stride=1):
    """Regroups MD trajectories into clusters according to discretised trajectories.

    Parameters
    ----------
    trajs : list of strings
        xtc/dcd/... trajectory file names
    topology_file : string
        name of topology file that matches `trajs`
    disctrajs : list of array-likes
        discretized trajectories; the value -1 marks frames to be skipped
    path : string
        file system path to directory where cluster trajectories are written
    stride : int
        stride of disctrajs with respect to the (original) trajs

    Returns
    -------
    cluster : list of file names or `None`, len(cluster)=max(state)+1
        each element cluster[i] is either `None` if i wasn't found in disctrajs or
        is the file name of a new trajectory that holds all frames that were
        assigned to cluster i. Empty when no valid state exists.
    """
    # handle single element invocation
    if not isinstance(trajs, list):
        trajs = [trajs]
    if not isinstance(disctrajs, list):
        disctrajs = [disctrajs]

    states = np.unique(np.hstack(([np.unique(disctraj) for disctraj in disctrajs])))
    states = np.setdiff1d(states, [-1])  # exclude invalid states
    # Guard against the case where every frame is unassigned (-1).
    n_slots = int(max(states)) + 1 if len(states) else 0
    writer = [None] * n_slots
    cluster = [None] * n_slots

    try:
        for i in states:
            cluster[i] = path + os.sep + ('%d.xtc' % i)
            writer[i] = XTCTrajectoryFile(cluster[i], 'w', force_overwrite=True)

        for disctraj, traj in zip(disctrajs, trajs):
            reader = md.iterload(traj, top=topology_file, stride=stride)
            start = 0
            for chunk in reader:
                chunk_length = chunk.xyz.shape[0]
                # range works on both Python 2 and Python 3
                for i in range(chunk_length):
                    cl = disctraj[i + start]
                    if cl != -1:
                        writer[cl].write(chunk.xyz[i, :, :])  # np.newaxis?
                start += chunk_length
                # TODO: check that whole disctrajs was used
    finally:
        # Close every writer that was opened, even if an error interrupted
        # the regrouping.
        for w in writer:
            if w is not None:
                w.close()

    return cluster
def read_and_featurize(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, resSeq_pairs = None, iterative = True):
    """Compute dihedral and/or residue-contact features for one trajectory.

    Parameters
    ----------
    traj_file : str
        Trajectory file that carries its own topology.
    features_dir, condition :
        Not used in the visible part of this function.
    dihedral_types : list of str
        Which of "phi", "psi", "chi1", "chi2" to compute. Never mutated here.
    dihedral_residues : sequence or None
        Residues for the dihedral features; skipped when None or empty.
    resSeq_pairs : sequence or None
        Residue pairs (by resSeq) for contact features; skipped when None or
        empty.
    iterative : bool
        When true, contacts are computed in chunks of 1000 frames.

    Returns
    -------
    None. On a loading/featurization failure the error is printed and the
    function returns early.

    NOTE(review): the features are computed but neither saved nor returned in
    the code visible here -- the function looks truncated; confirm against
    the full source.
    """
    a = time.time()
    dihedral_indices = []
    residue_order = []
    top = None

    if dihedral_residues is not None and len(dihedral_residues) > 0:
        # The original read `top` and `traj` before assigning them, which
        # raised UnboundLocalError as soon as this branch ran.
        top = md.load_frame(traj_file, index=0).topology
        for dihedral_type in dihedral_types:
            if dihedral_type == "phi": dihedral_indices.append(phi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "psi": dihedral_indices.append(psi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi1": dihedral_indices.append(chi1_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi2": dihedral_indices.append(chi2_indices(fix_topology(top), dihedral_residues))

        # NOTE(review): loading the whole trajectory is the presumed intent
        # of the previously unassigned `traj`; confirm.
        traj = md.load(traj_file)
        dihedral_angles = []
        for dihedral_type in dihedral_indices:
            angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=dihedral_type))
            dihedral_angles.append(np.sin(angles))
            dihedral_angles.append(np.cos(angles))

        manual_features = np.transpose(np.concatenate(dihedral_angles))

    if resSeq_pairs is not None and len(resSeq_pairs) > 0:
        if top is None:
            top = md.load_frame(traj_file, index=0).topology
        resIndex_pairs = convert_resSeq_to_resIndex(top, resSeq_pairs)
        contact_features = []
        if iterative:
            try:
                for chunk in md.iterload(traj_file, chunk = 1000):
                    chunk_features = md.compute_contacts(chunk, contacts = resIndex_pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
                    print(np.shape(chunk_features))
                    contact_features.append(chunk_features)
                contact_features = np.concatenate(contact_features)
            except Exception as e:
                print(str(e))
                print("Failed")
                return
        else:
            try:
                traj = md.load(traj_file)
                contact_features =  md.compute_contacts(traj, contacts = resIndex_pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
            except Exception as e:
                print(str(e))
                print("Failed for traj")
                return
Exemple #47
0
def _load_traj_xyz(md_topology, trajectory, atom_subset, verbose, chunk, stride):
    """
    Returns xyz coordinates of all requested trajectories

    Every file in the directory ``trajectory`` is read in chunks, each chunk
    is superposed onto frame 0 of the first listed file, and the coordinates
    are flattened to one row per frame.

    Parameters
    ----------
    md_topology : topology passed to mdtraj as ``top``
    trajectory : str
        Directory holding the trajectory files.
    atom_subset : array-like
        Atom indices to load.
    verbose : int
        > 0 lists the files, > 1 additionally reports every loaded chunk.
    chunk : int
        Number of frames read per iteration.
    stride : int
        Only every stride-th frame is read.

    Returns
    -------
    all_coordinates_np : numpy.ndarray, shape (n_frames, n_atoms * 3)
    sim_time : numpy.ndarray
        Concatenated time values of all chunks.

    Exits the interpreter via ``sys.exit`` when the directory cannot be
    listed, is empty, or loading fails.
    """

    # first create a list with all the paths that are needed
    try:
        trajectory_path = os.listdir(trajectory)
    except (OSError, TypeError):
        sys.exit('Make sure you have provided a string for a valid path to a trajectory file!')

    if not trajectory_path:
        # Previously an empty directory surfaced as an uncaught IndexError.
        sys.exit('Make sure you provided a valid path to a folder with trajectory files!')

    if verbose > 0:
        print('Loading trajectories from the following files: ')
        for trajectory_i in trajectory_path:
            print(trajectory_i)

    # get first frame for superpositioning
    # os.path.join also works when `trajectory` has no trailing separator.
    first_frame = md.load(os.path.join(trajectory, trajectory_path[0]), frame=0, top=md_topology, atom_indices=atom_subset)

    # initiate some variables
    all_coordinates = []
    sim_time = []

    # now we need to load each trajectory file as a chunk
    try:
        for file_i in trajectory_path:

            for chunk_i in md.iterload(os.path.join(trajectory, file_i), chunk, top=md_topology, atom_indices = atom_subset, stride = stride):

                sim_time.append(chunk_i.time)

                # superpose each chunk to first frame
                chunk_i.superpose(first_frame, 0)

                if verbose > 1:
                    print('Successfully loaded trajectory: \n %s' % (chunk_i))

                all_coordinates.append(chunk_i.xyz.reshape(chunk_i.n_frames, chunk_i.n_atoms * 3))

        all_coordinates_np = np.concatenate(all_coordinates)
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt propagate.
        sys.exit('Make sure you provided a valid path to a folder with trajectory files!')
    else:
        # Integer division keeps the atom count an int on Python 3 as well.
        print('\nSuccesfully loaded coordinates for %s atoms from %s out of %s frames!' % (all_coordinates_np.shape[1] // 3, all_coordinates_np.shape[0], all_coordinates_np.shape[0] * stride))

    sim_time = np.concatenate(sim_time)

    return all_coordinates_np, sim_time
Exemple #48
0
    def load_data(self):
        """Featurize every chunk of every input trajectory.

        Files whose name ends in ``h5`` are loaded without an explicit
        topology; all others use ``self.top``. Timing and size summaries are
        printed, and the list of per-chunk feature arrays is returned.
        """
        load_time_start = time.time()
        data = []
        for path in self.filenames:
            if path.endswith('h5'):
                load_kwargs = {}
            else:
                load_kwargs = {'top': self.top}
            pieces = md.iterload(path, chunk=self.args.split, **load_kwargs)
            data.extend(self.featurizer.partial_transform(piece)
                        for piece in pieces)

        elapsed = time.time() - load_time_start
        print('Loading data into memory + vectorization: %f s' % elapsed)
        n_observations = sum(len(series) for series in data)
        print('Fitting with %s timeseries from %d trajectories with %d total observations' % (
            len(data), len(self.filenames), n_observations))

        return data
Exemple #49
0
def itertrajs(meta, stride=1):
    """Yield ``(index, trajectory)`` for every row of ``meta``, one at a time.

    Each trajectory is assembled by joining the chunks produced by
    ``md.iterload`` with the requested stride. Per the original author's
    note, this is done instead of a strided one-shot load to conserve memory.
    """
    tops = preload_tops(meta)
    for index, row in meta.iterrows():
        pieces = md.iterload(row['traj_fn'],
                             top=tops[row['top_fn']],
                             stride=stride)
        joined = md.join(pieces,
                         discard_overlapping_frames=False,
                         check_topology=False)
        yield index, joined
Exemple #50
0
def _neighbouring_atoms(md_topology, trajectory, atom_subset, atom_number, verbose, unpythonize, chunk, cutoff):
    """
    Collect the atoms found within ``cutoff`` of ``atom_number`` over all
    trajectory files in the directory ``trajectory``.

    Parameters
    ----------
    md_topology : topology passed to mdtraj as ``top``
    trajectory : str
        Directory holding the trajectory files.
    atom_subset : array-like
        Atom indices to load.
    atom_number : int
        Query atom handed to ``md.compute_neighbors``.
    verbose : int
        > 0 lists the files, > 1 additionally reports every loaded chunk.
    unpythonize :
        Not used in this function.
    chunk : int
        Number of frames read per iteration.
    cutoff : float
        Distance cutoff handed to ``md.compute_neighbors``.

    Returns
    -------
    all_neighbour_atoms_np : numpy.ndarray
        Concatenation of the neighbour results of all frames.
    sim_time : numpy.ndarray
        Concatenated time values of all chunks.

    Exits the interpreter via ``sys.exit`` when the directory cannot be
    listed or loading fails.
    """

    # first create a list with all the paths that are needed
    try:
        trajectory_path = os.listdir(trajectory)
    except (OSError, TypeError):
        sys.exit('Make sure you have provided a string for a valid path to a trajectory file!')

    if verbose > 0:
        print('Loading trajectories from the following files: ')
        for trajectory_i in trajectory_path:
            print(trajectory_i)

    # initiate some variables
    neighbour_atoms = []
    sim_time = []
    number_of_frames = 0

    # now we need to load each trajectory file as a chunk
    pbar = None
    try:
        pbar = tqdm(total=len(trajectory_path), unit= 'File')

        for file_i in trajectory_path:
            # os.path.join also works when `trajectory` has no trailing separator.
            for chunk_i in md.iterload(os.path.join(trajectory, file_i), chunk, top=md_topology, atom_indices = atom_subset):

                sim_time.append(chunk_i.time)
                number_of_frames += chunk_i.n_frames

                if verbose > 1:
                    print('Successfully loaded trajectory: \n %s' % (chunk_i))

                neighbour_atoms.append(md.compute_neighbors(chunk_i, cutoff, np.array([atom_number])))

            pbar.update(1)

        # Concatenate once after all files were read; doing it per file only
        # repeated the same work on a growing list.
        neighbour_atoms_np = np.concatenate(neighbour_atoms)

    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt propagate.
        sys.exit('Make sure you provided a valid path to a folder with trajectory files!')
    else:
        print('\nSuccesfully loaded coordinates for %s atoms in %s frames!' % (len(atom_subset), number_of_frames))
    finally:
        if pbar is not None:
            pbar.close()

    all_neighbour_atoms_np = np.concatenate(neighbour_atoms_np)

    sim_time = np.concatenate(sim_time)

    return all_neighbour_atoms_np, sim_time
Exemple #51
0
    def load_data(self):
        """Compute dihedral angles for every chunk of every input trajectory.

        Files whose name ends in ``h5`` are loaded without an explicit
        topology; all others use ``self.top``. Timing and size summaries are
        printed, and the list of per-chunk float64 arrays is returned.
        """
        load_time_start = time.time()
        data = []
        for path in self.filenames:
            if path.endswith('h5'):
                load_kwargs = {}
            else:
                load_kwargs = {'top': self.top}
            pieces = md.iterload(path, chunk=self.args.split, **load_kwargs)
            data.extend(
                np.asarray(md.compute_dihedrals(piece, self.indices), np.double)
                for piece in pieces)

        elapsed = time.time() - load_time_start
        print('Loading data into memory + vectorization: %f s' % elapsed)
        n_observations = sum(len(series) for series in data)
        print('''Fitting with %s timeseries from %d trajectories with %d
                total observations''' % (len(data), len(self.filenames),
                                         n_observations))
        return data
Exemple #52
0
def main():
    """Accumulate a 3D density grid of water oxygens over a trajectory.

    The structure file defines the grid edges and the atom selection; the
    trajectory is read one frame at a time to keep memory use low. The summed
    grid is averaged over frames, normalised by ``args.density`` times the
    bin volume, and written out with ``print_density``.
    """
    args = parse_args()
    # We need to prepare some things
    single_frame = md.load(args.pdb)
    edges = find_edges(single_frame, args.np)
    # Indices of the atoms to keep
    mask = single_frame.topology.select("water and name O")
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; the builtin gives the same float64 array.
    density = np.zeros((args.np, args.np, args.np), dtype=float)
    nf = 0
    # Loop over the frames one by one to limit memory use
    for frame in md.iterload(args.xtc, top=args.pdb, chunk=1):
        density += get_frame_density(np.take(frame[0].xyz[0], mask, axis=0), edges)
        nf += 1
    density = density / nf
    # NOTE(review): the bin volume is the cube of the first axis' bin width,
    # i.e. equal bin widths on all three axes are assumed -- confirm.
    bin_width = edges[0][1] - edges[0][0]
    density = density / (args.density * (bin_width * bin_width * bin_width))
    print_density(density, edges, args.out_name, args.np)
Exemple #53
0
def makeHMM(Trajectories, topology):
    """Fit an 8-state Gaussian HMM on superposed alpha-carbon features.

    ``Trajectories`` is a glob pattern; matching files are processed in
    sorted order in chunks of 100 frames. The fitted model's timescales are
    printed and the model is returned.
    """
    top = md.load_prmtop(topology)
    ca_indices = [atom.index for atom in top.atoms if atom.name == 'CA']
    filenames = sorted(glob(Trajectories))
    reference = md.load_frame(filenames[0], 0, top=top)

    featurizer = SuperposeFeaturizer(ca_indices, reference)
    dataset = [
        featurizer.partial_transform(piece)
        for fragment in filenames
        for piece in md.iterload(fragment, chunk=100, top=top)
    ]
    hmm = GaussianHMM(n_states=8)
    hmm.fit(dataset)
    print(hmm.timescales_)
    return hmm
Exemple #54
0
def featurize_all(filenames, featurizer, topology, chunk=1000, stride=1):
    """Load and featurize many trajectory files.

    Parameters
    ----------
    filenames : list of strings
        Paths to MD trajectory files
    featurizer : Featurizer
        Its ``partial_transform`` is applied to every loaded chunk
    topology : str, Topology, Trajectory
        Topology (or path to one) used to load every file that does not end
        in ``.h5``
    chunk : {int, None}
        Chunk size handed to md.iterload, so that only part of a trajectory
        is in memory at once
    stride : int, default=1
        Only read every stride-th frame.

    Returns
    -------
    data : np.ndarray, shape=(total_length_of_all_trajectories, n_features)
    indices : np.ndarray, shape=(total_length_of_all_trajectories)
    fns : np.ndarray shape=(total_length_of_all_trajectories)
        The three arrays share one indexing: data[i] is the featurized
        indices[i]-th frame of the trajectory file fns[i].

    Raises
    ------
    ValueError
        If no chunk was loaded at all.
    """
    data, indices, fns = [], [], []

    for path in filenames:
        load_kwargs = {'top': topology}
        if path.endswith('.h5'):
            load_kwargs = {}
        # Frame offset (in unstrided frames) of the current chunk in this file.
        offset = 0
        for traj in md.iterload(path, chunk=chunk, stride=stride, **load_kwargs):
            features = featurizer.partial_transform(traj)
            n_frames = len(features)

            data.append(features)
            indices.append(offset + (stride*np.arange(n_frames)))
            fns += [path] * n_frames
            offset += (stride*n_frames)
    if not data:
        raise ValueError("None!")

    return np.concatenate(data), np.concatenate(indices), np.array(fns)
Exemple #55
0
def HDO_tetrahedrality(fn_traj, fn_top, fn_save, ibead, len_chunk=100, select_A ='name O', select_B='name O'):
    """Compute Q values for the H2O and HDO oxygens of one bead's trajectory.

    The trajectory is read in chunks of ``len_chunk`` frames. For every frame
    the results of ``extract_Q_indxd`` for the ``O_H2O`` and ``O_HDO`` index
    groups (read from ``index-GMX.ndx``) are appended to two running arrays,
    which are written to ``'H2O' + fn_save`` and ``'HDO' + fn_save`` after
    every chunk and once more at the end.
    """
    H2O_Qs = np.array([])
    HOD_Qs = np.array([])
    started = time.time()
    print('Processing bead %d...' % ibead)
    print('')
    top = md.load(fn_top).topology
    chunks = md.iterload(fn_traj, top=top, chunk=len_chunk)
    species = load_idxs('index-GMX.ndx')
    Os_H2O = species['O_H2O']
    Os_HDO = species['O_HDO']
    # Build every (B, A) index pair, B varying slowest
    idx_A = top.select(select_A)
    idx_B = top.select(select_B)
    n_A = len(idx_A)
    n_B = len(idx_B)
    pairs = np.array([(iB, iA) for iB in idx_B for iA in idx_A], dtype=int)
    i_frame = 0

    for chunk in chunks:
        neighbors = extract_neighbors(chunk, pairs, 4, n_A, n_B)

        # Frames are handled one by one because the neighbors are not fixed
        # over the trajectory. The IndexError guard is primarily for running
        # the analysis on an unfinished trajectory.
        for frame_idx in range(len_chunk):
            try:
                q_h2o = extract_Q_indxd(chunk[frame_idx], neighbors[frame_idx], idx_A, Os_H2O)
                H2O_Qs = np.append(H2O_Qs, q_h2o)
                q_hdo = extract_Q_indxd(chunk[frame_idx], neighbors[frame_idx], idx_A, Os_HDO)
                HOD_Qs = np.append(HOD_Qs, q_hdo)
            except IndexError:
                print('IndexError. Continuing to next bead.')
                break

        i_frame += len_chunk
        # Intermediate results are rewritten after every chunk
        np.savetxt('H2O' + fn_save, H2O_Qs)
        np.savetxt('HDO' + fn_save, HOD_Qs)

    np.savetxt('H2O' + fn_save, H2O_Qs)
    np.savetxt('HDO' + fn_save, HOD_Qs)
    finished = time.time()
    print('Processing bead %d took %.2f minutes.' % (ibead, (finished-started)/60.0))
    print('')
def calc_chunkwise( func, traj_list, top_file, chunk_size=1, dim=1, stride=1, skip=0 ):
    """Average an observable over all chunks of a list of trajectories.

    ``func`` is called with each chunk and nothing else (wrap extra arguments
    in a lambda). The first ``dim`` entries of its result are summed over
    chunks and divided by the number of chunks (unweighted average).

    Returns the averaged result and the number of chunks processed.
    """
    count = 0
    for traj_file in traj_list:
        chunks = md.iterload(traj_file, chunk=chunk_size, top=top_file, stride=stride, skip=skip)
        for chunk in chunks:
            current = func(chunk)
            if count == 0:
                # The first chunk initialises the accumulator.
                func_ret = np.array(current)
            else:
                current_arr = np.array(current)
                for j in range(dim):
                    func_ret[j] += current_arr[j]
            count += 1
    n_chunks = 1.0*count
    for j in range(dim):
        func_ret[j] /= n_chunks
    return func_ret, count
Exemple #57
0
def test_iterload_skip(ref_traj, get_fn):
    """iterload with ``skip`` must match slicing a fully loaded trajectory."""
    fobj = ref_traj.fobj
    if fobj is md.formats.PDBTrajectoryFile:
        pytest.xfail("PDB Iterloads an extra frame!!")
    if fobj is md.formats.GroTrajectoryFile:
        pytest.xfail("Not implemented for some reason")
    if ref_traj.fext in ('ncrst', 'rst7'):
        pytest.skip("Only 1 frame per file format")

    top = md.load(get_fn('native.pdb'))
    t_ref = md.load(get_fn(ref_traj.fn), top=top)

    for chunk_size in (0, 1, 11, 100):
        for skip in (0, 1, 20, 101):
            pieces = md.iterload(get_fn(ref_traj.fn), skip=skip, top=top,
                                 chunk=chunk_size)
            t = functools.reduce(lambda left, right: left.join(right), pieces)
            eq(t_ref.xyz[skip:], t.xyz)
            eq(t_ref.time[skip:], t.time)
            eq(t_ref.topology, t.topology)
Exemple #58
0
def test_iterload(write_traj, get_fn):
    """Joined iterload chunks must match md.load for a freshly saved file."""
    fext = write_traj.fext
    if fext == 'dtr':
        pytest.xfail("This is broken with dtr")
    t_ref = md.load(get_fn('frame0.h5'))[:20]

    if fext in ('ncrst', 'rst7'):
        pytest.skip("Only 1 frame per file format")

    t_ref.save(write_traj.fn)

    compared = ('xyz', 'time', 'unitcell_angles', 'unitcell_lengths')
    for stride in (1, 2, 3):
        loaded = md.load(write_traj.fn, top=t_ref, stride=stride)
        pieces = md.iterload(write_traj.fn, top=t_ref, stride=stride, chunk=6)
        iterloaded = functools.reduce(lambda left, right: left.join(right),
                                      pieces)
        for attr in compared:
            eq(getattr(loaded, attr), getattr(iterloaded, attr))