Esempio n. 1
0
def load_trajchunks(traj,
                    parm,
                    start=1,
                    stride=1,
                    standard_names=True,
                    **kwargs):
    """Lazily load a trajectory file as a generator of MDTraj chunks.

       Meant for trajectory files that are too large to hold in memory.
       Usage: load_trajchunks(traj, parm, [start=1, stride=1, **kwargs])

       traj is the trajectory file and parm the topology file. start is
       the 1-indexed first frame to read and stride keeps every nth frame.
       Remaining kwargs are passed straight to md.iterload; common ones are
       chunk (size of the trajectory chunks loaded per iteration) and
       atom_indices (an array of 0-indexed atoms to keep).

       standard_names is forwarded to md.load_topology. Passing
       'standard_names=False' (not the default here, or in MDTraj) can be
       useful for PDB topologies, otherwise amide H might be renamed from
       the atom names provided to the standard PDB identifiers
       (e.g. 'H', 'H2', 'H3' for the terminal NH3 group).

       Returns the generator object produced by md.iterload."""
    topology_options = {'standard_names': standard_names}
    try:
        topology = md.load_topology(parm, **topology_options)
    except TypeError:
        # MDTraj only accepts the standard_names kwarg for certain
        # filetypes, so retry without it.
        topology = md.load_topology(parm)

    # The public 'start' is 1-indexed; iterload wants a count of frames
    # to skip.
    frames_to_skip = start - 1
    return md.iterload(traj,
                       top=topology,
                       skip=frames_to_skip,
                       stride=stride,
                       **kwargs)
Esempio n. 2
0
def load_fulltraj(traj,
                  parm,
                  start=1,
                  stop=None,
                  stride=1,
                  standard_names=True,
                  **kwargs):
    """Load a complete MDTraj trajectory with the desired topology
       and coordinates, then slice it.

       Usage: load_fulltraj(traj, parm, [start=1, stop=None, stride=1, **kwargs])

       traj is the trajectory file and parm the topology file. The whole
       file is read first; the result is then sliced from the 1-indexed
       frame 'start' up to 'stop' (default: the last frame read), keeping
       every 'stride'-th frame. Remaining kwargs are passed to md.load,
       e.g. atom_indices (an array of 0-indexed atoms to keep).

       standard_names is forwarded to md.load_topology. Passing
       'standard_names=False' (not the default here, or in MDTraj) can be
       useful for PDB topologies, otherwise amide H might be renamed from
       the atom names provided to the standard PDB identifiers
       (e.g. 'H', 'H2', 'H3' for the terminal NH3 group).

       Returns a complete trajectory, which may be memory intensive.

       See also load_trajchunks for an iterative load of large trajectories """
    topology_options = {'standard_names': standard_names}
    try:
        topology = md.load_topology(parm, **topology_options)
    except TypeError:
        # MDTraj only accepts the standard_names kwarg for certain
        # filetypes, so retry without it.
        topology = md.load_topology(parm)

    whole = md.load(traj, top=topology, **kwargs)
    last = whole.n_frames if stop is None else stop
    # The public 'start' is 1-indexed, slicing is 0-indexed.
    return whole[start - 1:last:stride]
Esempio n. 3
0
def plot_rmsd(trajectories,
              topology=None,
              subset=None,
              output='rmsd.dat',
              chunksize=100,
              reimage=False):
    """Compute the RMSD of every frame against the first frame, save and plot it.

    Parameters
    ----------
    trajectories : iterable of str
        Trajectory file paths. They are processed in the order given by
        ``sort_key_for_numeric_suffixes``.
    topology : str, optional
        Topology file to load the trajectories with. When omitted, MDTraj
        must be able to read the topology from the trajectory file itself.
    subset : str, optional
        MDTraj selection string limiting the atoms used for the RMSD.
    output : str
        Text file that receives one RMSD value per line.
    chunksize : int
        Number of frames loaded per iteration.
    reimage : bool
        If true, ``image_molecules`` is applied to the reference frame and
        to every chunk before the RMSD is computed.

    The reference is frame 0 of the first (sorted) trajectory. RMSD values
    are multiplied by 10 (nm -> A) before being written and plotted.
    """
    import mdtraj
    import numpy as np
    from tqdm import tqdm
    if topology:
        topology = mdtraj.load_topology(topology)
    trajectories = sorted(trajectories, key=sort_key_for_numeric_suffixes)
    first_frame = mdtraj.load_frame(trajectories[0], 0, top=topology)
    if subset:
        # Select on the topology the reference frame was actually loaded
        # with, so a selection also works when no topology file was given.
        subset = first_frame.topology.select(subset)
    frame_size = first_frame.xyz[0].nbytes
    if reimage:
        first_frame.image_molecules(inplace=True)
    rmsds = []
    for trajectory in tqdm(trajectories, unit='file'):
        _, ext = os.path.splitext(trajectory)
        total, unit_scale = None, None
        if ext.lower() == '.dcd':
            # Estimate the frame count from the file size so the inner
            # progress bar has a total; rounded to the order of magnitude
            # of the chunk size.
            n_frames = round(
                os.path.getsize(trajectory) / frame_size,
                -1 * len(str(chunksize)[1:]))
            total = int(n_frames / chunksize)
            unit_scale = chunksize
        itertraj = mdtraj.iterload(trajectory, top=topology, chunk=chunksize)
        tqdm_kwargs = {
            'total': total,
            'unit': 'frames',
            'unit_scale': unit_scale,
            'postfix': {
                'traj': trajectory
            }
        }
        for chunk in tqdm(itertraj, **tqdm_kwargs):
            if reimage:
                chunk.image_molecules(inplace=True)
            rmsd = mdtraj.rmsd(chunk, first_frame,
                               atom_indices=subset) * 10.0  # nm->A
            rmsds.append(rmsd)

    rmsds = np.concatenate(rmsds)
    with open(output, 'w') as f:
        f.write('\n'.join(map(str, rmsds)))
    print('\nWrote RMSD values to', output)
    print('Plotting results...')
    plt.plot(rmsds)
    fig = plt.gca()
    # Only mention additional files when there are any; the whole
    # " and N more" suffix is dropped for a single trajectory.
    n_more = len(trajectories) - 1
    suffix = ' and {} more'.format(n_more) if n_more > 0 else ''
    fig.set_title('{}{}'.format(trajectories[0], suffix))
    fig.set_xlabel('Frames')
    fig.set_ylabel('RMSD (A)')
    plt.show()
Esempio n. 4
0
def test_0():
    """Yield checks that native2.xml and native2.pdb load to the same data.

    Both trajectories are centered first; the yielded callables compare
    coordinates and unit cell vectors.
    """
    topology = load_topology(get_fn('native2.pdb'), no_boxchk=True)
    from_xml = load(get_fn('native2.xml'), top=topology)
    from_pdb = load(get_fn('native2.pdb'), no_boxchk=True)

    for trajectory in (from_xml, from_pdb):
        trajectory.center_coordinates()

    yield lambda: eq(from_xml.xyz, from_pdb.xyz)
    yield lambda: eq(from_xml.unitcell_vectors, from_pdb.unitcell_vectors)
Esempio n. 5
0
def load_partial(netcdf, prmtop, start, stop, stride=1):
    """Read part of a trajectory file as an MDTraj trajectory.

    The file ``netcdf`` is opened with ``md.open``, positioned at frame
    ``start`` and read with the topology loaded from ``prmtop``. The
    requested frame count is ``int((stop - start) / stride)`` and ``stride``
    is passed on to ``read_as_traj``.
    """
    top = md.load_topology(prmtop)
    with md.open(netcdf) as handle:
        handle.seek(start)
        frame_count = int((stop-start)/stride)
        return handle.read_as_traj(top, n_frames=frame_count, stride=stride)
Esempio n. 6
0
def test_xml(get_fn):
    """native2.xml and native2.pdb must hold the same centered coordinates and box."""
    topology = load_topology(get_fn('native2.pdb'), no_boxchk=True)
    from_xml = load(get_fn('native2.xml'), top=topology)
    from_pdb = load(get_fn('native2.pdb'), no_boxchk=True)

    for trajectory in (from_xml, from_pdb):
        trajectory.center_coordinates()

    assert eq(from_xml.xyz, from_pdb.xyz)
    assert eq(from_xml.unitcell_vectors, from_pdb.unitcell_vectors)
Esempio n. 7
0
def get_positions(
    topology: PathLike,
    trajectory: List[str],
    *,
    mask: str = "all",
    stride: Optional[int] = None,
) -> NDArray[(Any, ...), Float]:
    """Read a molecular dynamics trajectory and retrieve the coordinates.

    Parameters
    ----------
    topology : PathLike
        Topology file
    trajectory : list of str
        Trajectory files. A single entry containing ``*`` is expanded as a
        glob pattern; the matches are read in sorted order.
    mask : str
        MDTraj selection string for the atoms to keep; ``"all"`` keeps
        every atom.
    stride : int, optional
        Passed to ``md.iterload`` as ``stride`` (read every stride-th frame).

    Returns
    -------
    NDArray
        The coordinates with shape (n_frames / step, n_atoms, 3). They are
        converted in place from nanometers to Ångstroms unless one of the
        file names contains ``.gro``, ``.xtc``, ``.trj`` or ``.tng``.

    Raises
    ------
    ValueError
        If no frames could be read from the given files.
    """
    top = md.load_topology(topology)
    selection = top.select(mask) if mask != "all" else None

    # Materialize the file list: it is walked once to load the frames and
    # again for the extension check below. A lazy glob iterator would be
    # exhausted by the first pass and make the check see no names at all.
    if len(trajectory) == 1 and "*" in trajectory[0]:
        filenames = sorted(glob.glob(trajectory[0]))
    else:
        filenames = list(trajectory)

    chunks = [
        frames.xyz
        for filename in filenames
        for frames in md.iterload(
            filename, top=top, atom_indices=selection, stride=stride
        )
    ]
    if not chunks:
        raise ValueError(
            "No frames could be read from: {}".format(", ".join(trajectory))
        )
    positions = np.concatenate(chunks, axis=0)

    # NOTE(review): ".trj" may have been meant as ".trr" — confirm.
    skip_conversion_for = (".gro", ".xtc", ".trj", ".tng")
    joined_names = "".join(filenames)
    if not any(ext in joined_names for ext in skip_conversion_for):
        in_units_of(positions, "nanometer", "angstroms", inplace=True)
    return positions
Esempio n. 8
0
    def mobile(self) -> NDArray[(Any, ...), Float]:
        """Return the coordinates of the protein C-alpha atoms.

        The topology comes from ``TOPWW`` and the trajectory from ``TRJWW``;
        only atoms matching ``"protein and name CA"`` are loaded.

        Returns
        -------
        NDArray
            The ``xyz`` array of the loaded trajectory
        """
        topology = md.load_topology(TOPWW)
        ca_indices = topology.select("protein and name CA")
        return md.load(TRJWW, top=topology, atom_indices=ca_indices).xyz
Esempio n. 9
0
    def mobile(self) -> NDArray[(Any, ...), Float]:
        """Return the coordinates of the protein C-alpha atoms.

        The full trajectory ``TRJWW`` is loaded with the topology from
        ``TOPWW`` and then sliced down to the atoms matching
        ``"protein and name CA"``.

        Returns
        -------
        NDArray
            The ``xyz`` array of the sliced trajectory
        """
        top = md.load_topology(TOPWW)
        ca_atoms = top.select("protein and name CA")
        full_system = md.load(TRJWW, top=top)
        return full_system.atom_slice(ca_atoms).xyz
Esempio n. 10
0
    def calculate_grid_quantities(self,
                                  energy=True,
                                  entropy=True,
                                  hbonds=True,
                                  start_frame=None,
                                  num_frames=None):
        """Process trajectory frames one by one, then normalize ``self.voxeldata``.

        Parameters
        ----------
        energy, entropy, hbonds : bool, optional
            Forwarded to ``self.process_chunk``. ``entropy`` additionally
            triggers ``self.calculate_entropy`` at the end.
        start_frame : int, optional
            First frame to process; defaults to ``self.start_frame``.
        num_frames : int, optional
            Number of frames to process; defaults to ``self.num_frames``.

        Returns
        -------
        None
            Results are written into ``self.voxeldata`` in place.
        """

        if start_frame is None:
            start_frame = self.start_frame
        if num_frames is None:
            num_frames = self.num_frames

        # Frames are handed to process_chunk one at a time. The size is only
        # changed when start_frame + num_frames is at most 1.
        # NOTE(review): start_frame + num_frames == 0 makes chunk_size 0 and
        # the division below fail — confirm callers never pass that.
        chunk_size = 1
        if (start_frame + num_frames) <= chunk_size:
            chunk_size = start_frame + num_frames

        # Expected number of process_chunk calls; used as the progress-bar
        # total and as the stop condition of the frame loop.
        chunk_iter = int(num_frames / chunk_size)
        #chunk_iter += int((start_frame + num_frames) % chunk_size)
        chunk_counter = 0
        print_progress_bar(chunk_counter, chunk_iter)
        topology = md.load_topology(self.topology_file)
        for i in xrange(start_frame, start_frame + num_frames):
            chunk_counter += 1
            # Note the argument order: hbonds is passed before entropy.
            self.process_chunk(i, chunk_size, topology, energy, hbonds,
                               entropy)
            print_progress_bar(chunk_counter, chunk_iter)
            if chunk_counter == chunk_iter:
                break

        # Normalize the accumulated per-voxel totals. Only voxels whose
        # column 4 is at least 1.0 are touched.
        # NOTE(review): column 4 is presumably the per-voxel water count and
        # the other columns energy/h-bond totals — confirm against the
        # voxeldata layout, which is not visible here.
        for voxel in xrange(self.voxeldata.shape[0]):
            if self.voxeldata[voxel, 4] >= 1.0:
                # Column 12 (total / column 4) must be derived before
                # column 11 is rescaled in place by frames * voxel volume.
                self.voxeldata[
                    voxel,
                    12] = self.voxeldata[voxel, 11] / self.voxeldata[voxel, 4]
                self.voxeldata[voxel, 11] /= (num_frames * self.voxel_vol)
                # Same pattern for columns 13/14, with an extra factor 2.
                self.voxeldata[voxel, 14] = self.voxeldata[voxel, 13] / (
                    self.voxeldata[voxel, 4] * 2.0)
                self.voxeldata[voxel,
                               13] /= (num_frames * self.voxel_vol * 2.0)
                # Columns 15/16 are divided by column 17 instead of column 4,
                # so they are skipped when column 17 is not positive. This
                # reads column 17 before the loop below rescales it.
                if self.voxeldata[voxel, 17] > 0.0:
                    self.voxeldata[voxel, 16] = self.voxeldata[voxel, 15] / (
                        self.voxeldata[voxel, 17] * 2.0)
                    self.voxeldata[voxel,
                                   15] /= (num_frames * self.voxel_vol *
                                           self.voxeldata[voxel, 17] * 2.0)
                # For columns 17, 19, ..., 33: store the value per column-4
                # unit in the following column, then rescale the column
                # itself by frames * voxel volume.
                for i in range(17, 35, 2):
                    self.voxeldata[voxel, i + 1] = self.voxeldata[
                        voxel, i] / self.voxeldata[voxel, 4]
                    self.voxeldata[voxel, i] /= (num_frames * self.voxel_vol)
        if entropy:
            self.calculate_entropy(num_frames=num_frames)
0
def test_box_load_save(write_traj_with_box, get_fn):
    """Unit cell information must survive a save/load round trip."""
    original = md.load(get_fn('native2.pdb'), no_boxchk=True)
    topology = md.load_topology(get_fn('native.pdb'), no_boxchk=True)

    # Write the trajectory to the fixture's file and read it back; the
    # box information has to be preserved through that cycle.
    original.save(write_traj_with_box.fn)
    reloaded = md.load(write_traj_with_box.fn, top=topology)

    assert original.unitcell_vectors is not None
    assert eq(original.xyz, reloaded.xyz, decimal=3)
    for box_attribute in ('unitcell_vectors', 'unitcell_angles',
                          'unitcell_lengths'):
        assert eq(getattr(original, box_attribute),
                  getattr(reloaded, box_attribute))
Esempio n. 12
0
def test_box_load_save(write_traj_with_box, get_fn):
    """Round-trip a boxed trajectory through the fixture's file format."""
    reference = md.load(get_fn('native2.pdb'), no_boxchk=True)
    native_top = md.load_topology(get_fn('native.pdb'), no_boxchk=True)

    # Saving and loading again must not lose the box information.
    reference.save(write_traj_with_box.fn)
    round_tripped = md.load(write_traj_with_box.fn, top=native_top)

    assert reference.unitcell_vectors is not None
    assert eq(reference.xyz, round_tripped.xyz, decimal=3)
    assert eq(reference.unitcell_vectors, round_tripped.unitcell_vectors)
    assert eq(reference.unitcell_angles, round_tripped.unitcell_angles)
    assert eq(reference.unitcell_lengths, round_tripped.unitcell_lengths)
Esempio n. 13
0
def get_average_structure(
    topology: PathLike,
    trajectory: List[str],
    *,
    mask: str = "all",
    stride: Optional[int] = None,
) -> md.Trajectory:
    """Compute the average structure of a trajectory.

    Parameters
    ----------
    topology : PathLike
        Topology file
    trajectory : list of str
        List of trajectory files. A single entry containing ``*`` is
        expanded as a glob pattern.
    mask : str
        MDTraj selection string for the atoms to keep; ``"all"`` keeps
        every atom.
    stride : int, optional
        Passed to ``md.iterload`` as ``stride`` (read every stride-th frame).

    Returns
    -------
    Trajectory
        The last chunk that was read, reused as a container: its ``xyz`` is
        replaced by the mean coordinates over all frames read and, when box
        information is present, its unit cell is reduced to the first frame
        of that chunk. No unit conversion is applied to the coordinates.

    Raises
    ------
    ValueError
        If no frames could be read from the given files.
    """
    n_frames = 0
    chunk_sums = []
    indices = md.load_topology(topology).select(mask) if mask != "all" else None
    filenames = (
        glob.iglob(*trajectory)
        if len(trajectory) == 1 and "*" in "".join(trajectory)
        else trajectory
    )

    frames = None
    for filename in filenames:
        for frames in md.iterload(
            filename, top=topology, atom_indices=indices, stride=stride
        ):
            # Keep only one coordinate sum per chunk so memory stays
            # independent of the trajectory length.
            n_frames += frames.n_frames
            chunk_sums.append(frames.xyz.sum(axis=0))

    if frames is None or n_frames == 0:
        raise ValueError(
            "No frames could be read from: {}".format(", ".join(trajectory))
        )

    # np.asfarray was removed in NumPy 2.0; this is its float64 equivalent.
    totals = np.asarray(chunk_sums, dtype=np.float64)
    frames.xyz = totals.sum(axis=0) / n_frames

    # Trajectories without box information expose None here.
    angles = frames.unitcell_angles
    lengths = frames.unitcell_lengths
    if angles is not None and lengths is not None:
        frames.unitcell_angles = angles[0, :]
        frames.unitcell_lengths = lengths[0, :]
    return frames
Esempio n. 14
0
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Load ``item`` with ``mdtraj.load_topology`` and extract the requested atoms.

    When ``check`` is true, ``item`` is first passed to ``digest_item`` with
    the form ``'file:pdb'`` and ``atom_indices`` is normalized with
    ``digest_atom_indices``. The extraction itself always runs with
    ``check=False``.
    """
    if check:

        digest_item(item, 'file:pdb')
        atom_indices = digest_atom_indices(atom_indices)

    from mdtraj import load_topology
    from ..mdtraj_Topology import extract as extract_mdtraj_Topology

    full_topology = load_topology(item)
    return extract_mdtraj_Topology(full_topology,
                                   atom_indices=atom_indices,
                                   check=False)
Esempio n. 15
0
 def trajs_from_irrows(self, irow):
     """
     Load the trajectory described by one row of an msmbuilder.metadata object.

     Only the atoms matching ``self.atoms_to_load`` are read, using the
     stride ``self.stride``.
     :param irow: (index, row) pair as produced by pd.DataFrame.iterrows;
                  the row needs the keys 'traj_fn' and 'top_fn'
     :return i, traj: The traj id (starting at 0) and the mdtraj.Trajectory object
     """
     traj_id, meta_row = irow
     logger.info('Loading {}'.format(meta_row['traj_fn']))
     row_topology = mdtraj.load_topology(meta_row['top_fn'])
     selected_atoms = row_topology.select(self.atoms_to_load)
     logger.debug('Will load {} atoms'.format(len(selected_atoms)))
     loaded = mdtraj.load(meta_row['traj_fn'],
                          top=meta_row['top_fn'],
                          stride=self.stride,
                          atom_indices=selected_atoms)
     return traj_id, loaded
Esempio n. 16
0
def test_box_load_save():
    """Yield checks that box information survives a save/load cycle.

    The trajectory is written to each of the xtc, dcd, trr and h5 temp
    files and read back; four comparison callables are yielded per format.
    """
    t = md.load(get_fn('native2.pdb'), no_boxchk=True)

    # these four tempfile have extensions (dcd, xtc, trr, h5) that
    # should store the box information. lets make sure that through a
    # load/save cycle, the box information is preserved:
    top = md.load_topology(get_fn('native.pdb'), no_boxchk=True)
    for temp_fn in [tmpfns['xtc'], tmpfns['dcd'], tmpfns['trr'], tmpfns['h5']]:
        t.save(temp_fn)
        if temp_fn.endswith('.h5'):
            # h5 files carry their own topology
            t2 = md.load(temp_fn)
        else:
            t2 = md.load(temp_fn, top=top)

        assert t.unitcell_vectors is not None
        # Bind the freshly loaded trajectory as a default argument. A plain
        # closure would look up `t2` when the callable runs, so a runner
        # that collects all yielded checks first would compare every format
        # against the last file only.
        yield lambda loaded=t2: eq(t.xyz, loaded.xyz, decimal=3)
        yield lambda loaded=t2: eq(t.unitcell_vectors, loaded.unitcell_vectors)
        yield lambda loaded=t2: eq(t.unitcell_angles, loaded.unitcell_angles)
        yield lambda loaded=t2: eq(t.unitcell_lengths, loaded.unitcell_lengths)
Esempio n. 17
0
def preload_top(meta):
    """Load one topology file into memory.

    This function checks to make sure there's exactly one topology file
    in play. When sampling frames, you have to have all the same
    topology to concatenate.

    Parameters
    ----------
    meta : pd.DataFrame
        The DataFrame of metadata with a column named 'top_fn'

    Returns
    -------
    top : md.Topology
        The one topology file that can be used for all trajectories.

    Raises
    ------
    ValueError
        If ``meta['top_fn']`` is empty or names more than one topology.
    """
    top_fns = set(meta['top_fn'])
    if not top_fns:
        # Report the empty case as what it is instead of "more than one".
        raise ValueError("No topology is used in this project!")
    if len(top_fns) > 1:
        raise ValueError("More than one topology is used in this project!")
    return md.load_topology(top_fns.pop())
Esempio n. 18
0
def preload_top(meta):
    """Load one topology file into memory.

    This function checks to make sure there's exactly one topology file
    in play. When sampling frames, you have to have all the same
    topology to concatenate.

    Parameters
    ----------
    meta : pd.DataFrame
        The DataFrame of metadata with a column named 'top_fn'

    Returns
    -------
    top : md.Topology
        The one topology file that can be used for all trajectories.

    Raises
    ------
    ValueError
        If ``meta['top_fn']`` is empty or names more than one topology.
    """
    top_fns = set(meta['top_fn'])
    if not top_fns:
        # Report the empty case as what it is instead of "more than one".
        raise ValueError("No topology is used in this project!")
    if len(top_fns) > 1:
        raise ValueError("More than one topology is used in this project!")
    return md.load_topology(top_fns.pop())
Esempio n. 19
0
    def __init__(self, topo_path, bins, strc_path_ref=None, pdb_path_ref=None):
        """Load the topology and set up grid state with default values.

        Parameters
        ----------
        topo_path : str
            Topology file, loaded with ``md.load_topology``.
        bins
            Stored unchanged as ``self.bins``.
        strc_path_ref, pdb_path_ref : str, optional
            Reference structure file and the topology used to load it.
            Fitting is enabled only when both are given; the reference is
            then frame 0 of ``strc_path_ref``.
        """

        self.topo = md.load_topology(topo_path)
        self.solvent_O_idxs = self.topo.select("water and name O")
        self.bins = bins
        self.grid_resolution = np.array([1., 1., 1.])
        self.xx = np.zeros(3, dtype=float)
        self.yy = np.zeros(3, dtype=float)
        self.zz = np.zeros(3, dtype=float)
        self.center = np.zeros(3, dtype=float)
        self.f2r = np.eye(3, 3)
        self.f = None
        self.fitting = False
        self.ref = None
        self.ref_sele = None
        self.sele = None

        # Identity comparison is the correct test for None.
        if strc_path_ref is not None and pdb_path_ref is not None:
            backbone_query = "name CA or name N or name C"
            self.ref = md.load_frame(strc_path_ref, 0, top=pdb_path_ref)
            # The same atom query is applied to the reference and to the
            # system topology.
            self.ref_sele = self.ref.topology.select(backbone_query)
            self.sele = self.topo.select(backbone_query)
            self.fitting = True
Esempio n. 20
0
def preload_tops(meta):
    """Load every distinct topology file into memory.

    Parsing each topology once can be cheaper than re-parsing it for every
    trajectory that is loaded, since there are typically far fewer
    (possibly 1) topologies than trajectories.

    Parameters
    ----------
    meta : pd.DataFrame
        The DataFrame of metadata with a column named 'top_fn'

    Returns
    -------
    tops : dict
        Dictionary of ``md.Topology`` objects, keyed by "top_fn"
        values.
    """
    distinct_paths = set(meta['top_fn'])
    return {path: md.load_topology(path) for path in distinct_paths}
Esempio n. 21
0
    def calculate_site_quantities(self,
                                  energy=True,
                                  entropy=True,
                                  hbonds=True,
                                  energy_lr_breakdown=False,
                                  angular_structure=False,
                                  shell_radii=None,
                                  r_theta_cutoff=6.0):
        """
        Performs site-based solvation thermodynamics and structure calculations by iterating
        over frames in the trajectory. If water molecules in hydration sites are already determined
        (the case when clustering is already done), then the list of hydration site waters in
        each frame is used to iterate over each water and calculate its properties. If externally
        determined hydration sites are provided (when self.clustercenter_file is set to a pdb file of
        hydration sites) then for each site, corresponding water is found in each frame and is used
        for calculations.

        Frames ``self.start_frame`` to ``self.start_frame + self.num_frames - 1`` are read one
        at a time. If the trajectory ends early, ``self.num_frames`` is reset to the number of
        frames actually read.

        Parameters
        ----------
        energy : bool, optional
            Forwarded to ``self._process_frame``.
        entropy : bool, optional
            Forwarded to ``self._process_frame``; when true the entropy input data is
            generated and the entropy scripts are run after the frame loop.
        hbonds : bool, optional
            Forwarded to ``self._process_frame``.
        energy_lr_breakdown : bool, optional
            When true, ``self.energy_ww_lr_breakdown`` is initialised with one list of
            zeros per hydration site and the (squared) shell radii are used.
        angular_structure : bool, optional
            When true, ``self.angular_st_distribution`` is initialised with one empty
            list per hydration site.
        shell_radii : list of float, optional
            Outer radii of exactly three solvation shells, default [3.5, 5.5, 8.5].
            Only used with ``energy_lr_breakdown``; the values are squared and prefixed
            with 0.0 before being passed on. The caller's list is not modified.
        r_theta_cutoff : float, optional
            Distance cutoff for the angular structure, in Angstrom; values above 8.0
            are reset to 8.0 when ``angular_structure`` is true.

        Returns
        -------
        None : NoneType
            This function updates hydration site data structures to store the results of calculations.
        """
        print_progress_bar(0, self.num_frames)
        topology = md.load_topology(self.topology_file)
        read_num_frames = 0
        if energy_lr_breakdown:
            if shell_radii is None:
                shell_radii = [3.5, 5.5, 8.5]
            else:
                # The two sentences are separate literals; the trailing
                # space keeps them apart in the message.
                assert len(shell_radii) == 3, "Water-water energy decomposition supported only upto 3 solvation shells. " \
                                              "Please provide outer radii for three shells."
            # Squared radii; built as a new list so the argument stays intact.
            shell_radii = [i**2 for i in shell_radii]
            shell_radii.insert(0, 0.0)
            self.energy_ww_lr_breakdown = [[
                0.0 for s in shell_radii
            ] for i in range(self.hsa_data.shape[0])]

        if angular_structure:
            if r_theta_cutoff > 8.0:
                print(
                    "Warning: r_theta_cutoff > 8.0 can take a long time. "
                    "Resetting angular structure distance cutoff to 8.0 Angstrom"
                )
                r_theta_cutoff = 8.0
            self.angular_st_distribution = [
                [] for i in range(self.hsa_data.shape[0])
            ]

        with md.open(self.trajectory) as f:
            for frame_i in range(self.start_frame,
                                 self.start_frame + self.num_frames):
                print_progress_bar(frame_i - self.start_frame, self.num_frames)
                f.seek(frame_i)
                trj = f.read_as_traj(topology, n_frames=1, stride=1)
                if trj.n_frames == 0:
                    # The trajectory is shorter than requested.
                    print("No more frames to read.")
                    break
                else:
                    self._process_frame(trj, frame_i, energy, hbonds, entropy,
                                        energy_lr_breakdown, angular_structure,
                                        shell_radii, r_theta_cutoff)
                    read_num_frames += 1
            if read_num_frames < self.num_frames:
                print((
                    "{0:d} frames found in the trajectory, resetting self.num_frames."
                    .format(read_num_frames)))
                self.num_frames = read_num_frames

        if entropy:
            self.generate_data_for_entropycalcs(self.start_frame,
                                                self.num_frames)
            self.run_entropy_scripts()
        self.normalize_site_quantities(self.num_frames)
Esempio n. 22
0
def _resolve_dist_atom(md_topology, atom, label, out_of_range_msg, verbose):
    """Return the single 0-based atom index (as a one-element sequence) selected by ``atom``."""
    if isinstance(atom, str):
        try:
            atom_subset = md_topology.select(atom)
        except Exception:
            sys.exit('Invalid atom selection!')
        # A distance needs exactly one atom on each side.
        if len(atom_subset) != 1:
            sys.exit('Invalid atom selection!')
    elif isinstance(atom, int):
        # Integers are 1-based atom numbers.
        index = atom - 1
        if not 0 <= index < md_topology.n_atoms:
            sys.exit(out_of_range_msg)
        atom_subset = [index]
    else:
        sys.exit('Invalid atom selection, you need to provide a string or an integer')

    if verbose > 0:
        print('%s number: %s' % (label, atom_subset[0] + 1))
    return atom_subset


def _shift_residue_digits(label):
    """Add one to the number formed by the digits found in the first six characters of ``label``."""
    # NOTE(review): the increment works digit by digit and does not carry
    # through every position (e.g. three digits ending in "99"); the
    # original carried the remark "should work until 1109". Behaviour is
    # kept as it was — confirm whether a full integer increment is wanted.
    pos = []
    num = []
    for idx, char in enumerate(label):
        if idx < 6 and char.isdigit():
            pos.append(idx)
            num.append(int(char))
    if len(pos) == 1:
        if num[-1] < 9:
            num[-1] += 1
        else:
            num[-1] = 10
    if len(pos) == 2:
        if num[-1] < 9:
            num[-1] += 1
        else:
            num[-1] = 0
            num[0] += 1
    if len(pos) == 3:
        if num[-1] < 9:
            num[-1] += 1
        else:
            num[1] += 1
            num[-1] = 0
    chars = list(label)
    for slot, digit in zip(pos, num):
        chars[slot] = str(digit)
    return "".join(chars)


def _dist_atom_selection(topology, atom1, atom2, verbose, unpythonize):
    """
    Function takes a topology and two atom selections, verifies that each
    selects one atom and returns the atom pair for a distance calculation.
    The process is terminated with sys.exit on invalid input.
    Input:
        topology:
            mdtraj.Topology or string
            Either mdtraj.Topology object or path to a topology file
            to be loaded
        atom1, atom2:
            string or integer
            A string will be interpreted with the Mdtraj atom
            selection language. An integer will be treated as
            a 1-based atom number
        verbose:
            integer
            values greater than 0 print progress messages
        unpythonize:
            boolean
            if true the residue numbers in the returned label are
            shifted up by one
    Output:
        md_topology
            mdtraj.core.topology.Topology object of protein
        pair
            numpy.array
            array of shape (1, 2) with the 0-based atom indices
        final_resid
            string
            the names of the selected atoms joined by " - "
    """
    ## First have to load all the inputs if they are defined by a path
    if isinstance(topology, str):
        if not os.path.exists(topology):
            sys.exit('Make sure you have provided a valid path to topology file!')
        try:
            md_topology = md.load_topology(topology)
        except Exception:
            sys.exit('Make sure you have provided a valid path to topology file!')
        if verbose > 0:
            print('The following topology file was succesfully loaded: \n %s \n' % (md_topology,))

    elif isinstance(topology, md.core.topology.Topology):
        md_topology = topology
        if verbose > 0:
            print('The following topology file was succesfully loaded: \n %s \n' % (md_topology,))
    else:
        sys.exit('Invalid input! Must be a valid path to topology file or mdtraj.Topology object')

    ## check chosen atoms
    atom_subset1 = _resolve_dist_atom(
        md_topology, atom1, 'Atom1',
        'Atom selection invalid for given topology!', verbose)
    atom_subset2 = _resolve_dist_atom(
        md_topology, atom2, 'Atom2',
        'Invalid atom selection!', verbose)

    pair = np.append(atom_subset1, atom_subset2).reshape(1,2)

    # The label is always built so that it exists for the message and the
    # return value below; the residue numbers are only shifted on request.
    atom_labels = [str(atom) for atom in md_topology.subset(pair[0]).atoms]
    if unpythonize:
        # rename residues (still testing)
        atom_labels = [_shift_residue_digits(name) for name in atom_labels]
    final_resid = " - ".join(atom_labels)

    if verbose > 0:
        print('Calculating distance between following atoms: %s \n' % (final_resid,))

    return md_topology, pair, final_resid
Esempio n. 23
0
def Search_atom_index(df_noes, gro_file):
    ''' Assign the topology index to each atom

    Each NOE row gets the 0-based topology index of its two atoms (-1 when
    an atom is not found). Rows whose first index is larger than the second
    are swapped so that AtomID1 <= AtomID2; the swapped rows are moved to
    the end of the table and the index is renumbered.

    Input:

       - df_noes (pandas dataframe): NOE distances dataframe. The code reads
         the columns ResID1, ResType1, Atom1, ResID2, ResType2, Atom2 by
         name, and the residue ids and atom names by position (columns
         1, 3, 4 and 6 of each row tuple). The columns AtomID1 and AtomID2
         are added to the dataframe that is passed in.
       - gro_file (str): topology file

    Output:

        - df_noes (pandas dataframe) : NOE distances modified dataframe
    '''
    # Imported locally so the function does not depend on a module-level
    # alias being present.
    import pandas as pd

    top = md.load_topology(gro_file)

    def first_match(res_id, atom_name):
        # Residue ids in the table are 1-based, MDTraj's "resid" is 0-based.
        found = top.select("resid " + str(int(res_id) - 1) + " and(name " +
                           atom_name + ")")
        return int(found[0]) if len(found) else -1

    AtomID1 = []
    AtomID2 = []
    for noe in df_noes.itertuples():
        AtomID1.append(first_match(noe[1], noe[3]))
        AtomID2.append(first_match(noe[4], noe[6]))

    df_noes['AtomID1'] = AtomID1
    df_noes['AtomID2'] = AtomID2

    todel = []
    toadd = []

    for i, row in df_noes.iterrows():
        if int(row.AtomID1) > int(row.AtomID2):
            # Exchange the two atoms of this row.
            newrow = row.copy()

            newrow.ResID1 = row.ResID2
            newrow.ResType1 = row.ResType2
            newrow.Atom1 = row.Atom2
            newrow.AtomID1 = row.AtomID2

            newrow.ResID2 = row.ResID1
            newrow.ResType2 = row.ResType1
            newrow.Atom2 = row.Atom1
            newrow.AtomID2 = row.AtomID1

            todel.append(i)
            toadd.append(newrow)

    df_noes = df_noes.drop(todel)
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    if toadd:
        swapped = pd.DataFrame(toadd, columns=df_noes.columns)
        df_noes = pd.concat([df_noes, swapped], ignore_index=True)
    else:
        df_noes = df_noes.reset_index(drop=True)

    return df_noes
Esempio n. 24
0
# Run settings, all read from the 'makeVectors' section of Config.
# NOTE(review): Config is presumably a ConfigParser instance created earlier
# in the file — confirm.
nframe = Config.getint('makeVectors', 'nframes')  # number of frames to process
structFile = Config.get('makeVectors', 'structFile')  # topology file
trajFile = Config.get('makeVectors', 'trajFile')  # glob pattern of trajectory files
projection = Config.getboolean('makeVectors', 'projection')
nProcess = Config.getint('makeVectors', 'nProcess')  # size of the worker pool
atoms = Config.get('makeVectors', 'atoms')  # MDTraj selection string

# Worker pool with nProcess processes.
pool = Pool(processes=nProcess)

# State and result containers; they are filled by code below this setup part.
ref = True
descriptorsList = []
eigenVectorsList = []
eigenValuesList = []
meansList = []

# Load only the selected atoms of every file matching trajFile.
# NOTE(review): glob() returns matches in arbitrary order, so the frame order
# of the combined trajectory may vary between systems — confirm this is fine.
top = md.load_topology(structFile)
traj = md.load(glob(trajFile), top=top, atom_indices=top.select(atoms))

print('Loaded topology: \n%s \n' % top)
print('Loaded trajectory: \n%s \n' % traj)

# Boolean mask of all True with one entry per element of axis 1 of traj.xyz.
mask = numpy.ones((traj.xyz.shape[1]), dtype="bool")

# Refuse to run when more frames are requested than the trajectory holds.
assert nframe <= traj.n_frames, "More frames selected (%s) than provided (%s)!" % (
    nframe, traj.n_frames)

print('Starting PCA...')
trajIndex = 0  # counter compared against nframe by the loop that follows
pbar = tqdm(total=nframe, unit='Frame')
while trajIndex < nframe:
Esempio n. 25
0
import matplotlib.cm as cm
#}}}

n = 4 # Ligand number (1-based)
N = n-1 # Zero-based index of the ligand, used to pick its entry from gfiles
get_data = True
fname = 'ligand%s'%n # Filename stem for outputs; also the name of the data sub-directory
gfiles = [8690,8693,8696,8699] # List of gro file names (without extension), one per ligand

# List of all the Karplus coefficient Models
Models = ["Ruterjans1999","Bax2007","Bax1997","Habeck" ,"Vuister","Pardi"]

# Set paths
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
data = '/Users/tuc41004/Desktop/nmr-biceps/BICePs_2.0/test_J_coupling/%s/'%fname
gro = data+'%s.gro'%gfiles[N]
top = md.load_topology(gro)
#trajs = [data + 'traj%s.xtc'%i for i in range(len(glob.glob(data+'traj*.xtc')))]
# All traj*.xtc files of this ligand, in lexicographic order.
trajs = sorted(glob.glob(data+'traj*.xtc'))

# Experimental Data from Erdelyi et al - Table S5.
# One array per ligand (exp_1 .. exp_4), nine values each.
# NOTE(review): the 0 entries presumably mark values that were not measured — confirm.
exp_1 = np.array([7.9,7.3,0,7.7,8.4,0,0,0,0])
exp_2 = np.array([0,7.4,0,6.2,7.4,0,7.8,0,0])
exp_3 = np.array([7.3,8.6,0,8.0,8.2,7.3,7.5,0,0])
exp_4 = np.array([6.6,7.3,0,6.8,0,7.4,0,0,0])


# Get Theoretical:{{{
if get_data == True:
    J = {}       # Dictionary of all J3_HN_HA values for each model
    J_val = {}   #
    J_3_exp = [] #
Esempio n. 26
0
def _atom_selection(topology, residue_selection, selection, verbose):
    """
    Function takes a topology file and residue selection and verifies if
    the lattter is possible. It also returns the C-alpha atom selection
    Input:
        topology: 
            mdtraj.Topology or string
            Either mdtraj.Topology object or path to trajectory
            to be loaded
        residue_selection:
            string or list with integers
            String will be interpreted with the Mdtraj atom
            selection language. The list will be treated as
            atom number
        selection (by default True):
            boolean
            if true the function will try to return the residue/
            atom selection
    Output:
        atom_subset
            numpy.array 
            array with all the atom numbers corresponding to selection
        md_topology
            mdtraj.core.topology.Topology object of protein 
    """
    ## First have to load all the inputs if they are defined by a path
    if isinstance(topology, str):
        if os.path.exists(topology):
            try:
                md_topology = md.load_topology(topology)
            except:
                sys.exit('Make sure you have provided a valid path to topology file!')
            else:
                if verbose > 0:
                    print 'The following topology file was succesfully loaded: \n %s \n' %(md_topology)

    elif isinstance(topology, md.core.topology.Topology):
        md_topology = topology
        if verbose > 0:
            print 'The following topology file was succesfully loaded: \n %s \n' %(md_topology)
    else:
        sys.exit('Invalid input! Must be a valid path to topology file or mdtraj.Topology object')
    
    ## if selection is True the function will try to obtain the specified atoms/residues
    ## if residue name is specified it will by default look for C-alpha atoms
    if selection:
        if isinstance(residue_selection, list):
            try:
                atom_subset = md_topology.select(residue_selection)
            except:
                sys.exit('Invalid atom selection in list!')
            else:
                if verbose > 1:
                    print 'Your selection includes the following atom(s): \n %s \n' %(atom_subset)
                    print 'Your selection includes the following residues: \n'
                    for residue in md_topology.subset(atom_subset).residues:
                        print residue
        elif isinstance(residue_selection, str):
            try:
                atom_subset = md_topology.select('name CA and ' + residue_selection)
            except:
                sys.exit('Check if your atom selection command is recognized by the Mdtraj atom selection language!')
            else:
                if verbose > 1:
                    print 'Your selection includes the following atom(s): \n %s \n' %(atom_subset)
                    print 'Your selection includes the following residues: \n'
                    for residue in md_topology.subset(atom_subset).residues:
                        print residue
        else:
            sys.exit('Make sure you provided a valid residue selection!')
    else:
        atom_subset=md_topology.select('name CA and ' + 'all')
    
    # now that we are sure that both topology and selection are valid we can return atom_subset
    # and use the loaded topology file
    
    
    return atom_subset, md_topology
Esempio n. 27
0
def grid_solvent(option_dict, _start, _stop):
    """Collect per-frame solvent-field data for frames _start .. _stop - 1.

    For every frame the reference axes, the centre and the solvent
    coordinates are taken from the trajectory, handed to a solvent_field
    object, and the quantities it exposes afterwards are accumulated.

    Parameters
    ----------
    option_dict : mapping
        Options read here: 'trajin' and 'parm' (first element is the
        trajectory / topology path), 'zz', 'xx', 'center_sele', 'zz_ref',
        'xx_ref', 'water' (sequences of string fragments joined into MDTraj
        selection strings), 'dims', 'noimage' and 'verbose'.
        A 'zz_ref' / 'xx_ref' / 'center_sele' selection starting with "None"
        disables the corresponding reference or centre selection; without a
        centre selection the union of the xx and zz atoms is used.
    _start : int
        Index of the first frame to process.
    _stop : int
        Index one past the last frame to process.

    Returns
    -------
    collections.OrderedDict
        'uc_data', 'pop_data', 'center_data' and 'origin_data' hold one
        entry (three rows for 'uc_data') per frame. The remaining keys
        ('O_idxs_data', 'theta_data', 'phi_data', 'psi_data',
        'xx1_wat_data', 'xx2_wat_data', 'yy_wat_data', 'zz_wat_data',
        'O_frac_data', 'H1_frac_data', 'H2_frac_data', 'frame_data') are the
        concatenation over all frames that had waters inside the field, or
        None when no frame had any.
    """
    _results = OrderedDict()

    traj_path = option_dict['trajin'][0]
    parm_path = option_dict['parm'][0]
    zz_sele = "".join(option_dict['zz'])
    xx_sele = "".join(option_dict['xx'])
    cntr_sele = "".join(option_dict['center_sele'])
    dims = np.array(option_dict['dims'])

    zz_ref_sele = "".join(option_dict['zz_ref'])
    xx_ref_sele = "".join(option_dict['xx_ref'])
    water_str = "".join(option_dict['water'])

    image = not option_dict['noimage']

    verbose = option_dict['verbose']

    if verbose:
        print("Preparing selection masks...")
    # A selection that starts with "None" means "no reference for this axis".
    no_zz_ref = zz_ref_sele.strip().startswith("None")
    no_xx_ref = xx_ref_sele.strip().startswith("None")
    zz_ref_crds = None
    xx_ref_crds = None

    topo = md.load_topology(parm_path)

    ### Z axis
    zz_indxs = topo.select(zz_sele)
    if not no_zz_ref:
        zz_ref_indxs = topo.select(zz_ref_sele)
    ### X axis
    xx_indxs = topo.select(xx_sele)
    if not no_xx_ref:
        xx_ref_indxs = topo.select(xx_ref_sele)
    ### Center
    # The solvent is every water atom that is not part of an axis (or centre)
    # selection.
    if cntr_sele.strip().startswith("None"):
        center_idxs = np.unique(np.concatenate((xx_indxs, zz_indxs)))
        solvent = topo.select("%s and not (%s or %s)" %
                              (water_str, zz_sele, xx_sele))
        solvent_O = topo.select("(%s and name O) and not (%s or %s)" %
                                (water_str, zz_sele, xx_sele))
    else:
        center_idxs = topo.select(cntr_sele)
        solvent = topo.select("%s and not (%s or %s or %s)" %
                              (water_str, zz_sele, xx_sele, cntr_sele))
        solvent_O = topo.select("(%s and name O) and not (%s or %s or %s)" %
                                (water_str, zz_sele, xx_sele, cntr_sele))

    # Index distance between two consecutive water oxygens.
    # NOTE(review): presumably the number of sites per water molecule; this
    # requires at least three solvent oxygens -- confirm.
    sites = solvent_O[2] - solvent_O[1]
    solv_field = solvent_field(solvent_O.shape[0], sites, dims)

    N_frames = _stop - _start

    # Builtin float/int replace the np.float/np.int aliases, which were
    # removed in NumPy 1.24; the resulting dtypes are the same.
    uc_data = np.zeros((N_frames * 3, 3), dtype=float)
    pop_data = np.zeros(N_frames, dtype=int)
    center_data = np.zeros((N_frames, 3), dtype=float)
    origin_data = np.zeros((N_frames, 3), dtype=float)

    # (result key, solvent_field attribute) for every per-water quantity.
    per_water_fields = (
        ('O_idxs_data', 'inside_idxs'),
        ('theta_data', 'theta'),
        ('phi_data', 'phi'),
        ('psi_data', 'psi'),
        ('xx1_wat_data', 'xx1_wat'),
        ('xx2_wat_data', 'xx2_wat'),
        ('yy_wat_data', 'yy_wat'),
        ('zz_wat_data', 'zz_wat'),
        ('O_frac_data', 'O_crds_frac'),
        ('H1_frac_data', 'H1_crds_frac'),
        ('H2_frac_data', 'H2_crds_frac'),
    )
    # Per-frame chunks are collected in lists and joined once at the end,
    # instead of re-copying the growing arrays on every frame.
    collected = OrderedDict((key, []) for key, _ in per_water_fields)
    frame_chunks = []

    # Unit-cell buffer; None tells update_field that no imaging is wanted.
    uc = np.eye(3, 3) if image else None

    is_pdb = traj_path.endswith(".pdb")

    if verbose:
        print("Start processing trajectory...")
    with md.open(traj_path) as md_traj:
        for i, frame_i in enumerate(range(_start, _stop)):
            report = verbose and i % 100 == 0
            if report:
                print("Frame %d..." % frame_i)
            if is_pdb:
                frame = md.load_frame(traj_path, index=frame_i, top=topo)
            else:
                md_traj.seek(frame_i)
                frame = md_traj.read_as_traj(topo, n_frames=1, stride=1)

            # Coordinates are scaled by 10 (presumably nm -> Angstrom).
            xyz = frame.xyz[0]
            if not no_zz_ref:
                zz_ref_crds = np.mean(xyz[zz_ref_indxs] * 10., axis=0)
            if not no_xx_ref:
                xx_ref_crds = np.mean(xyz[xx_ref_indxs] * 10., axis=0)

            zz_crds = xyz[zz_indxs] * 10.
            xx_crds = xyz[xx_indxs] * 10.
            solv_crds = xyz[solvent] * 10.
            cntr_crds = xyz[center_idxs] * 10.

            if image:
                uc[:] = frame.unitcell_vectors[0] * 10.

            solv_field.set_axis(xx_crds, zz_crds, xx_ref_crds, zz_ref_crds)
            solv_field.set_center(cntr_crds.mean(axis=0))
            solv_field.update_field(solv_crds, uc)

            j = i * 3
            uc_data[j:j + 3, :] = (solv_field.get_nice_frac2real() /
                                   solv_field.delta)
            pop_data[i] = solv_field.N_inside
            center_data[i] = solv_field.center
            origin_data[i] = solv_field.origin

            if solv_field.N_inside > 0:
                # Copy now: the field object is updated again on the next
                # frame and may reuse its arrays.
                for key, attr in per_water_fields:
                    collected[key].append(np.copy(getattr(solv_field, attr)))
                frame_chunks.append(
                    np.full(solv_field.N_inside, frame_i, dtype=int))

                if report:
                    write_files(XYZ=solv_field.O_crds_frac,
                                Format='PDB',
                                Filename='frac_frame%d.pdb' % frame_i)
                    write_files(XYZ=solv_field.O_crds,
                                Format='PDB',
                                Filename='real_frame%d.pdb' % frame_i)

    _results['uc_data'] = uc_data
    _results['pop_data'] = pop_data
    _results['center_data'] = center_data
    _results['origin_data'] = origin_data
    # Keys stay None when no frame contributed any water.
    for key, _ in per_water_fields:
        chunks = collected[key]
        _results[key] = np.concatenate(chunks) if chunks else None
    _results['frame_data'] = (np.concatenate(frame_chunks)
                              if frame_chunks else None)

    return _results
Esempio n. 28
0
    def calculate_grid_quantities(self, energy=True, entropy=True, hbonds=True):
        """
        Performs grid-based solvation thermodynamics and structure calculations by iterating
        over frames in the trajectory.

        Frames self.start_frame .. self.start_frame + self.num_frames - 1 are read one at a
        time and handed to self._process_frame. If the trajectory holds fewer frames than
        requested, self.num_frames is reset to the number actually read. The accumulated
        voxel quantities in self.voxeldata are then normalized in place.

        Parameters
        ----------
        energy : bool, optional
            Forwarded to self._process_frame.

        entropy : bool, optional
            Forwarded to self._process_frame; when True, self.calculate_entropy is also
            called after normalization.

        hbonds : bool, optional
            Forwarded to self._process_frame.

        Returns
        -------
        None
            Results are stored on the object (self.voxeldata, self.num_frames).
        """
        print_progress_bar(0, self.num_frames)
        # A topology is required whenever the trajectory is not HDF5. It used to be loaded
        # only when the *topology file* was not ".h5", which left the name unbound for an
        # ".h5" topology combined with a non-".h5" trajectory.
        traj_is_h5 = self.trajectory.endswith(".h5")
        topology = None
        if not traj_is_h5 or not self.topology_file.endswith(".h5"):
            topology = md.load_topology(self.topology_file)
        read_num_frames = 0
        with md.open(self.trajectory) as f:
            for frame_i in range(self.start_frame, self.start_frame + self.num_frames):
                print_progress_bar(frame_i - self.start_frame, self.num_frames)
                f.seek(frame_i)
                if traj_is_h5:
                    trj = f.read_as_traj(n_frames=1, stride=1)
                else:
                    trj = f.read_as_traj(topology, n_frames=1, stride=1)
                if trj.n_frames == 0:
                    print("No more frames to read.")
                    break
                self._process_frame(trj, energy, hbonds, entropy)
                read_num_frames += 1
            if read_num_frames < self.num_frames:
                print("{0:d} frames found in the trajectory, resetting self.num_frames.".format(read_num_frames))
                self.num_frames = read_num_frames

        # Normalize voxel quantities
        # NOTE(review): column meanings are not visible here. Column 4 is used as the
        # per-voxel count that gates normalization; odd columns 13..33 appear to hold
        # accumulated totals and the following even column the per-count average -- confirm.
        for voxel in range(self.voxeldata.shape[0]):
            if self.voxeldata[voxel, 4] > 1.0:
                self.voxeldata[voxel, 14] = self.voxeldata[voxel, 13] / (self.voxeldata[voxel, 4] * 2.0)
                self.voxeldata[voxel, 13] /= (self.num_frames * self.voxel_vol * 2.0)
                self.voxeldata[voxel, 16] = self.voxeldata[voxel, 15] / (self.voxeldata[voxel, 4] * 2.0)
                self.voxeldata[voxel, 15] /= (self.num_frames * self.voxel_vol * 2.0)
                # Column 19 is read here before the loop below rescales it, so the
                # statement order matters.
                if self.voxeldata[voxel, 19] > 0.0:
                    self.voxeldata[voxel, 18] = self.voxeldata[voxel, 17] / (self.voxeldata[voxel, 19] * 2.0)
                    self.voxeldata[voxel, 17] /= (self.num_frames * self.voxel_vol * self.voxeldata[voxel, 19] * 2.0)
                for i in range(19, 35, 2):
                    self.voxeldata[voxel, i + 1] = self.voxeldata[voxel, i] / self.voxeldata[voxel, 4]
                    self.voxeldata[voxel, i] /= (self.num_frames * self.voxel_vol)
            else:
                # Too few samples in this voxel: zero the accumulated totals.
                self.voxeldata[voxel, 13] *= 0.0
                self.voxeldata[voxel, 15] *= 0.0
                if self.voxeldata[voxel, 19] > 0.0:
                    self.voxeldata[voxel, 17] *= 0.0
                for i in range(19, 35, 2):
                    self.voxeldata[voxel, i] *= 0.0

        # Calculate entropies
        if entropy:
            self.calculate_entropy(num_frames=self.num_frames)
Esempio n. 29
0
def _expand_ambiguous_atoms(df_noes, top, side):
    '''Make the ambiguous atoms on one side of the NOE pairs explicit.

    Every row whose "Atom<side>" name starts with "M" (three protons expected)
    or "Q" (two protons expected) is removed and replaced by one row per
    matching proton of that residue.

    Input:

        - df_noes (pandas dataframe): NOE distances dataframe
        - top (mdtraj topology): topology used to look the protons up
        - side (int): 1 or 2, selects the Atom/ResID/AtomID column set

    Output:

        - pandas dataframe with the ambiguous rows replaced and a fresh
          0..n-1 index

    Raises AssertionError when the number of matching protons differs from
    the expected count.
    '''
    import pandas as pd

    atom_col = "Atom%d" % side
    resid_col = "ResID%d" % side
    atomid_col = "AtomID%d" % side

    todel = []
    toadd = []

    for i, row in df_noes.iterrows():
        name = row[atom_col]
        if name.startswith("M"):
            n_expected = 3
        elif name.startswith("Q"):
            n_expected = 2
        else:
            continue

        # The residue index in the selection is ResID - 1; the pseudo-atom
        # prefix is replaced by "H" and any suffix is allowed.
        selection = "resid " + str(int(row[resid_col]) - 1) + \
                    " and (name =~ 'H" + name[1:] + ".*')"
        sel = top.select(selection)
        # Raised explicitly so that the check also runs under "python -O".
        if len(sel) != n_expected:
            raise AssertionError(str(row) + str(sel))

        todel.append(i)
        for atom in [top.atom(a) for a in sel]:
            newrow = row.copy()
            newrow[atom_col] = atom.name
            newrow[atomid_col] = atom.index
            # Expanded rows are marked by adding 10 to Origin.
            newrow["Origin"] += 10
            toadd.append(newrow)

    df_noes = df_noes.drop(todel)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # replacement and, like append(..., ignore_index=True), renumbers rows.
    if toadd:
        return pd.concat([df_noes, pd.DataFrame(toadd)], ignore_index=True)
    return df_noes.reset_index(drop=True)


def Extend_noes(df_noes, gro_file):
    ''' Ambiguous NOEs (M* and Q*) indicate multiple protons and are made
    explicit.

    The first atom of every pair is expanded first, then the second atom
    (including the rows created by the first pass), and the result is sorted
    by ResID1 and ResID2.

    Input:

        - df_noes (pandas dataframe): NOE distances dataframe; the columns
          Atom1, Atom2, ResID1, ResID2, AtomID1, AtomID2 and Origin are used
        - gro_file (str): topology file

    Output:

        - df_noes (pandas dataframe) : NOE distances modified dataframe
    '''

    top = md.load_topology(gro_file)

    # First Atom ##
    df_noes = _expand_ambiguous_atoms(df_noes, top, 1)

    # Second Atom ##
    df_noes = _expand_ambiguous_atoms(df_noes, top, 2)

    df_noes = df_noes.sort_values(['ResID1', 'ResID2'])

    return df_noes
Esempio n. 30
0
## Calculate the correlation functions and the standard deviation in the correlation function.
## Save the correlation functions in a dataframe and then to a csv file for later use.

# calc_Ct returns the correlation functions and their deviations for vecs_LS.
Ct, dCt = calc_Ct(vecs_LS)
# Rows are indexed by (lag number) * 20 / 1000 and columns by NH_Res.
# NOTE(review): the 20/1000 factor is presumably a 20 ps frame spacing
# expressed in ns -- confirm against the trajectory output frequency.
CtDF = pd.DataFrame(Ct, index = np.arange(1,Ct.shape[0]+1)*20/1000, columns=NH_Res)
dCtDF = pd.DataFrame(dCt, index = np.arange(1,dCt.shape[0]+1)*20/1000, columns=NH_Res)
# NOTE(review): absolute, cluster-specific output paths.
CtDF.to_csv('/scratch/users/ah14k/ChiZ/Analysis/AMBER03WS/Ct_{}_comb_36us.csv'.format(tauL))
dCtDF.to_csv('/scratch/users/ah14k/ChiZ/Analysis/AMBER03WS/dCt_{}_comb_36us.csv'.format(tauL))

# Begin Curve Fitting; If you don't need to calculate the vectors then skip to here.

# In[20]:

## Load Experimental NOE data: This will depend on the shape of your NMR data
top14 = "{}AMBER14SB/Tip4pD/PROD/Analysis/07_Prod.noH20.ChiZN_0.025M-NaCl_capped.prmtop".format(ChiZLoc)
parm714 = md.load_topology(top14)
# Despite the "CA" in the variable names, this selects the N atom of every
# non-proline residue.
CAsel = parm714.select('name N and not resname PRO')
# String form of each selected atom, then the same label with "-N" removed.
RESCaINFO = np.array(["{}".format(parm714.atom(x)) for x in CAsel])
RESINFO = np.array([x.replace('-N',"") for x in RESCaINFO])
EXPNOEF = "/scratch/users/ah14k/ChiZ/ChiZN164NOEpH7_New.csv"
# Comma-separated file; the first line is skipped and column names are set explicitly.
EXPNOEdf = pd.read_table(EXPNOEF,delimiter=',',skiprows=1,header=None, names=['Residue Number','T1','T1_Err','T2','T2_Err','NOE','NOE_Err'])
# Drop the row labelled 0 and turn '-' placeholders into NaN.
EXPNOEdf = EXPNOEdf.drop(0)
EXPNOEdf = EXPNOEdf.replace('-',np.nan)
# Attach the residue labels; RESINFO must have one entry per remaining row.
EXPNOEdf['RES'] = RESINFO
# T1, T1_Err, T2 and T2_Err are converted to float and divided by 1000
# (presumably ms -> s -- confirm); NOE and NOE_Err are only converted to float.
EXPNOEdf.iloc[:,1:5] = EXPNOEdf.iloc[:,1:5].astype('float')/1000
EXPNOEdf.iloc[:,5:7] = EXPNOEdf.iloc[:,5:7].astype('float')


# In[21]:
## Calculate mean array for the experimental data
# Constant array (0.5732) with one entry per row labelled 3 through 24.
T1MEANArrExpNT = np.array([0.5732]*EXPNOEdf.loc[3:24].shape[0])
Esempio n. 31
0
    def _truncate_trajectory_file(self, number_of_frames):
        """Truncates the trajectory file to the specified number
        of frames.

        The trajectory at `self._local_trajectory_path` is rewritten in
        place: the first `number_of_frames` frames are streamed into a
        temporary file which then replaces the original. Nothing is written
        when the trajectory already has exactly the requested length.

        Parameters
        ----------
        number_of_frames: int
            The number of frames to truncate to.

        Raises
        ------
        ValueError
            If the trajectory holds fewer frames than requested, or if the
            rewritten trajectory does not contain exactly
            `number_of_frames` frames.
        """
        import mdtraj
        from mdtraj.formats.dcd import DCDTrajectoryFile
        from mdtraj.utils import in_units_of

        # Load in the required topology object.
        topology = mdtraj.load_topology(self.input_coordinate_file)

        # Parse the internal mdtraj distance unit. While private access is
        # undesirable, this is never publicly defined and I believe this
        # route to be preferable over hard coding this unit here.
        base_distance_unit = mdtraj.Trajectory._distance_unit

        # Get an accurate measurement of the length of the trajectory
        # without holding it in memory all at once.
        trajectory_length = sum(
            len(chunk)
            for chunk in mdtraj.iterload(self._local_trajectory_path,
                                         top=topology))

        # Make sure there is at least the expected number of frames.
        if trajectory_length < number_of_frames:

            raise ValueError(
                f'The saved number of trajectory frames ({trajectory_length}) '
                f'is less than expected ({number_of_frames}).')

        if trajectory_length == number_of_frames:
            return

        # Truncate the trajectory by streaming one frame of the trajectory at
        # a time.
        temporary_trajectory_path = f'{self._local_trajectory_path}.tmp'

        try:

            with DCDTrajectoryFile(self._local_trajectory_path,
                                   'r') as input_file:

                with DCDTrajectoryFile(temporary_trajectory_path,
                                       'w') as output_file:

                    for _ in range(number_of_frames):

                        frame = input_file.read_as_traj(topology,
                                                        n_frames=1,
                                                        stride=1)

                        # Frames without unit cell information carry None
                        # here; pass that through rather than indexing it.
                        cell_lengths = frame.unitcell_lengths
                        cell_angles = frame.unitcell_angles

                        if cell_lengths is not None:
                            cell_lengths = in_units_of(
                                cell_lengths, base_distance_unit,
                                output_file.distance_unit)

                        if cell_angles is not None:
                            cell_angles = cell_angles[0]

                        output_file.write(
                            xyz=in_units_of(frame.xyz, base_distance_unit,
                                            output_file.distance_unit),
                            cell_lengths=cell_lengths,
                            cell_angles=cell_angles)

            os.replace(temporary_trajectory_path, self._local_trajectory_path)

        finally:

            # After a successful replace the temporary file no longer exists;
            # this only removes a partial file left behind by a failure.
            if os.path.exists(temporary_trajectory_path):
                os.remove(temporary_trajectory_path)

        # Do a sanity check to make sure the trajectory was successfully
        # truncated. The already loaded topology is reused so the coordinate
        # file is not parsed a second time.
        new_trajectory_length = sum(
            len(chunk)
            for chunk in mdtraj.iterload(self._local_trajectory_path,
                                         top=topology))

        if new_trajectory_length != number_of_frames:
            raise ValueError('The trajectory was incorrectly truncated.')
Esempio n. 32
0
# Analysis parameters.
start_time = 0.0
step = 200.0  # ps per md frame
max_conf = 1000  # max number of conformations that can be handled to efficiently calculate RMSDs
n_clusters = 6
n_ref = 4  # number of reference models to use for comparison

# Load trajectories to analyse

# One trajectory per replica directory. The replica numbers are listed
# explicitly; 3 and 16 are not in the list.
trajs = []
for i in [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20]:
    print(i)
    # Alternative that loops over every replica number:
    #for i in range(1,no_replicas+1):
    trajs.append(
        mdtraj.load('../' + str(i) + '/peptide_conf/all_novsite_fit.xtc',
                    top='../frame0_chainA_novsite.pdb'))
# Topology of the same PDB file that was used to load the trajectories.
topology = mdtraj.load_topology('../frame0_chainA_novsite.pdb')

# Load reference structures

# Topologies first: they are only used to build the atom selections below.
sys1 = mdtraj.load_topology('pept_exp_models/semiclosed23_new1.pdb')
sys2 = mdtraj.load_topology('pept_exp_models/Tamm_1.pdb')
sys3 = mdtraj.load_topology('pept_exp_models/Bax_cut_20aa_1.pdb')
sys4 = mdtraj.load_topology('pept_exp_models/1xop_straight_helix_model1.pdb')

# Each reference is loaded with backbone atoms only; the first model is
# additionally restricted to residue indices 0 to 19.
ref1 = mdtraj.load('pept_exp_models/semiclosed23_new1.pdb',
                   atom_indices=sys1.select('resid 0 to 19 and backbone'))
ref2 = mdtraj.load('pept_exp_models/Tamm_1.pdb',
                   atom_indices=sys2.select('backbone'))
ref3 = mdtraj.load('pept_exp_models/Bax_cut_20aa_1.pdb',
                   atom_indices=sys3.select('backbone'))
ref4 = mdtraj.load('pept_exp_models/1xop_straight_helix_model1.pdb',
Esempio n. 33
0
    def generate_clusters(self, density_factor, ligand_file,
                          clustercenter_file):
        """Generate hydration sites from water molecules found in the binding site
        during the simulation. Clustering is done in two steps; i). An initial clustering over a 10%
        of frames, and ii). A refinement step where all frames are used.

        Parameters

        ----------
        ligand_file : string
            Name of the PDB file containing atomic coordinates of the ligand,
            assumed to be co-crystallized with the protein.

        Returns
        -------
        final_cluster_coords : numpy.ndarray
            Coordinates of hydration sites, represented by a 2-D array with shape N x 3,
            where N is the number of hydration sites identified during clustering.

        site_waters : list
            List of N sub-lists where N is the number of identified hydration sites, each sublist
            consist of a 3-element tuple for every water identified in that site. First element of
            the tuple is frame number, second element is correct index of the oxygen atom in the
            the original topology and third element is the offset index as read from a version of
            a trimmed version trajectory for clustering.

        Notes
        -----
        The following attributes of the object are updated when the clustering is successfully completed.
        self.hsa_region_O_ids:
            The indices of water oxygen atoms in HSA region for each frame are stored
            in the corresponding lists.
        self.hsa_region_flat_ids:
            Same as above except that indices are not atom indices from the topology
            but in a sequence from 0 to N, where N is the total number of water oxygen atoms found in the
            HSA region throughout the simulation.
        self.hsa_region_water_coords:
            An N x 3 numpy array is initialized, where N is the total number of water water oxygen atoms found in the
            HSA region throughout the simulation. The array gets populated during individual frame processing.
        """
        sphere_radius = md.utils.in_units_of(1.0, "angstroms", "nanometers")
        topology = md.load_topology(self.topology_file)
        if self.non_water_atom_ids.shape[0] == 0:
            raise Exception(
                ValueError,
                "Clustering is supported only for solute-solvent systems, no solute atoms found."
            )

        ligand = md.load_pdb(ligand_file, no_boxchk=True)
        ligand_coords = ligand.xyz[0, :, :]
        binding_site_atom_indices = np.asarray(
            list(range(ligand_coords.shape[0])))
        init_cluster_coords = None
        # Step 1: Initial Clustering if user didn't provide cluster centers
        if clustercenter_file is None:
            clustering_stride = 10
            print("Reading trajectory for clustering.")
            with md.open(self.trajectory) as f:
                f.seek(self.start_frame)
                # read all frames if no frames specified by user
                if self.num_frames is None:
                    trj_short = f.read_as_traj(
                        topology,
                        atom_indices=np.concatenate(
                            (binding_site_atom_indices,
                             self.wat_oxygen_atom_ids
                             )))[self.start_frame::clustering_stride]
                else:
                    trj_short = f.read_as_traj(
                        topology,
                        atom_indices=np.concatenate((binding_site_atom_indices,
                                                     self.wat_oxygen_atom_ids))
                    )[self.start_frame:self.num_frames:clustering_stride]
                    print(trj_short.n_frames)
                if trj_short.n_frames < 10:
                    sys.exit(
                        "Clustering requires at least 100 frames, current trajectory contains {0:d} frames."
                        .format(trj_short.n_frames))
                print("Performing an initial clustering over {0:d} frames.".
                      format(trj_short.n_frames))
                # Obtain water molecules solvating the binding site
                # FIXME: This is a workaround to use MDTraj compute_neighbor function xyz coordinates of the trajectory are
                # modified such that first n atoms coordinates are switched to n atoms of ligand coordinates.
                # Unexpected things will happen if the number of solute atoms less than the number of ligand atoms, which is
                # highly unlikely.
                coords = trj_short.xyz
                for i_frame in range(trj_short.n_frames):
                    for pseudo_index in range(
                            binding_site_atom_indices.shape[0]):
                        coords[i_frame, pseudo_index, :] = ligand_coords[
                            pseudo_index, :]

                haystack = np.setdiff1d(trj_short.topology.select("all"),
                                        binding_site_atom_indices)
                binding_site_waters = md.compute_neighbors(
                    trj_short,
                    self.hsa_region_radius,
                    binding_site_atom_indices,
                    haystack_indices=haystack)
                # generate a list of tuples, each tuple is a water and corresponding frame number in trj_short
                water_id_frame_list = [(i, nbr)
                                       for i in range(len(binding_site_waters))
                                       for nbr in binding_site_waters[i]]

                # Start initial clustering by building a KDTree and get initial neighbor count for all waters
                water_coordinates = np.ma.array(
                    [coords[wat[0], wat[1], :] for wat in water_id_frame_list],
                    mask=False)
                tree = spatial.cKDTree(water_coordinates)
                nbr_list = tree.query_ball_point(water_coordinates,
                                                 sphere_radius)
                nbr_count_list = np.ma.array([len(nbrs) for nbrs in nbr_list],
                                             mask=False)
                cutoff = trj_short.n_frames * density_factor * 0.1401
                if np.ceil(cutoff) - cutoff <= 0.5:
                    cutoff = np.ceil(cutoff)
                else:
                    cutoff = np.floor(cutoff)
                n_wat = 3 * cutoff

                # Set up clustering loop
                cluster_list = []
                cluster_iter = 0
                while n_wat > cutoff:
                    # Get water with max nbrs and retrieve its neighbors and marked for exclusion in next iteration
                    max_index = np.argmax(nbr_count_list)
                    to_exclude = np.array(nbr_list[max_index])
                    # Set current water count to current neighbors plus one for the water itself
                    n_wat = len(to_exclude) + 1

                    # Mask current water, its neighbors so that they are not considered in the next iteration
                    nbr_count_list.mask[to_exclude] = True
                    nbr_count_list.mask[max_index] = True
                    # Mask current waters' and its neighbors' coords so that they are not considered in the next iteration
                    water_coordinates.mask[to_exclude] = True
                    water_coordinates.mask[max_index] = True

                    # Accumulate neighbors for each water in current cluster, removing common neighbors
                    nbrs_of_to_exclude = np.unique(
                        np.array([
                            n_excluded
                            for excluded_nbrs in nbr_list[to_exclude]
                            for n_excluded in excluded_nbrs
                        ]))

                    # Obtain the list of waters whose neighbors need to be updated due to exclusion of the waters above
                    to_update = np.setxor1d(to_exclude, nbrs_of_to_exclude)
                    to_update = np.setdiff1d(to_update, np.asarray(max_index))

                    # Update the neighbor count for each water from the list generated above
                    if to_update.shape[0] != 0:
                        tree = spatial.cKDTree(water_coordinates)
                        updated_nbr_list = tree.query_ball_point(
                            water_coordinates[to_update], sphere_radius)
                        # for each updated member, get its original index and update the original neighbor search list
                        for index, nbrs in enumerate(updated_nbr_list):
                            if not nbr_count_list.mask[to_update[index]]:
                                nbr_count_list[to_update[index]] = len(nbrs)

                    # Check distances with previously identified clusters and do not consider if within 1.2 A
                    # of an existing cluster
                    current_wat = water_id_frame_list[max_index]
                    current_wat_coords = md.utils.in_units_of(
                        coords[current_wat[0], current_wat[1], :],
                        "nanometers", "angstroms")
                    near_flag = 0
                    if len(cluster_list) != 0:
                        for clust in cluster_list:
                            clust_coords = coords[clust[0], clust[1], :]
                            dist = np.linalg.norm(current_wat_coords -
                                                  clust_coords)
                            if dist < 1.20:
                                near_flag += 1
                    if near_flag == 0:
                        cluster_iter += 1
                        cluster_list.append(water_id_frame_list[max_index])
                init_cluster_coords = [
                    coords[cluster[0], cluster[1], :]
                    for cluster in cluster_list
                ]
        else:
            clusters_pdb_file = md.load_pdb(clustercenter_file, no_boxchk=True)
            init_cluster_coords = clusters_pdb_file.xyz[0, :, :]

        # Read full trajectory
        print("Reading trajectory to obtain water molecules for each cluster.")
        with md.open(self.trajectory) as f:
            f.seek(self.start_frame)
            if self.num_frames is None:
                trj = f.read_as_traj(topology,
                                     stride=1,
                                     atom_indices=np.concatenate(
                                         (binding_site_atom_indices,
                                          self.wat_oxygen_atom_ids)))
                self.num_frames = trj.n_frames
            else:
                trj = f.read_as_traj(topology,
                                     n_frames=self.num_frames,
                                     stride=1,
                                     atom_indices=np.concatenate(
                                         (binding_site_atom_indices,
                                          self.wat_oxygen_atom_ids)))
                if trj.n_frames < self.num_frames:
                    print((
                        "Warning: {0:d} frames found in the trajectory, resetting self.num_frames."
                        .format(trj.n_frames)))
                    self.num_frames = trj.n_frames
            for i_frame in range(trj.n_frames):
                for pseudo_index in range(binding_site_atom_indices.shape[0]):
                    trj.xyz[i_frame,
                            pseudo_index, :] = ligand_coords[pseudo_index, :]
            haystack = np.setdiff1d(trj.topology.select("all"),
                                    binding_site_atom_indices)
            start_point = haystack[0]
            binding_site_waters = md.compute_neighbors(
                trj,
                self.hsa_region_radius,
                binding_site_atom_indices,
                haystack_indices=haystack)
            # From the full frame-wise set of waters in the binding site, build two more frame-wise lists
            # one where each frame has a correct index of waters and another with a new index which ranges from
            # 0 to M, where M is the total number of hsa region waters - 1
            start = 0
            for i in range(len(binding_site_waters)):
                self.hsa_region_O_ids.append([])
                self.hsa_region_flat_ids.append([])
                for wat in binding_site_waters[i]:
                    wat_0 = wat - start_point
                    wat_offset = (
                        wat_0 * self.water_sites) + self.wat_oxygen_atom_ids[0]
                    self.hsa_region_O_ids[i].append(wat_offset)
                    self.hsa_region_flat_ids[i].append(start)
                    start += 3

            water_id_frame_list = [(i, nbr)
                                   for i in range(len(binding_site_waters))
                                   for nbr in binding_site_waters[i]]
            water_coordinates = np.array(
                [trj.xyz[wat[0], wat[1], :] for wat in water_id_frame_list])

        # Initialize array that stores coordinates all water molecules in HSA region, used for entropy calcs
        self.hsa_region_water_coords = np.zeros(
            (len(water_id_frame_list) * 3, 3), dtype=float)
        tree = spatial.cKDTree(water_coordinates)
        nbr_list = tree.query_ball_point(init_cluster_coords, sphere_radius)
        final_cluster_coords = []
        cutoff = int(self.num_frames * density_factor * 0.1401)
        if np.ceil(cutoff) - cutoff <= 0.5:
            cutoff = np.ceil(cutoff)
        else:
            cutoff = np.floor(cutoff)

        # apply refinement if user defined clusters not provided
        if clustercenter_file is None:
            # Step 2: Refinement
            # Initialize variables and data structures
            # Read in the trajectory but only first N solute atoms where N equals the number of ligand atoms
            # plus all water oxygen atoms
            # WARNING: This shifts indices of waters and once they are assigned to clusters, the indices need to
            # be corrected.

            print((
                "Refining initial cluster positions by considering {0:d} frames."
                .format(self.num_frames)))
            # For each cluster, set cluster center equal to geometric center of all waters in the cluster
            site_waters = []
            cluster_index = 1
            for cluster in nbr_list:
                cluster_water_coords = water_coordinates[cluster]
                if len(cluster) > cutoff:
                    near_flag = 0
                    waters_offset = [
                        (water_id_frame_list[wat][0] + self.start_frame,
                         ((water_id_frame_list[wat][1] - start_point) *
                          self.water_sites) + self.wat_oxygen_atom_ids[0])
                        for wat in cluster
                    ]

                    com = np.zeros(3)
                    masses = np.ones(cluster_water_coords.shape[0])
                    masses /= masses.sum()
                    com[:] = water_coordinates[cluster].T.dot(masses)
                    cluster_center = com[:]
                    # Raise flag if the current cluster center is within 1.2 A of existing cluster center
                    for other, coord in enumerate(final_cluster_coords[:-1]):
                        dist = np.linalg.norm(
                            md.utils.in_units_of(cluster_center, "nanometers",
                                                 "angstroms") - coord)
                        if dist < 1.20:
                            near_flag += 1
                    # Only add cluster center if it is at a safe distance from others
                    if near_flag == 0:
                        final_cluster_coords.append(
                            md.utils.in_units_of(cluster_center, "nanometers",
                                                 "angstroms"))
                        site_waters.append(waters_offset)
                        cluster_index += 1
        # otherwise store data for each user defined cluster
        else:
            # For each cluster, set cluster center equal to geometric center of all waters in the cluster
            final_cluster_coords = md.utils.in_units_of(
                init_cluster_coords, "nanometers", "angstroms")
            site_waters = []
            cluster_index = 1
            for cluster in nbr_list:
                waters_offset = [
                    (water_id_frame_list[wat][0] + self.start_frame,
                     ((water_id_frame_list[wat][1] - start_point) *
                      self.water_sites) + self.wat_oxygen_atom_ids[0])
                    for wat in cluster
                ]
                site_waters.append(waters_offset)
                cluster_index += 1

        # Write clustercenter file
        write_watpdb_from_coords("clustercenterfile", final_cluster_coords)
        self.clustercenter_file = "clustercenterfile.pdb"
        print(("Final number of clusters: {0:d}".format(
            len(final_cluster_coords))))
        return np.asarray(final_cluster_coords), site_waters
Esempio n. 34
0
# Command-line demo: read a dash output file and, for every dash state it
# reports, load that state's representative frame from the raw trajectory and
# save it as 'state_<index>.pdb' in the current working directory.
parser = argparse.ArgumentParser(
    description='This script demonstrates integration of pymdash with mdtraj'
    ' to extract the representative frame for each dash state from a raw '
    'trajectory file.')
parser.add_argument('dash_out_file', help='Path to the output file from dash')
parser.add_argument('trajectory_file',
                    help='Path to the trajectory data file used to provide'
                    ' input for the dash run.')
parser.add_argument(
    '-t',
    '--topology',
    help='Path to a suitable structure file that can be '
    'used to derive a topology by mdtraj. See mdtraj.load_topology for details'
)
args = parser.parse_args()

# The topology is optional on the command line; when it is omitted, None is
# forwarded to md.load_frame below.
topology = None if args.topology is None else md.load_topology(args.topology)

# Parse the dash output; the file handle is only needed while DashOutput reads
# it, so it is closed as soon as the object has been built.
with open(args.dash_out_file) as f:
    dash = mdash.DashOutput(f)

print('Writing pdb files for {} dash states'.format(dash.n_states))
for state in dash.states:
    # NOTE(review): the "- 1" presumably converts a 1-based rep_frame reported
    # by dash into the frame index expected by md.load_frame -- confirm
    # against the pymdash documentation.
    traj = md.load_frame(args.trajectory_file,
                         state.rep_frame - 1,
                         top=topology)
    traj.save_pdb('state_{}.pdb'.format(state.index))