Exemple #1
0
    def __init__(self, ff_type=None, system_file=None, **kwargs):
        """Initialise from force-field specific files or an OpenMM XML system.

        At most one of ``ff_type`` and ``system_file`` may be given. Which
        keyword arguments are consulted depends on the pathway taken.

        supported ff_type
        -----------------
        amber :: give a prmtop and an inpcrd
        openmm :: give an XML file for the system

        Only ``"amber"`` (case-insensitive) triggers any loading through
        the ``ff_type`` pathway in this method; any other value leaves the
        instance with just the empty defaults set below.

        supported kwargs
        ----------------
        topology :: system-specific or not depending on FF
        coordinates :: source of coordinates for initial state
        """

        assert (ff_type is None) or (system_file is None)

        # Registry of the API calls, atom groups and force parameters
        # needed to generate all the given restraints.
        self._restraints = {}
        self._topology = None

        topology_path = kwargs.get("topology")
        coordinates_path = kwargs.get("coordinates")

        if ff_type is not None:
            if ff_type.lower() != "amber":
                return
            amber_top = AmberPrmtopFile(topology_path)
            amber_crd = AmberInpcrdFile(coordinates_path)
            # NoCutoff is used on purpose: CutoffNonPeriodic would mirror
            # what amber does, but preliminary tests showed it hurts
            # small/medium proteins.
            self.system = amber_top.createSystem(nonbondedMethod=NoCutoff)
            self._topology = Topology.from_openmm(amber_top.topology)
            self._positions = amber_crd
            return

        if system_file is None:
            # Neither pathway requested.
            # TODO inspect and set ff_type as instance attribute
            return

        self.load_xml(system_file)
        if topology_path and topology_path.endswith(".pdb"):
            # Topology does not seem to load a PDB directly, so go
            # through PDBFile; this keeps the imports clean.
            self._topology = Topology.from_openmm(
                PDBFile(topology_path).topology)
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build a single-chain topology from parallel per-atom arrays.

    Parameters
    ----------
    AtomID : array-like
        Per-atom ids; atoms are added in ascending order of this array.
    AtomNames : array-like of str or bytes
        Per-atom names. The element is guessed from the first character
        left after stripping leading digits.
    ChainID : array-like of str or bytes
        Per-atom chain ids. Only a single distinct value is supported.
    ResidueID : array-like
        Per-atom residue ids; one residue is created per distinct id, in
        ascending id order.
    ResidueNames : array-like of str or bytes
        Per-atom residue names.

    Returns
    -------
    topology : Topology
        Topology with one chain and standard bonds created.

    Raises
    ------
    NotImplementedError
        If ``ChainID`` contains more than one distinct chain id.
    """
    # The previous check passed a generator to ``np.all``, which is always
    # truthy, so multi-chain input was silently merged into one chain.
    # Everything below goes into a single chain, so refuse input that
    # names more than one. Bytes are decoded so b'A' and 'A' compare equal.
    distinct_chains = set(
        chainid.decode() if isinstance(chainid, bytes) else chainid
        for chainid in ChainID)
    if len(distinct_chains) > 1:
        raise NotImplementedError('Im not prepared to parse multiple chains')

    topology = Topology()
    chain0 = topology.add_chain()

    # register the residues (first occurrence of each id, ascending order)
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, str):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, str):
            atom_name = atom_name.decode()
        # names may carry leading digits (e.g. "1HB"); skip them
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
Exemple #3
0
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    All five arguments are parallel per-atom arrays. Residues are created
    in ascending ``ResidueID`` order and atoms in ascending ``AtomID``
    order, all inside a single chain.

    Raises
    ------
    NotImplementedError
        If ``ChainID`` contains more than one distinct chain id.
    """
    # The previous check passed a generator to ``np.all``, which is always
    # truthy, so multi-chain input was silently merged into one chain.
    # Bytes are decoded so b'A' and 'A' count as the same chain. This runs
    # before the delayed import so bad input fails fast.
    distinct_chains = set(
        chainid.decode() if isinstance(chainid, bytes) else chainid
        for chainid in ChainID)
    if len(distinct_chains) > 1:
        raise NotImplementedError('Im not prepared to parse multiple chains')

    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology
    topology = Topology()
    chain0 = topology.add_chain()

    # register the residues (first occurrence of each id, ascending order)
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # names may carry leading digits (e.g. "1HB"); skip them
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
Exemple #4
0
    def show_conformations(self, centers_indices=None, rotations_indices=None, nodes_labels=None,
                           least_rmsd_fit='receptor', center_rmsd_fit='receptor'):
        """Return a viewer widget for the selected conformations.

        The conformations come from ``self.get_conformations``; their
        OpenMM topology is converted to an MDTraj one and the positions
        are divided by ``unit.nanometer`` before building the trajectory
        handed to the viewer. ``least_rmsd_fit`` and ``center_rmsd_fit``
        are accepted but not used by this method.
        """
        molcomplex = self.get_conformations(centers_indices, rotations_indices, nodes_labels)
        md_topology = _mdtraj_topology.from_openmm(molcomplex.topology)
        positions_nm = molcomplex.positions/unit.nanometer
        md_trajectory = _mdtraj_trajectory(positions_nm, md_topology)
        return _nv_show_mdtraj(md_trajectory)
Exemple #5
0
 def subset(self, selector):
     """
     Returns a list of atom indices corresponding to a MDTraj DSL
     query. Also will accept list of numbers, which will be coerced
     to int and returned.

     A ``list`` or ``tuple`` selector is returned as a new list of ints
     without touching the topology. Any other selector is first looked
     up in ``SELECTORS`` (falling back to the value itself) and then
     evaluated with the MDTraj topology's ``select``.
     """
     if isinstance(selector, (list, tuple)):
         # Build a real list: on Python 3 ``map`` is a one-shot iterator,
         # which contradicts the documented return type.
         return [int(index) for index in selector]
     selector = SELECTORS.get(selector, selector)
     mdtop = MDTrajTopology.from_openmm(self.handler.topology)
     return mdtop.select(selector)
Exemple #6
0
    def _map_topology(self):
        """
        Build the coarse-grained topology from the atomistic one.

        Resets the atom-to-bead mapping, the CG topology and the solvent
        counter, then maps every residue of the atomistic topology with
        the solvent or non-solvent handler depending on its name.

        Raises ``OutOfOrderError`` if no atomistic trajectory is loaded.
        """

        # A trajectory must be present before any mapping can happen
        if self._aa_traj is None:
            raise OutOfOrderError("An atomistic trajectory has not "
                                  "been loaded into this Mapper yet.")

        self._atom_bead_mapping = {}
        self._cg_top = Topology()
        self._solvent_counter = 0

        # Dispatch each residue to the matching mapping routine
        for aa_residue in self._aa_top.residues:
            is_solvent = aa_residue.name == self._solvent_name
            map_residue = (self._map_solvent_top if is_solvent
                           else self._map_nonsolvent_top)
            map_residue(aa_residue)
Exemple #7
0
    def add_mol_to_topology(self, coords: np.ndarray, types: np.ndarray,
                            topology: mdtraj.Topology):
        """Append one molecule to ``topology`` as a new chain.

        Every atom gets its own residue named ``mol_<i>`` inside the new
        chain. ``coords`` is only used for shape validation.

        Parameters
        ----------
        coords : np.ndarray
            2-D array with one row per atom.
        types : np.ndarray
            Either a 2-D array whose row-wise argmax is looked up through
            ``self.sequential_to_atomic_number()``, or a 1-D array of keys
            for ``atomic_number_to_symbol()``.
        topology : mdtraj.Topology
            Topology that is modified in place.

        Raises
        ------
        ValueError
            If ``types`` is neither 1-D nor 2-D.
        """
        assert coords.shape[0] == types.shape[0]
        assert coords.ndim == 2
        chain = topology.add_chain()

        # Convert types to symbols. The lookup tables are fetched once per
        # call instead of once per atom.
        if types.ndim == 2:
            seq_to_number = self.sequential_to_atomic_number()
            number_to_symbol = atomic_number_to_symbol()
            seqs = np.argmax(types, axis=1)
            atms = [seq_to_number[t] for t in seqs]
            syms = [number_to_symbol[t] for t in atms]
        elif types.ndim == 1:
            number_to_symbol = atomic_number_to_symbol()
            syms = [number_to_symbol[t] for t in types]
        else:
            raise ValueError(
                "Types must either be one hot vectors with ndim==2 XOR numbers with ndim==1."
            )

        for i, s in enumerate(syms):
            res = topology.add_residue("mol_{}".format(i), chain)
            topology.add_atom(s, mdtraj.element.get_by_symbol(s), res)
Exemple #8
0
def _traj_from_xyza(xyz, atomic_numbers, units='nm'):
    """
    Build a minimal trajectory from coordinates and atomic numbers.

    All atoms are placed in one residue named 'XXX' inside one chain.

    Parameters
    ----------
    xyz : np.array, float, shape( num_atom, 3)
        Coordinates; the first axis is iterated as atoms.

    atomic_numbers : np.array, int, shape( num_atom, 1 )
        the atomic numbers of each of the atoms, used as keys into
        ``periodic_table``.

    Optional Parameters
    -------------------
    units : str
        if units == 'nm' then nothing happens. if units == 'ang' then
        we convert them to nm. Any other value is treated like 'nm'.

    Returns
    -------
    structure : mdtraj.trajectory
        A meta-data minimal mdtraj instance
    """

    if units == 'ang':
        # Divide into a new array: an in-place ``/=`` would silently
        # rescale the caller's data and fails for integer arrays.
        xyz = xyz / 10.

    top = Topology()
    chain = top.add_chain()
    residue = top.add_residue('XXX', chain)

    for i in range(xyz.shape[0]):
        # second field of the periodic_table entry should be the symbol
        element_symb = periodic_table[atomic_numbers[i]][1]
        element = Element.getBySymbol(element_symb)
        name = '%s' % element_symb
        top.add_atom(name, element, residue)

    structure = Trajectory(xyz=xyz, topology=top)

    return structure
Exemple #9
0
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    All five arguments are parallel per-atom arrays. Residues are created
    in ascending ``ResidueID`` order and atoms in ascending ``AtomID``
    order, all inside a single chain. Atoms whose guessed element symbol
    is unknown are added with ``element=None``.

    Raises
    ------
    NotImplementedError
        If ``ChainID`` contains more than one distinct chain id.
    """
    # The previous check passed a generator to ``np.all``, which is always
    # truthy, so multi-chain input was silently merged into one chain.
    # Bytes are decoded so b'A' and 'A' count as the same chain. This runs
    # before the delayed import so bad input fails fast.
    distinct_chains = set(
        chainid.decode() if isinstance(chainid, bytes) else chainid
        for chainid in ChainID)
    if len(distinct_chains) > 1:
        raise NotImplementedError('Im not prepared to parse multiple chains')

    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology
    topology = Topology()
    chain0 = topology.add_chain()

    # register the residues (first occurrence of each id, ascending order)
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # names may carry leading digits (e.g. "1HB"); skip them
        element_symbol = atom_name.lstrip('0123456789')[0]

        try:
            element = elem.get_by_symbol(element_symbol)
        except KeyError:
            # unknown symbol: keep the atom but leave its element unset
            element = None

        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
Exemple #10
0
def test_3nch_serial_resSeq():
    """Round-trip 3nch through a dataframe and check serial/resSeq values."""
    # If you use zero-based indexing, this PDB has quite large gaps in residue and atom numbering, so it's a good test case.  See #528
    # Gold standard values obtained via
    # cat 3nch.pdb |grep ATM|tail -n 5
    # HETATM19787  S   SO4 D 804      -4.788  -9.395  22.515  1.00121.87           S
    # HETATM19788  O1  SO4 D 804      -3.815  -9.511  21.425  1.00105.97           O
    # HETATM19789  O2  SO4 D 804      -5.989  -8.733  21.999  1.00116.13           O
    # HETATM19790  O3  SO4 D 804      -5.130 -10.726  23.043  1.00108.74           O
    # HETATM19791  O4  SO4 D 804      -4.210  -8.560  23.575  1.00112.54           O
    t1 = load_pdb(get_fn('3nch.pdb.gz'))
    top, bonds = t1.top.to_dataframe()

    top2 = Topology.from_dataframe(top, bonds)
    eq(t1.top, top2)

    top = top.set_index('serial')  # Index by the actual data in the PDB
    # ``.loc`` is the label-based lookup; ``.ix`` was removed from pandas.
    eq(str(top.loc[19791]["name"]), "O4")
    eq(str(top.loc[19787]["name"]), "S")
    eq(str(top.loc[19787]["resName"]), "SO4")
    eq(int(top.loc[19787]["resSeq"]), 804)
Exemple #11
0
def test_3nch_serial_resSeq():
    """Round-trip 3nch through a dataframe and check serial/resSeq values."""
    # If you use zero-based indexing, this PDB has quite large gaps in residue and atom numbering, so it's a good test case.  See #528
    # Gold standard values obtained via
    # cat 3nch.pdb |grep ATM|tail -n 5
    # HETATM19787  S   SO4 D 804      -4.788  -9.395  22.515  1.00121.87           S
    # HETATM19788  O1  SO4 D 804      -3.815  -9.511  21.425  1.00105.97           O
    # HETATM19789  O2  SO4 D 804      -5.989  -8.733  21.999  1.00116.13           O
    # HETATM19790  O3  SO4 D 804      -5.130 -10.726  23.043  1.00108.74           O
    # HETATM19791  O4  SO4 D 804      -4.210  -8.560  23.575  1.00112.54           O
    t1 = load_pdb(get_fn('3nch.pdb.gz'))
    top, bonds = t1.top.to_dataframe()

    top2 = Topology.from_dataframe(top, bonds)
    eq(t1.top, top2)

    top = top.set_index('serial')  # Index by the actual data in the PDB
    # ``.loc`` is the label-based lookup; ``.ix`` was removed from pandas.
    eq(str(top.loc[19791]["name"]), "O4")
    eq(str(top.loc[19787]["name"]), "S")
    eq(str(top.loc[19787]["resName"]), "SO4")
    eq(int(top.loc[19787]["resSeq"]), 804)
Exemple #12
0
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Convert a ``molsysmt.Topology`` item into an ``mdtraj.Topology``.

    Atoms are added in dataframe row order. A new chain is started
    whenever ``chain_index`` changes from the previous row and a new
    residue whenever ``group_index`` changes. Bonds are copied from
    ``item.bonds_dataframe`` without type or order.

    Parameters
    ----------
    item : molsysmt.Topology
        Source topology exposing ``atoms_dataframe`` and
        ``bonds_dataframe``.
    atom_indices : 'all' or sequence of int
        Digested when ``check`` is True.
        NOTE(review): the value is never used to subset the output; the
        whole topology is always converted. Confirm whether this is
        intended.
    check : bool
        If True, validate ``item`` and digest ``atom_indices`` first.

    Returns
    -------
    mdtraj.Topology

    Raises
    ------
    LibraryNotFound
        If mdtraj cannot be imported.
    """

    if check:

        digest_item(item, 'molsysmt.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    try:
        from mdtraj import Topology
        from mdtraj.core import element
    except ImportError:
        # Only a failed import means the library is missing; any other
        # error should surface unchanged.
        raise LibraryNotFound('mdtraj')

    atoms = item.atoms_dataframe
    n_atoms = atoms.shape[0]

    atom_name_array = atoms["atom_name"].to_numpy()
    atom_type_array = atoms["atom_type"].to_numpy()

    group_index_array = atoms["group_index"].to_numpy()
    group_name_array = atoms["group_name"].to_numpy()
    group_id_array = atoms["group_id"].to_numpy()

    chain_index_array = atoms["chain_index"].to_numpy()

    bonds_atom1 = item.bonds_dataframe["atom1_index"].to_numpy()
    bonds_atom2 = item.bonds_dataframe["atom2_index"].to_numpy()

    tmp_item = Topology()

    # -1 acts as the "nothing seen yet" marker for the first row
    former_group_index = -1
    former_chain_index = -1

    list_new_atoms = []

    for ii in range(n_atoms):

        group_index = group_index_array[ii]
        chain_index = chain_index_array[ii]

        if former_chain_index != chain_index:
            chain = tmp_item.add_chain()
            former_chain_index = chain_index

        if former_group_index != group_index:
            residue = tmp_item.add_residue(group_name_array[ii],
                                           chain,
                                           resSeq=str(group_id_array[ii]))
            former_group_index = group_index

        elem = element.get_by_symbol(atom_type_array[ii])
        atom = tmp_item.add_atom(atom_name_array[ii], elem, residue)

        list_new_atoms.append(atom)

    for atom_1, atom_2 in zip(bonds_atom1, bonds_atom2):

        # bond type and bond order are still missing here
        tmp_item.add_bond(
            list_new_atoms[atom_1],
            list_new_atoms[atom_2])

    return tmp_item
Exemple #13
0
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None, segment_id_list=None, resSeq_list=None, inplace=False, bonds=None, mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Each bead's coordinates are produced by the selected mapping function
    applied to the atoms listed for that bead.

    Parameters
    ----------
    trj : md.Trajectory
        Atomistic trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of maximum 4-letter strings to label CG sites
        Labels are upper-cased before use.
    chain_list : optional list of chain id's to split resulting beads into separate chains
        Defaults to chain 1 for every bead.
    segment_id_list : optional list of segment id's to assign cg residues
    resSeq_list : optional list of residue sequence id's to assign cg residues
        Defaults to ``bead position + 1``.
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise, a copy is returned with the sliced atoms, and
        ``trj`` is not modified. The in-place path replaces the
        coordinates (and the topology if one was set) but does not
        attach mapped forces.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    mapping_function: string, default='com': how to map xyz coordinates
        Must be a key of the module-level ``mapping_options`` dict.

    Note - If repeated resSeq values are used, as for a repeated motif in a CG polymer,
        those sections must be broken into separate chains or an incorrect topology will result

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory,
        depending on the value of ``inplace``.

    Raises
    ------
    ValueError
        If list lengths disagree, a bead label is not a 1-4 character
        ``str``, or ``mapping_function`` is unknown.
    """
    if not len(atom_indices_list) == len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the same length as a list of selected atom indices")
    for bead_label in bead_label_list:
        if not (type(bead_label) is str) or len(bead_label) > 4 or len(bead_label) < 1:
            raise ValueError("Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"%bead_label)
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s"%mapping_options.keys())
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(len(atom_indices_list), dtype=int)
    elif len(chain_list) != len(atom_indices_list):
        raise ValueError("Supplied chain_list must be of the same length as a list of selected atom indices")

    if segment_id_list is not None and len(segment_id_list) != len(atom_indices_list):
        raise ValueError("Supplied segment_id_list must be of the same length as a list of selected atom indices")

    if resSeq_list is not None and len(resSeq_list) != len(atom_indices_list):
        raise ValueError("Supplied resSeq_list must be of the same length as a list of selected atom indices")

    n_beads = len(atom_indices_list)
    n_frames, n_dims = trj.xyz.shape[0], trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dims), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dims), dtype=np.double, order='C')
    columns = ["serial","name","element","resSeq","resName","chainID"]

    # Masses of each bead's member atoms, in topology order. Collected
    # once per bead with a set lookup rather than rescanning per use.
    topology_atoms = list(trj.top.atoms)
    bead_atom_masses = []
    for atom_indices in atom_indices_list:
        members = set(atom_indices)
        bead_atom_masses.append(
            np.array([a.mass for a in topology_atoms if a.index in members],
                     dtype=np.float64))
    masses = np.array([m.sum() for m in bead_atom_masses], dtype=np.float64)

    topology_labels = []
    element_label_dict = {}

    # Scratch buffer that the mapping function fills for one bead
    xyz_i = np.zeros((n_frames, n_dims), dtype=trj.xyz.dtype, order='C')

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]

        map_coords(xyz_i, trj.xyz, atom_indices, bead_atom_masses[i],
                   unitcell_lengths=trj.unitcell_lengths)

        xyz[:,i,:] = xyz_i

        if "forces" in trj.__dict__ and len(trj.forces) > 0:
            forces_i = map_forces(trj, atom_indices)
            forces[:,i,:] = forces_i

        if resSeq_list is not None:
            resSeq = resSeq_list[i]
        else:
            resSeq = i + 1

        # One pseudo-element label ("B0".."B9") per distinct bead label
        if bead_label not in element_label_dict:
            element_label = '%2s'%('B%i'%(len(element_label_dict)%10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        # Register the pseudo-element if it is not known yet
        if element_label.strip().upper() not in element.Element._elements_by_symbol:
            element.Element(1000+resSeq, element_label, element_label, masses[i], 1.0)

        topology_labels.append([i, bead_label, element_label, resSeq, '%3s'%bead_label, chain_list[i]])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz

        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()
    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
Exemple #14
0
def run_protocol(equilibrium_result: EquilibriumResult,
                 thermodynamic_state: states.ThermodynamicState,
                 alchemical_functions: dict,
                 nstep_neq: int,
                 topology: md.Topology,
                 work_save_interval: int,
                 splitting: str = "V R O H R V",
                 atom_indices_to_save: List[int] = None,
                 trajectory_filename: str = None,
                 write_configuration: bool = False,
                 timestep: unit.Quantity = 1.0 * unit.femtoseconds,
                 measure_shadow_work: bool = False) -> NonequilibriumResult:
    """
    Perform a nonequilibrium switching protocol and return the nonequilibrium protocol work. Note that it is expected
    that this will perform an entire protocol, that is, switching lambda completely from 0 to 1, in increments specified
    by the ne_mc_move. The trajectory that results, along with the work values, will contain n_iterations elements.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult namedtuple
        The result of an equilibrium simulation
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state at which to run the protocol
    alchemical_functions : dict
        The alchemical functions to use for switching
    nstep_neq : int
        The number of nonequilibrium steps in the protocol
    topology : mdtraj.Topology
        An MDtraj topology for the system to generate trajectories
    work_save_interval : int
        How often to write the work and, if requested, configurations
    splitting : str, default "V R O H R V"
        The splitting string to use for the Langevin integration
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, default None
        Full filepath of output trajectory, if desired. If None, no trajectory file is written.
        The work arrays are saved next to it, with the extension replaced by
        ``.cw.npy`` (cumulative), ``.pw.npy`` (protocol) and, when shadow work
        is measured, ``.sw.npy``.
    write_configuration : bool, default False
        Whether to also write configurations of the trajectory at the requested interval.
    timestep : unit.Quantity, default 1 fs
        The timestep to use in the integrator
    measure_shadow_work : bool, default False
        Whether to take the shadow work from the move. When False, the shadow
        work in the result is an array of zeros shaped like the protocol work.

    Returns
    -------
    nonequilibrium_result : NonequilibriumResult
        result object containing the trajectory of the nonequilibrium calculation, as well as the cumulative work
        for each frame.
    """
    import os

    #get the sampler state needed for the simulation
    sampler_state = equilibrium_result.sampler_state
    temperature = thermodynamic_state.temperature

    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    ne_mc_move = NonequilibriumSwitchingMove(
        alchemical_functions,
        splitting,
        temperature,
        nstep_neq,
        timestep,
        work_save_interval,
        subset_topology,
        atom_indices,
        save_configuration=write_configuration,
        measure_shadow_work=measure_shadow_work)

    ne_mc_move.reset()

    #apply the nonequilibrium move
    ne_mc_move.apply(thermodynamic_state, sampler_state)

    #get the cumulative work
    cumulative_work = ne_mc_move.cumulative_work

    #get the protocol work
    protocol_work = ne_mc_move.protocol_work

    #if we're measuring shadow work, get that. Otherwise just fill in zeros:
    if measure_shadow_work:
        shadow_work = ne_mc_move.shadow_work
    else:
        shadow_work = np.zeros_like(protocol_work)

    #create a result object and return that
    nonequilibrium_result = NonequilibriumResult(cumulative_work,
                                                 protocol_work, shadow_work)

    #if desired, write nonequilibrium trajectories:
    if trajectory_filename is not None:
        # Swap only the real file extension for the work-file suffixes.
        # Splitting on every "." mangled paths that have no extension or
        # that contain dots in a directory name.
        filepath_root, _ = os.path.splitext(trajectory_filename)
        cum_work_filepath = filepath_root + ".cw.npy"
        prot_work_filepath = filepath_root + ".pw.npy"

        #if writing configurations was requested, get the trajectory
        if write_configuration:
            try:
                trajectory = ne_mc_move.trajectory
                write_nonequilibrium_trajectory(nonequilibrium_result,
                                                trajectory,
                                                trajectory_filename)
            except NoTrajectoryException:
                # best effort: the work arrays are still saved below
                pass

        np.save(cum_work_filepath, nonequilibrium_result.cumulative_work)
        np.save(prot_work_filepath, nonequilibrium_result.protocol_work)

        if measure_shadow_work:
            np.save(filepath_root + ".sw.npy", shadow_work)

    return nonequilibrium_result
Exemple #15
0
def run_equilibrium(
        equilibrium_result: EquilibriumResult,
        thermodynamic_state: states.ThermodynamicState,
        nsteps_equil: int,
        topology: md.Topology,
        n_iterations: int,
        atom_indices_to_save: List[int] = None,
        trajectory_filename: str = None,
        splitting: str = "V R O R V",
        timestep: unit.Quantity = 1.0 * unit.femtoseconds
) -> EquilibriumResult:
    """
    Run nsteps of equilibrium sampling at the specified thermodynamic state and return the final sampler state
    as well as a trajectory of the positions after each application of an MCMove. This means that if the MCMove
    is configured to run 1000 steps of dynamics, and n_iterations is 100, there will be 100 frames in the resulting
    trajectory; these are the result of 100,000 steps (1000*100) of dynamics.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult
       EquilibriumResult namedtuple containing the information necessary to resume
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state (including context parameters) that should be used
    nsteps_equil : int
        The number of equilibrium steps that a move should make when apply is called
    topology : mdtraj.Topology
        an MDTraj topology object used to construct the trajectory
    n_iterations : int
        The number of times to apply the move. Note that this is not the number of steps of dynamics; it is
        n_iterations*n_steps (which is set in the MCMove).
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, optional, default None
        Full filepath of trajectory files. If none, trajectory files are not written.
    splitting: str, default "V R O R V"
        The splitting string for the dynamics
    timestep : unit.Quantity, default 1 fs
        The timestep handed to the Langevin move

    Returns
    -------
    equilibrium_result : EquilibriumResult
        Container namedtuple built from the SamplerState for resuming and the reduced potential of the
        final frame.
    """
    sampler_state = equilibrium_result.sampler_state
    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    n_atoms = subset_topology.n_atoms

    #construct the MCMove; timestep is forwarded so the caller's value
    #is honoured instead of silently falling back to the move's default
    mc_move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep,
                                                 n_steps=nsteps_equil,
                                                 splitting=splitting)
    mc_move.n_restart_attempts = 10

    #create a numpy array for the trajectory
    trajectory_positions = np.zeros([n_iterations, n_atoms, 3])
    trajectory_box_lengths = np.zeros([n_iterations, 3])
    trajectory_box_angles = np.zeros([n_iterations, 3])

    #loop through iterations and apply MCMove, then collect positions into numpy array
    for iteration in range(n_iterations):
        mc_move.apply(thermodynamic_state, sampler_state)

        trajectory_positions[iteration, :] = sampler_state.positions[
            atom_indices, :].value_in_unit_system(unit.md_unit_system)

        #get the box lengths and angles
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *sampler_state.box_vectors)
        trajectory_box_lengths[iteration, :] = [a, b, c]
        trajectory_box_angles[iteration, :] = [alpha, beta, gamma]

    #construct trajectory object:
    trajectory = md.Trajectory(trajectory_positions,
                               subset_topology,
                               unitcell_lengths=trajectory_box_lengths,
                               unitcell_angles=trajectory_box_angles)

    #get the reduced potential from the final frame for endpoint perturbations
    reduced_potential_final_frame = thermodynamic_state.reduced_potential(
        sampler_state)

    #construct equilibrium result object
    equilibrium_result = EquilibriumResult(sampler_state,
                                           reduced_potential_final_frame)

    #If there is a trajectory filename passed, write out the results here:
    if trajectory_filename is not None:
        write_equilibrium_trajectory(equilibrium_result, trajectory,
                                     trajectory_filename)

    return equilibrium_result
Exemple #16
0
def cg_by_index(trj,
                atom_indices_list,
                bead_label_list,
                chain_list=None,
                segment_id_list=None,
                resSeq_list=None,
                inplace=False,
                bonds=None,
                mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Every bead position is produced by the mapping function selected with
    ``mapping_function`` (by default a center-of-mass style mapping) applied
    to the atoms listed for that bead.

    Parameters
    ----------
    trj : trajectory
        Atomistic trajectory to map. Its ``xyz``, ``top`` and
        ``unitcell_lengths`` are read.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        One label of 1 to 4 characters per CG site. Labels are upper-cased.
    chain_list : list, optional
        Chain ids used to split the resulting beads into separate chains.
        Defaults to chain 1 for every bead.
    segment_id_list : list, optional
        Segment ids to assign to the CG residues.
    resSeq_list : list, optional
        Residue sequence ids to assign to the CG residues. Defaults to
        ``bead index + 1``.
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like, dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in the new topology.
    mapping_function : string, default='com'
        How to map xyz coordinates; must be a key of the module-level
        ``mapping_options`` dictionary.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory,
        depending on the value of ``inplace``. Mapped forces are attached
        only to a newly created trajectory (``new_trj.forces``); the
        in-place path replaces topology and coordinates only.

    Raises
    ------
    ValueError
        If the per-bead lists differ in length, a bead label is not a
        string of 1 to 4 characters, or ``mapping_function`` is unknown.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.
    """
    n_beads = len(atom_indices_list)
    if n_beads != len(bead_label_list):
        raise ValueError(
            "Must supply a list of bead labels of the same length as a list of selected atom indices"
        )
    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            # Wrap in a tuple so an unexpected tuple label cannot break the
            # %-formatting of the message itself.
            raise ValueError(
                "Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"
                % (bead_label, ))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" %
                         mapping_options.keys())
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(n_beads, dtype=int)
    elif len(chain_list) != n_beads:
        raise ValueError(
            "Supplied chain_list must be of the same length as a list of selected atom indices"
        )

    if segment_id_list is not None and len(segment_id_list) != n_beads:
        raise ValueError(
            "Supplied segment_id_list must be of the same length as a list of selected atom indices"
        )

    if resSeq_list is not None and len(resSeq_list) != n_beads:
        raise ValueError(
            "Supplied resSeq_list must be of the same length as a list of selected atom indices"
        )

    n_frames, n_dim = trj.xyz.shape[0], trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Look every atom up by index so each weight array follows the order of
    # its ``atom_indices`` entry (assumed to be the order map_coords pairs
    # weights with coordinates -- TODO confirm against the mapping functions)
    # and so the cost is linear in the bead size, not in the topology size.
    top = trj.top
    masses_per_bead = [
        np.array([top.atom(j).mass for j in atom_indices], dtype=np.float64)
        for atom_indices in atom_indices_list
    ]
    masses = np.array([bead_masses.sum() for bead_masses in masses_per_bead],
                      dtype=np.float64)

    topology_labels = []
    element_label_dict = {}

    # Scratch buffer the mapping function writes one bead's positions into.
    xyz_i = np.zeros((n_frames, n_dim), dtype=trj.xyz.dtype, order='C')

    for i, (atom_indices, bead_label) in enumerate(
            zip(atom_indices_list, bead_label_list)):
        map_coords(xyz_i,
                   trj.xyz,
                   atom_indices,
                   masses_per_bead[i],
                   unitcell_lengths=trj.unitcell_lengths)
        xyz[:, i, :] = xyz_i

        if "forces" in trj.__dict__ and len(trj.forces) > 0:
            forces[:, i, :] = map_forces(trj, atom_indices)

        resSeq = resSeq_list[i] if resSeq_list is not None else i + 1

        # One pseudo-element label per distinct bead label: B0..B9, reused
        # cyclically once more than ten distinct labels have been seen.
        if bead_label not in element_label_dict:
            element_label_dict[bead_label] = '%2s' % (
                'B%i' % (len(element_label_dict) % 10))
        element_label = element_label_dict[bead_label]

        if element_label.strip().upper(
        ) not in element.Element._elements_by_symbol:
            # Created for its side effect only; presumably constructing the
            # Element is what makes the symbol known to the topology code.
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq,
            '%3s' % bead_label, chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz

        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()
    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz,
                         topology=topology,
                         time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
Exemple #17
0
class Mapper:
    """
    An object to convert an atomistic system to a CG system

    Attributes
    ----------
    mappings : dict
        A dictionary containing the {name : mapping} for each residue
        (read-only; a deep copy is returned on access)

    solvent_mapping : int, default=4
        Number of solvent molecules to map to a single bead via k-means
        clustering

    solvent_name : string, default='tip3p'
        Name of solvent residue in atomistic system

    aa_traj : mdtraj.Trajectory
        The atomistic trajectory to convert to CG

    aa_top: mdtraj.Topology
        The atomistic topology to convert to CG

    cg_traj : mdtraj.Trajectory
        The CG trajectory converted from atomistic

    cg_top: mdtraj.Topology
        The CG topology converted from atomistic

    """
    def __init__(self, solvent_name='tip3p', solvent_mapping=4):
        self._mappings = dict()
        self._solvent_mapping = solvent_mapping
        self._solvent_name = solvent_name
        # Initialised here so that mapping before load_trajectory() reaches
        # the explicit "not loaded" check instead of an AttributeError.
        self._aa_traj = None
        self._aa_top = None
        self._cg_traj = None
        self._cg_top  = None


    @property
    def mappings(self):
        return deepcopy(self._mappings)


    @mappings.setter
    def mappings(self, value):
        # A property setter receives the assigned value; without that
        # parameter the assignment failed with an arity error instead of
        # the message below.
        raise TypeError("'mappings' attribute does not support assignment")

    def load_trajectory(self, trajectory):
        """
        Set the atomistic trajectory (and its topology) to be mapped.

        Arguments:
        ----------
        trajectory : mdtraj.Trajectory
            The atomistic trajectory; its `top` attribute is used as the
            atomistic topology

        """
        self._aa_traj = trajectory
        self._aa_top = trajectory.top
        # Drop any cached result so cg_map() maps the new trajectory rather
        # than returning the one computed for a previous trajectory.
        self._cg_traj = None
        self._cg_top = None


    def load_mapping_dir(self, mapping_dir=None, **kwargs):
        """
        Load all mapping files from a directory.

        Arguments:
        ----------
        mapping_dir : string, default=None
            Path to the directory containing mapping files. Loads from
            the internal `mappings` directory by default
        **kwargs : keyword arguments
            Keyword arguments to pass to default_mapping_dir. Namely the
            ff arg.

        """
        if mapping_dir is None:
            mapping_dir = default_mapping_dir(**kwargs)

        assert path.exists(mapping_dir)

        # Every file whose name ends in "map" is treated as a mapping file
        for filename in glob.glob("{}/*map".format(mapping_dir)):
            self.load_mapping(filename)


    def load_mapping(self, filename_or_mapping):
        """
        Load a single mapping, either from disk or from an existing object.

        Arguments:
        ----------
        filename_or_mapping : string or ResMapping
            Path to the mapping file or ResMapping object to add to library.
            For a path, the mapping name is the file's base name up to the
            first "."

        """
        if isinstance(filename_or_mapping, ResMapping):
            self._mappings.update({
                filename_or_mapping.name : filename_or_mapping})
        else:
            assert path.exists(filename_or_mapping)

            name = path.basename(filename_or_mapping).split(".")[0]
            mapping = ResMapping.load(name, filename_or_mapping)
            self._mappings.update({name : mapping})


    def cg_map(self):
        """
        Execute full CG mapping pipeline and return the CG trajectory

        The result is cached; the pipeline only runs when no CG trajectory
        is currently stored.

        """

        if self._cg_traj is None:
            self._map_topology()
            self._convert_xyz()
            self._construct_traj()

        return self._cg_traj


    def _map_topology(self):
        """
        Create CG topology from given topology and mapping

        Raises OutOfOrderError when no trajectory has been loaded.

        """

        # Ensure that a trajectory has been loaded
        if self._aa_traj is None:
            raise OutOfOrderError("An atomistic trajectory has not "
                                  "been loaded into this Mapper yet.")


        self._atom_bead_mapping = dict()
        self._cg_top = Topology()
        self._solvent_counter = 0

        # Loop over all residues
        for residue in self._aa_top.residues:
            if residue.name == self._solvent_name:
                self._map_solvent_top(residue)
            else:
                self._map_nonsolvent_top(residue)


    def _map_solvent_top(self, residue):
        """
        Create CG solvent residue from given residue and add it to the
        CG topology.

        A bead is only added for every `solvent_mapping`-th solvent residue
        seen; for the other calls nothing is added and None is returned.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG

        """
        self._solvent_counter += 1
        if self._solvent_counter % self._solvent_mapping == 0:
            cg_residue = self._cg_top.add_residue(self._solvent_name,
                                                  self._cg_top.add_chain())
            cg_bead = CGBead(bead_type=self._solvent_name)
            mdtraj_bead = self._cg_top.add_atom(self._solvent_name, None,
                                                cg_residue)
            self._atom_bead_mapping[mdtraj_bead] = cg_bead
            return cg_residue


    def _map_nonsolvent_top(self, residue):
        """
        Create CG non-solvent residue from given residue and add it to
        the CG topology.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG

        """

        # Obtain the correct molecule mapping based on the residue
        res_mapping = self._mappings[residue.name]

        # Add an empty residue (in its own new chain) to the CG topology
        cg_residue = self._cg_top.add_residue(
                            residue.name,
                            self._cg_top.add_chain())

        # Make a list of atoms in the residue
        atoms = np.array([atom.index for atom in residue.atoms])

        # Make an empty list to store beads
        cg_beads = []

        # Create CG beads for each bead in the mapping
        for bead in res_mapping.beads:
            # mapping_indices are positions within the residue; take() turns
            # them into the corresponding system-wide atom indices
            bead_atoms = atoms.take(bead.mapping_indices)
            cg_bead = CGBead(bead_type=bead.name, atom_indices=bead_atoms)
            mdtraj_bead = self._cg_top.add_atom(cg_bead.bead_type, None,
                                                cg_residue)
            cg_beads.append(mdtraj_bead)
            self._atom_bead_mapping[mdtraj_bead] = cg_bead

        # Add bonds to topology
        for index_i, index_j in res_mapping.bonds:
            self._cg_top.add_bond(cg_beads[int(index_i)],
                                    cg_beads[int(index_j)])

        return cg_residue


    def _convert_xyz(self):
        """
        Take atomistic trajectory and convert to CG trajectory

        Non-solvent beads are placed at the mass-weighted mean of their
        atoms. Solvent beads start as zeros and are filled afterwards from
        the per-frame results of `_map_solvent`.

        """

        cg_xyz = []
        for bead in self._cg_top.atoms:
            if bead.name == self._solvent_name:
                bead_xyz = np.zeros((self._aa_traj.n_frames,3))
            else:
                atom_indices = self._atom_bead_mapping[bead].atom_indices
                masses = np.array([self._aa_top.atom(i).element.mass
                                   for i in atom_indices])
                bead_xyz = (np.sum((self._aa_traj.xyz[:,atom_indices,:]
                                    * masses[None,:,None]), axis=1) /
                            np.sum(masses))

            cg_xyz.append(bead_xyz)

        # (n_beads, n_frames, 3) -> (n_frames, n_beads, 3)
        cg_xyz = np.array(cg_xyz)
        cg_xyz = np.swapaxes(cg_xyz, 0, 1)

        # Figure out at which coarse grain index the waters start
        # Perform kmeans, frame-by-frame, over all water residues
        # Workers will return centers of masses of clusters, frame index, and cg index
        # Master will assign to CG_xyz
        if self._solvent_counter > 0:
            with Pool(cpu_count()) as pool:
                chunksize = int(self._aa_traj.n_frames / cpu_count()) + 1
                args = list(zip(self._aa_traj,
                                [self._solvent_mapping]*self._aa_traj.n_frames,
                                [self._solvent_name]*self._aa_traj.n_frames))
                coms = pool.starmap(_map_solvent, args, chunksize)

            # Leaving the with-block terminates the pool; join() then waits
            # for the worker processes to exit.
            pool.join()

            coms = np.squeeze(np.array(coms))
            cg_xyz[:,self._cg_top.select(f"name {self._solvent_name}"),:] = coms

        self._cg_xyz = cg_xyz


    def _construct_traj(self):
        """
        Create an mdtraj.Trajectory from the CG topology and xyz.

        Time and unit cell information are taken from the atomistic
        trajectory.

        """

        cg_traj = Trajectory(self._cg_xyz,
                             self._cg_top,
                             time=self._aa_traj.time,
                             unitcell_lengths=self._aa_traj.unitcell_lengths,
                             unitcell_angles=self._aa_traj.unitcell_angles)

        self._cg_traj = cg_traj
Exemple #18
0
def cg_by_index(trj,
                atom_indices_list,
                bead_label_list,
                chain_list=None,
                segment_id_list=None,
                resSeq_list=None,
                inplace=False,
                bonds=None,
                split_shared_atoms=False,
                mod_weights_list=None,
                mapping_function="com",
                charge_tol=1e-5,
                center_postwrap=False):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Each CG bead is placed at a weighted center of its atoms; the weights
    (masses, charges, force norms or ones) are chosen by
    ``mapping_function``.

    Parameters
    ----------
    trj : trajectory
        Atomistic trajectory to map. Its ``xyz``, ``top`` and
        ``unitcell_lengths`` are read.
    atom_indices_list :
        list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list :
        list of maximum 4-letter strings to label CG sites
    chain_list :
        optional list of chain id's to split resulting beads into separate
        chains
    resSeq_list :
        optional list of residue sequence id's to assign cg residues
    segment_id_list :
        optional list of segment id's to assign cg residues
    inplace :
        bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise, a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    split_shared_atoms: boolean
        If true, the per-bead weight modifiers are generated with
        ``gen_unique_overlap_mod_weights`` (replacing any
        ``mod_weights_list`` passed in) so that atoms shared between beads
        have their weight divided between those beads.
    mod_weights_list :
        optional list with one array of multiplicative weight modifiers per
        bead, applied on top of the weights chosen by ``mapping_function``
    mapping_function: string, default='com': how to map xyz coordinates
        Must be a key of the module-level ``mapping_options``. Weighting
        rules exist for 'com'/'center_of_mass', 'coc'/'center_of_charge',
        'cof'/'center_of_force' and 'center'.
    charge_tol : float, default=1e-5
        For charge-weighted mapping, a bead whose absolute total charge is
        below this value is rejected.
    center_postwrap: Boolean
        Whether to wrap the CG system after it is mapped. Assumes that box is
        centered at 0, and only has effect if periodic information is present.

    Note - If repeated resSeq values are used, as for a repeated motif
        in a CG polymer, those sections must be broken into separate
        chains or an incorrect topology will result

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory,
        depending on the value of ``inplace``. Mapped forces are attached
        only to a newly created trajectory.

    Raises
    ------
    ValueError
        If list lengths disagree, a bead label is invalid, the mapping
        function is unknown or has no weighting rule, charge-weighted
        mapping is requested but atoms carry no ``charge`` attribute, or a
        bead's total charge is within ``charge_tol`` of zero.
    """
    n_beads = len(atom_indices_list)

    if n_beads != len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the "
                         "same length as a list of selected atom indices")
    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            raise ValueError("Specified bead label '%s' is not valid, "
                             "must be a string between 1 and 4 characters" %
                             (bead_label, ))

    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s"
                         % mapping_options.keys())

    if chain_list is None:
        chain_list = np.ones(n_beads, dtype=int)
    elif len(chain_list) != n_beads:
        raise ValueError("Supplied chain_list must be of the same length "
                         "as a list of selected atom indices")

    if segment_id_list is not None and len(segment_id_list) != n_beads:
        raise ValueError("Supplied segment_id_list must be of the same "
                         "length as a list of selected atom indices")

    if resSeq_list is not None and len(resSeq_list) != n_beads:
        raise ValueError("Supplied resSeq_list must be of the same "
                         "length as a list of selected atom indices")

    n_frames, n_dim = trj.xyz.shape[0], trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')

    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    top = trj.top

    # Per-bead arrays of atomic masses (in atom_indices order) and the total
    # mass of each bead.
    masses_i = [
        np.array([top.atom(jj).element.mass for jj in atom_indices],
                 dtype=np.float64) for atom_indices in atom_indices_list
    ]
    masses = np.array([bead_masses.sum() for bead_masses in masses_i],
                      dtype=np.float64)

    # Charges are optional: they are only collected when atoms expose a
    # ``charge`` attribute. Atom 0 is probed so that a single-atom topology
    # does not fail on the probe itself.
    charges = None
    charges_i = None
    if top.n_atoms > 0 and hasattr(top.atom(0), 'charge'):
        charges_i = [
            np.array([top.atom(jj).charge for jj in atom_indices],
                     dtype=np.float64) for atom_indices in atom_indices_list
        ]
        charges = np.array([bead_charges.sum() for bead_charges in charges_i],
                           dtype=np.float64)

    use_charge = mapping_function in ('coc', 'center_of_charge')
    use_mass = mapping_function in ('com', 'center_of_mass')
    use_force = mapping_function in ('cof', 'center_of_force')

    forcenorm_i = []
    if use_force:
        forcenorm_i = [
            get_forcenorms(trj, atom_indices)
            for atom_indices in atom_indices_list
        ]

    if use_charge:
        if charges is None:
            raise ValueError("Charge-weighted mapping requested but the "
                             "atoms of this topology have no charge "
                             "attribute")
        for site, charge in enumerate(charges):
            if np.absolute(charge) < charge_tol:
                raise ValueError("Total charge on site %i is near zero" %
                                 site)

    topology_labels = []
    element_label_dict = {}

    if split_shared_atoms:
        mod_weights_list = gen_unique_overlap_mod_weights(atom_indices_list)

    # Probe once whether forces are present and mappable, so that the bead
    # loop below does not have to handle failures per bead.
    has_forces = False
    try:
        trj.__dict__['forces']
        map_forces(trj, (0, ))
        has_forces = True
    except TypeError:
        print("WARNING: Invalid Forces\nNo Map applied to forces")
    except KeyError:
        # no forces stored on this trajectory
        pass
    except Exception:
        print("Unknown error, check your forces\nexiting...")
        raise

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]
        # View into the output array; compute_center_weighted fills it.
        xyz_i = xyz[:, i, :]

        if use_charge:
            weights = charges_i[i]
        elif use_mass:
            weights = masses_i[i]
        elif use_force:
            weights = forcenorm_i[i]
        elif mapping_function == 'center':
            weights = np.ones(len(atom_indices))
        else:
            # Known to mapping_options but without a weighting rule here;
            # fail clearly instead of using unbound or stale weights.
            raise ValueError("Mapping function '%s' has no weighting rule "
                             "in cg_by_index" % mapping_function)

        if mod_weights_list is not None:
            weights[:] = np.multiply(weights, mod_weights_list[i])

        compute_center_weighted(xyz_i,
                                trj.xyz,
                                atom_indices,
                                weights,
                                unitcell_lengths=trj.unitcell_lengths,
                                center_postwrap=center_postwrap)

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        resSeq = resSeq_list[i] if resSeq_list is not None else i + 1

        # One pseudo-element label per distinct bead label: B0..B9, reused
        # cyclically once more than ten distinct labels have been seen.
        if bead_label not in element_label_dict:
            element_label_dict[bead_label] = '%2s' % (
                'B%i' % (len(element_label_dict) % 10))
        element_label = element_label_dict[bead_label]

        if element_label.strip().upper(
        ) not in element.Element._elements_by_symbol:
            # Created for its side effect only; presumably constructing the
            # Element is what makes the symbol known to the topology code.
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq,
            '%3s' % bead_label, chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz

        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz,
                         topology=topology,
                         time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)

    new_trj.forces = forces
    return new_trj
Exemple #19
0
def map_molecules(trj,
                  selection_list,
                  bead_label_list,
                  transfer_labels=False,
                  molecule_types=None,
                  molecule_type_order=False,
                  return_call=False,
                  *args,
                  **kwargs):
    """ This performs the mapping where each molecule has been assigned a
    type.

    The selections of each molecule type are resolved once, on the first
    residue of that type, into atom offsets local to the molecule. Those
    offsets are then replayed for every residue of the trajectory to build
    the per-bead atom index lists handed to ``cg_by_index``.

    Parameters
    ----------
    trj : Trajectory
        Atomistic trajectory to map
    selection_list :
        Indexable collection (one entry per molecule type) of collections
        of selection strings, one string per bead. A string must contain
        either "name" (resolved within the molecule) or "index" (resolved
        on the whole system and used as an offset), not both.
    bead_label_list :
        Indexable collection with the same nesting as ``selection_list``
        holding the label of each bead
    transfer_labels :
        Whether to transfer over labels in @trj. Moves over resSeq, resName
        for every bead, assuming that the atoms in each bead are uniform in
        those qualities.
    molecule_types :
        Sequence of integers giving the molecule type of each residue
        (lists, tuples and NumPy arrays are accepted). Defaults to type 0
        for every residue.
    molecule_type_order : boolean
        Reordering of the map by molecule type; not currently supported,
        passing ``True`` raises ``ValueError``.
    return_call: boolean
        Whether to return the arguments that cg_by_index would be called with
        instead of actually calling it. Useful for modifying the call.
    *args, **kwargs :
        Forwarded to ``cg_by_index``

    Returns
    -------
    traj: trajectory
        trajectory formed by applying given molecular map.
    -OR-
    list: ``[trj, index_list, label_list, *args, kwargs]`` when
        ``return_call`` is true (``kwargs`` is appended as a single dict)

    Raises
    ------
    ValueError
        On unsupported options, inconsistent list lengths, molecule types
        that are not exactly ``0..n_molecule_types-1``, a selection mixing
        "index" and "name", or a selection that yields no atoms.
    """

    ### First, deal with optional arguments and argument validation.
    if molecule_type_order is True:
        raise ValueError("molecule_type_order not currently supported.")

    #if the array of molecule types isn't given, assume 1 molecule type.
    if molecule_types is None:
        molecule_types = [0] * trj.top.n_residues
    else:
        # Normalise to a list so that .index() below also works for
        # NumPy arrays and other sequences.
        molecule_types = list(molecule_types)

    n_molecule_types = len(selection_list)

    if sorted(set(molecule_types)) != list(range(n_molecule_types)):
        raise ValueError("Error in map molecules, molecule types list must "
                         "contain only and all numbers from 0 to "
                         "n_molecule_types-1.")

    # NOTE: the length of molecule_types is not checked against the number
    # of residues.

    if len(selection_list) != len(bead_label_list):
        raise ValueError("Error in map molecules, must submit selection list "
                         "and bead label list of same length.")

    for i in range(n_molecule_types):
        if len(selection_list[i]) != len(bead_label_list[i]):
            raise ValueError("Error in map molecules, selection list %i and "
                             "bead label list %i must be of same length." %
                             (i, i))

    ### generate the indices local to each molecule for mapping

    # get the first molecule index for each molecule type
    first_molecules = [
        molecule_types.index(i) for i in range(n_molecule_types)
    ]

    internal_indices_list = []
    for selection, first_mol in zip(selection_list, first_molecules):
        first_index = trj.top.select("(resid == %i)" % (first_mol)).min()

        mol_indices = []
        for sel in selection:
            has_index = "index" in sel
            has_name = "name" in sel
            internal_indices = []
            if has_index and has_name:
                raise ValueError("Error in map molecules, do not specify "
                                 "selection by index and by type.")
            elif has_index:
                # use atom selection language to parse selection
                #string containing only indices on whole system, then offset later
                internal_indices = trj.top.select("%s" % (sel))

            elif has_name:
                # have to un-shift list because this will be added to current id later
                filter_string = "(resid == %i) and (%s)" % (first_mol, sel)
                internal_indices = trj.top.select(filter_string) - first_index

            if len(internal_indices) == 0:
                raise ValueError(
                    "Error in map_molecules, selection string '%s' "
                    "produced an empty list of atom indices" % sel)

            mol_indices.append(internal_indices)

        internal_indices_list.append(mol_indices)

    # Walk the residues in topology order (reordering by molecule type is
    # rejected above), shifting each molecule's local offsets by the index
    # of the residue's first atom.
    index_list = []
    resSeq_list = []
    label_list = []

    start_index = 0
    resSeq = 1
    for ridx, r in enumerate(trj.top.residues):
        molecule_type = molecule_types[ridx]
        for bead_idx, internal_indices in enumerate(
                internal_indices_list[molecule_type]):
            system_indices = internal_indices + start_index
            index_list.append(system_indices)
            resSeq_list.append(resSeq)
            label_list.append(bead_label_list[molecule_type][bead_idx])
        resSeq = resSeq + 1
        start_index = start_index + r.n_atoms

    if (return_call is True):
        #exit early.
        arg_list = [trj, index_list, label_list]
        arg_list.extend(args)
        arg_list.append(kwargs)
        return (arg_list)

    cg_trj = cg_by_index(trj, index_list, label_list, *args, **kwargs)

    #do a more sophisticated labeling.
    if (transfer_labels is True):

        df_aa_top = trj.top.to_dataframe()[0]
        # Keep the bond table as well: rebuilding the topology from the atom
        # table alone would silently drop the CG bonds.
        df_cg_top, cg_bonds = cg_trj.top.to_dataframe()

        #get resSeq info.
        aa_resSeq = df_aa_top.loc[:, 'resSeq']

        #find atom indices for first atoms of each residue.
        res_starting_indices = \
            np.sort(np.unique(aa_resSeq,return_index=True)[1])

        #get resids and resnames for startings atoms.
        aa_starting_resids = df_aa_top.loc[res_starting_indices, 'resSeq']
        aa_starting_resnames = df_aa_top.loc[res_starting_indices, 'resName']

        #needed for duplicating atomistic info across cg molecules
        n_sites_per_cg = [len(desc) for desc in bead_label_list]

        #generate and place resids
        cg_resids = typed_elementwise_rep(aa_starting_resids, molecule_types,
                                          n_sites_per_cg)
        df_cg_top.loc[:, "resSeq"] = cg_resids

        #generate and place resNames
        cg_resnames = typed_elementwise_rep(aa_starting_resnames,
                                            molecule_types, n_sites_per_cg)
        df_cg_top.loc[:, "resName"] = cg_resnames

        #convert and put back, passing bonds only when there are any.
        cg_trj.top = Topology.from_dataframe(
            df_cg_top, bonds=cg_bonds if len(cg_bonds) > 0 else None)

    return (cg_trj)
Exemple #20
0
def render_traj(topology, positions):
    """Build a trajectory from an OpenMM topology and positions and view it.

    ``positions`` is divided by ``unit.nanometers`` (presumably stripping the
    units from an OpenMM quantity -- confirm against callers) and combined
    with the topology converted through ``Topology.from_openmm``. Returns the
    object produced by ``show_mdtraj`` after adding an 'all' ball-and-stick
    representation and centering the view with zoom.
    """
    xyz_nm = positions / unit.nanometers
    md_topology = Topology.from_openmm(topology)
    view = show_mdtraj(Trajectory(xyz_nm, md_topology))
    view = view.add_ball_and_stick('all')
    return view.center_view(zoom=True)
Exemple #21
0
def run_protocol(equilibrium_result: EquilibriumResult, thermodynamic_state: states.ThermodynamicState,
                 alchemical_functions: dict, nstep_neq: int, topology: md.Topology, work_save_interval: int, splitting: str="V R O H R V",
                 atom_indices_to_save: List[int] = None, trajectory_filename: str = None, write_configuration: bool = False, timestep: unit.Quantity=1.0*unit.femtoseconds, measure_shadow_work: bool=False) -> NonequilibriumResult:
    """
    Perform a nonequilibrium switching protocol and return the nonequilibrium protocol work.

    A ``NonequilibriumSwitchingMove`` is built from the arguments, reset, and applied once to the
    sampler state taken from ``equilibrium_result``. The work arrays recorded by the move are
    collected into a ``NonequilibriumResult`` and, if ``trajectory_filename`` is given, written to disk.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult namedtuple
        The result of an equilibrium simulation; its ``sampler_state`` is the starting point.
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state at which to run the protocol
    alchemical_functions : dict
        The alchemical functions to use for switching
    nstep_neq : int
        The number of nonequilibrium steps in the protocol
    topology : mdtraj.Topology
        An MDtraj topology for the system to generate trajectories
    work_save_interval : int
        How often to write the work and, if requested, configurations
    splitting : str, default "V R O H R V"
        The splitting string to use for the Langevin integration
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, default None
        Full filepath of output trajectory, if desired. If None, nothing is written to disk.
        The work arrays are saved next to it with the final extension replaced by
        ``.cw.npy`` (cumulative work), ``.pw.npy`` (protocol work) and, when
        ``measure_shadow_work`` is True, ``.sw.npy`` (shadow work).
    write_configuration : bool, default False
        Whether to also write configurations of the trajectory at the requested interval.
    timestep : unit.Quantity, default 1 fs
        The timestep to use in the integrator
    measure_shadow_work : bool, default False
        Whether the move should measure shadow work. If False, the shadow work in the
        result is an array of zeros shaped like the protocol work.

    Returns
    -------
    nonequilibrium_result : NonequilibriumResult
        result object built from the cumulative work, the protocol work and the shadow work.
    """
    # Local import keeps this function self-contained; only used for filename handling.
    import os

    #get the sampler state needed for the simulation
    sampler_state = equilibrium_result.sampler_state
    temperature = thermodynamic_state.temperature

    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    ne_mc_move = NonequilibriumSwitchingMove(alchemical_functions, splitting, temperature, nstep_neq, timestep, work_save_interval, subset_topology, atom_indices, save_configuration=write_configuration, measure_shadow_work=measure_shadow_work)

    ne_mc_move.reset()

    #apply the nonequilibrium move
    ne_mc_move.apply(thermodynamic_state, sampler_state)

    #collect the work arrays recorded by the move
    cumulative_work = ne_mc_move.cumulative_work
    protocol_work = ne_mc_move.protocol_work

    #if we're measuring shadow work, get that. Otherwise just fill in zeros:
    if measure_shadow_work:
        shadow_work = ne_mc_move.shadow_work
    else:
        shadow_work = np.zeros_like(protocol_work)

    #create a result object and return that
    nonequilibrium_result = NonequilibriumResult(cumulative_work, protocol_work, shadow_work)

    #if desired, write nonequilibrium trajectories:
    if trajectory_filename is not None:
        # Swap only the final extension for the work-array suffixes. splitext leaves
        # extension-less names and dots inside directory components intact, whereas
        # splitting on every "." would drop the base name or truncate the path.
        base_path, _ = os.path.splitext(trajectory_filename)
        cum_work_filepath = base_path + ".cw.npy"
        prot_work_filepath = base_path + ".pw.npy"

        #if writing configurations was requested, get the trajectory
        if write_configuration:
            try:
                trajectory = ne_mc_move.trajectory
                write_nonequilibrium_trajectory(nonequilibrium_result, trajectory, trajectory_filename)
            except NoTrajectoryException:
                # Deliberate best effort: the work arrays below are still saved.
                # NOTE(review): presumably raised when the move holds no
                # configurations -- confirm against NonequilibriumSwitchingMove.
                pass

        np.save(cum_work_filepath, nonequilibrium_result.cumulative_work)
        np.save(prot_work_filepath, nonequilibrium_result.protocol_work)

        if measure_shadow_work:
            np.save(base_path + ".sw.npy", shadow_work)

    return nonequilibrium_result
Exemple #22
0
def run_equilibrium(equilibrium_result: EquilibriumResult, thermodynamic_state: states.ThermodynamicState,
                    nsteps_equil: int, topology: md.Topology, n_iterations : int,
                    atom_indices_to_save: List[int] = None, trajectory_filename: str = None, splitting: str="V R O R V", timestep: unit.Quantity=1.0*unit.femtoseconds) -> EquilibriumResult:
    """
    Run nsteps of equilibrium sampling at the specified thermodynamic state and return the final sampler state
    together with the reduced potential of the final frame. The positions after each application of the MCMove
    are collected into a trajectory. This means that if the MCMove is configured to run 1000 steps of dynamics,
    and n_iterations is 100, there will be 100 frames in the resulting trajectory; these are the result of
    100,000 steps (1000*100) of dynamics. The trajectory is only written to disk (when ``trajectory_filename``
    is given); it is not part of the returned result.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult
       EquilibriumResult namedtuple containing the information necessary to resume
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state (including context parameters) that should be used
    nsteps_equil : int
        The number of equilibrium steps that a move should make when apply is called
    topology : mdtraj.Topology
        an MDTraj topology object used to construct the trajectory
    n_iterations : int
        The number of times to apply the move. Note that this is not the number of steps of dynamics; it is
        n_iterations*n_steps (which is set in the MCMove).
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, optional, default None
        Full filepath of trajectory files. If none, trajectory files are not written.
    splitting: str, default "V R O R V"
        The splitting string for the dynamics
    timestep : unit.Quantity, default 1 fs
        The timestep passed to the Langevin dynamics move

    Returns
    -------
    equilibrium_result : EquilibriumResult
        Container namedtuple built from the SamplerState for resuming and the reduced potential of the
        final frame.
    """
    sampler_state = equilibrium_result.sampler_state
    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    n_atoms = subset_topology.n_atoms

    #construct the MCMove; forward the timestep so the caller's value is actually honored
    mc_move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, n_steps=nsteps_equil, splitting=splitting)
    mc_move.n_restart_attempts = 10

    #create numpy arrays for the trajectory, one row per iteration
    trajectory_positions = np.zeros([n_iterations, n_atoms, 3])
    trajectory_box_lengths = np.zeros([n_iterations, 3])
    trajectory_box_angles = np.zeros([n_iterations, 3])

    #loop through iterations and apply MCMove, then collect positions into numpy array
    for iteration in range(n_iterations):
        mc_move.apply(thermodynamic_state, sampler_state)

        #positions are read back from sampler_state after the move and stored unitless (md unit system)
        trajectory_positions[iteration, :] = sampler_state.positions[atom_indices, :].value_in_unit_system(unit.md_unit_system)

        #get the box lengths and angles
        # NOTE(review): assumes sampler_state.box_vectors is set -- confirm behaviour for non-periodic systems
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(*sampler_state.box_vectors)
        trajectory_box_lengths[iteration, :] = [a, b, c]
        trajectory_box_angles[iteration, :] = [alpha, beta, gamma]

    #construct trajectory object:
    trajectory = md.Trajectory(trajectory_positions, subset_topology, unitcell_lengths=trajectory_box_lengths, unitcell_angles=trajectory_box_angles)

    #get the reduced potential from the final frame for endpoint perturbations
    reduced_potential_final_frame = thermodynamic_state.reduced_potential(sampler_state)

    #construct equilibrium result object
    equilibrium_result = EquilibriumResult(sampler_state, reduced_potential_final_frame)

    #If there is a trajectory filename passed, write out the results here:
    if trajectory_filename is not None:
        write_equilibrium_trajectory(equilibrium_result, trajectory, trajectory_filename)

    return equilibrium_result