Exemple #1
0
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    All atoms and residues are placed in a single chain.  Residues are
    created in ascending ``ResidueID`` order (one residue per distinct ID)
    and atoms are added in ascending ``AtomID`` order.

    Parameters
    ----------
    AtomID : array-like
        Per-atom identifiers; only their sort order is used.
    AtomNames : array-like of str or bytes
        Per-atom names.  The element is guessed from the first character
        left after stripping leading digits from the name.
    ChainID : iterable of str or bytes
        Per-atom chain identifiers; every entry must be empty.
    ResidueID : array-like
        Per-atom residue identifiers (must be hashable).
    ResidueNames : array-like of str or bytes
        Per-atom residue names.

    Returns
    -------
    topology : mdtraj.Topology
        The topology, with standard bonds created.

    Raises
    ------
    NotImplementedError
        If any entry of ``ChainID`` is not an empty string.
    """
    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology
    topology = Topology()

    # Check that the ChainID is just an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # NOTE: np.all() over a generator expression never looks at the elements
    # (the generator object itself is truthy), so the entries are tested
    # explicitly here.
    for chainid in ChainID:
        if not isinstance(chainid, basestring):
            chainid = chainid.decode()
        if chainid != '':
            raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # Only a single character is used, so a two-letter element symbol
        # resolves to the element of its first letter.
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build a single-chain topology from per-atom arrays.

    Residues are created in ascending ``ResidueID`` order (one residue per
    distinct ID) and atoms are added in ascending ``AtomID`` order.

    Parameters
    ----------
    AtomID : array-like
        Per-atom identifiers; only their sort order is used.
    AtomNames : array-like of str or bytes
        Per-atom names.  The element is guessed from the first character
        left after stripping leading digits from the name.
    ChainID : iterable of str or bytes
        Per-atom chain identifiers; every entry must be empty.
    ResidueID : array-like
        Per-atom residue identifiers (must be hashable).
    ResidueNames : array-like of str or bytes
        Per-atom residue names.

    Returns
    -------
    topology : Topology
        The topology, with standard bonds created.

    Raises
    ------
    AssertionError
        If any entry of ``ChainID`` is not an empty string.
    """
    topology = Topology()

    # Check that the ChainID is just an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # NOTE: np.all() over a generator expression never looks at the elements
    # (the generator object itself is truthy), and a bare assert disappears
    # under -O, so the entries are tested explicitly.  The exception type
    # and message of the original assert are kept.
    for chainid in ChainID:
        if not isinstance(chainid, str):
            chainid = chainid.decode()
        if chainid != '':
            raise AssertionError('Im not prepaed to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, str):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, str):
            atom_name = atom_name.decode()
        # Only a single character is used, so a two-letter element symbol
        # resolves to the element of its first letter.
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
Exemple #3
0
    def add_mol_to_topology(self, coords: np.ndarray, types: np.ndarray,
                            topology: mdtraj.Topology):
        """Append one molecule to ``topology`` as a new chain.

        Every atom is placed in its own residue named ``mol_<i>``, where
        ``i`` is the atom's position within this molecule.

        Parameters
        ----------
        coords : np.ndarray
            2-D array with one row per atom.  Only its shape is checked
            here; the coordinates themselves are not stored.
        types : np.ndarray
            Either a 2-D array whose per-row argmax is looked up through
            ``self.sequential_to_atomic_number()``, or a 1-D array of keys
            for ``atomic_number_to_symbol()`` (presumably atomic numbers).
        topology : mdtraj.Topology
            Topology that is extended in place.

        Raises
        ------
        AssertionError
            If ``coords`` is not 2-D or its first dimension differs from
            that of ``types``.
        ValueError
            If ``types`` is neither 1-D nor 2-D.
        """
        assert coords.shape[0] == types.shape[0]
        assert coords.ndim == 2
        chain = topology.add_chain()

        # Convert types to symbols.  The lookup tables are fetched once per
        # call rather than once per atom.
        if types.ndim == 2:
            seq_to_number = self.sequential_to_atomic_number()
            number_to_symbol = atomic_number_to_symbol()
            seqs = np.argmax(types, axis=1)
            syms = [number_to_symbol[seq_to_number[t]] for t in seqs]
        elif types.ndim == 1:
            number_to_symbol = atomic_number_to_symbol()
            syms = [number_to_symbol[t] for t in types]
        else:
            raise ValueError(
                "Types must either be one hot vectors with ndim==2 XOR numbers with ndim==1."
            )

        for i, s in enumerate(syms):
            res = topology.add_residue("mol_{}".format(i), chain)
            topology.add_atom(s, mdtraj.element.get_by_symbol(s), res)
Exemple #4
0
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    All atoms and residues are placed in a single chain.  Residues are
    created in ascending ``ResidueID`` order (one residue per distinct ID)
    and atoms are added in ascending ``AtomID`` order.

    Parameters
    ----------
    AtomID : array-like
        Per-atom identifiers; only their sort order is used.
    AtomNames : array-like of str or bytes
        Per-atom names.  The element is guessed from the first character
        left after stripping leading digits from the name; if the lookup
        raises ``KeyError`` the atom is added with ``element=None``.
    ChainID : iterable of str or bytes
        Per-atom chain identifiers; every entry must be empty.
    ResidueID : array-like
        Per-atom residue identifiers (must be hashable).
    ResidueNames : array-like of str or bytes
        Per-atom residue names.

    Returns
    -------
    topology : mdtraj.Topology
        The topology, with standard bonds created.

    Raises
    ------
    NotImplementedError
        If any entry of ``ChainID`` is not an empty string.
    """
    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology
    topology = Topology()

    # Check that the ChainID is just an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # NOTE: np.all() over a generator expression never looks at the elements
    # (the generator object itself is truthy), so the entries are tested
    # explicitly here.
    for chainid in ChainID:
        if not isinstance(chainid, basestring):
            chainid = chainid.decode()
        if chainid != '':
            raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # Only a single character is used, so a two-letter element symbol
        # resolves to the element of its first letter.
        element_symbol = atom_name.lstrip('0123456789')[0]

        # Unknown symbols are tolerated: the atom is kept without an element.
        try:
            element = elem.get_by_symbol(element_symbol)
        except KeyError:
            element = None

        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
Exemple #5
0
def _traj_from_xyza(xyz, atomic_numbers, units='nm'):
    """
    Build a minimal trajectory (one chain, one residue named 'XXX') from
    atomic coordinates and atomic numbers.

    Parameters
    ----------
    xyz : np.array, float, shape( num_atom, 3)
        array of x,y,z coordinates, one row per atom.  The caller's array
        is never modified.

    atomic_numbers : np.array, int, shape( num_atom, 1 )
        the atomic numbers of each of the atoms.

    Optional Parameters
    -------------------
    units : str
        if units == 'nm' then nothing happens. if units == 'ang' then
        we convert them to nm. Any other value is treated like 'nm'.

    Returns
    -------
    structure : mdtraj.trajectory
        A meta-data minimal mdtraj instance
    """

    if units == 'ang':
        # Out-of-place division: an in-place `/=` would silently rescale the
        # caller's array and fails outright on integer arrays.
        xyz = xyz / 10.

    top = Topology()
    chain = top.add_chain()
    residue = top.add_residue('XXX', chain)

    for i in range(xyz.shape[0]):
        element_symb = periodic_table[atomic_numbers[i]][1] # should give symbol
        element = Element.getBySymbol(element_symb)
        # Atoms are simply named after their element symbol.
        name = '%s' % element_symb
        top.add_atom(name, element, residue)

    structure = Trajectory(xyz=xyz, topology=top)

    return structure
Exemple #6
0
class Mapper:
    """
    An object to convert an atomistic system to a CG system

    Typical use: load one or more residue mappings (``load_mapping`` /
    ``load_mapping_dir``), load a trajectory (``load_trajectory``), then
    call ``cg_map`` to obtain the coarse-grained trajectory.

    Attributes
    ----------
    mappings : dict
        A dictionary containing the {name : mapping} for each residue.
        Read-only; reading it returns a deep copy.

    _solvent_mapping : int, default=4
        Number of solvent molecules to map to a single bead via k-means
        clustering

    _solvent_name : string, default='tip3p'
        Name of solvent residue in atomistic system

    _aa_traj : mdtraj.Trajectory or None
        The atomistic trajectory to convert to CG

    _aa_top : mdtraj.Topology or None
        The atomistic topology to convert to CG

    _cg_traj : mdtraj.Trajectory or None
        The CG trajectory converted from atomistic

    _cg_top : mdtraj.Topology or None
        The CG topology converted from atomistic

    """
    def __init__(self, solvent_name='tip3p', solvent_mapping=4):
        self._mappings = dict()
        self._solvent_mapping = solvent_mapping
        self._solvent_name = solvent_name
        # Set by load_trajectory(); initialised here so that _map_topology
        # can detect a missing trajectory instead of hitting AttributeError.
        self._aa_traj = None
        self._aa_top = None
        self._cg_traj = None
        self._cg_top  = None


    @property
    def mappings(self):
        # A deep copy is returned so callers cannot mutate the library.
        return deepcopy(self._mappings)


    @mappings.setter
    def mappings(self, value):
        # The setter must accept the assigned value, otherwise Python raises
        # an unrelated arity TypeError before this message is ever reached.
        raise TypeError("'mappings' attribute does not support assignment")

    def load_trajectory(self, trajectory):
        """
        Store the atomistic trajectory (and its topology) to be mapped.

        Arguments:
        ----------
        trajectory : mdtraj.Trajectory
            The atomistic trajectory; its ``top`` attribute is used as the
            atomistic topology.

        """
        self._aa_traj = trajectory
        self._aa_top = trajectory.top


    def load_mapping_dir(self, mapping_dir=None, **kwargs):
        """
        Load all mapping files from a directory.

        Every file in the directory whose name ends in ``map`` is loaded.

        Arguments:
        ----------
        mapping_dir : string, default=None
            Path to the directory containing mapping files. Loads from
            the internal `mappings` directory by default
        **kwargs : keyword arguments
            Keyword arguments to pass to default_mapping_dir (only used
            when mapping_dir is None). Namely the ff arg.

        """
        if mapping_dir is None:
            mapping_dir = default_mapping_dir(**kwargs)

        assert path.exists(mapping_dir)

        for filename in glob.glob("{}/*map".format(mapping_dir)):
            self.load_mapping(filename)


    def load_mapping(self, filename_or_mapping):
        """
        Load a single mapping, either from disk or from an existing object.

        Arguments:
        ----------
        filename_or_mapping : string or ResMapping
            Path to the mapping file or ResMapping object to add to library.
            For a path, the mapping name is the file's base name up to its
            first ".". An existing entry with the same name is replaced.

        """
        if isinstance(filename_or_mapping, ResMapping):
            self._mappings.update({
                filename_or_mapping.name : filename_or_mapping})
        else:
            assert path.exists(filename_or_mapping)

            name = path.basename(filename_or_mapping).split(".")[0]
            mapping = ResMapping.load(name, filename_or_mapping)
            self._mappings.update({name : mapping})


    def cg_map(self):
        """
        Execute full CG mapping pipeline and return the CG trajectory

        The result is cached: the pipeline only runs while no CG
        trajectory has been built yet.

        """

        if self._cg_traj is None:
            self._map_topology()
            self._convert_xyz()
            self._construct_traj()

        return self._cg_traj


    def _map_topology(self):
        """
        Create CG topology from given topology and mapping

        Raises OutOfOrderError if no trajectory has been loaded.

        """

        # Ensure that a trajectory has been loaded
        if self._aa_traj is None:
            raise OutOfOrderError("An atomistic trajectory has not "
                                  "been loaded into this Mapper yet.")


        # Maps each CG topology atom to the CGBead describing it
        self._atom_bead_mapping = dict()
        self._cg_top = Topology()
        self._solvent_counter = 0

        # Loop over all residues
        for residue in self._aa_top.residues:
            if residue.name == self._solvent_name:
                self._map_solvent_top(residue)
            else:
                self._map_nonsolvent_top(residue)


    def _map_solvent_top(self, residue):
        """
        Count one solvent residue and, on every `_solvent_mapping`-th one,
        add a CG solvent residue (in its own chain) with a single bead to
        the CG topology.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG

        Returns the new CG residue when one was created, otherwise None.

        """
        self._solvent_counter += 1
        if self._solvent_counter % self._solvent_mapping == 0:
            cg_residue = self._cg_top.add_residue(self._solvent_name,
                                                  self._cg_top.add_chain())
            cg_bead = CGBead(bead_type=self._solvent_name)
            mdtraj_bead = self._cg_top.add_atom(self._solvent_name, None,
                                                cg_residue)
            self._atom_bead_mapping[mdtraj_bead] = cg_bead
            return cg_residue


    def _map_nonsolvent_top(self, residue):
        """
        Create CG non-solvent residue from given residue and add it to
        the CG topology.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG. A mapping with the
            residue's name must have been loaded (KeyError otherwise).

        Returns the new CG residue.

        """

        # Obtain the correct molecule mapping based on the residue
        res_mapping = self._mappings[residue.name]

        # Add an empty residue to the CG topology
        cg_residue = self._cg_top.add_residue(
                            residue.name,
                            self._cg_top.add_chain())

        # Make a list of atoms in the residue
        atoms = np.array([atom.index for atom in residue.atoms])

        # Make an empty list to store beads
        cg_beads = []

        # Create CG beads for each bead in the mapping; mapping_indices are
        # positions within the residue, translated here to topology indices
        for bead in res_mapping.beads:
            bead_atoms = atoms.take(bead.mapping_indices)
            cg_bead = CGBead(bead_type=bead.name, atom_indices=bead_atoms)
            mdtraj_bead = self._cg_top.add_atom(cg_bead.bead_type, None,
                                                cg_residue)
            cg_beads.append(mdtraj_bead)
            self._atom_bead_mapping[mdtraj_bead] = cg_bead

        # Add bonds to topology (bond indices refer to bead order above)
        for index_i, index_j in res_mapping.bonds:
            self._cg_top.add_bond(cg_beads[int(index_i)],
                                    cg_beads[int(index_j)])

        return cg_residue


    def _convert_xyz(self):
        """
        Take atomistic trajectory and convert to CG trajectory

        Non-solvent beads are placed at the mass-weighted mean position of
        their atoms; solvent beads are filled from `_map_solvent`. The
        result is stored in `self._cg_xyz`.

        """

        cg_xyz = []
        for bead in self._cg_top.atoms:
            if bead.name == self._solvent_name:
                # Placeholder, overwritten below with the solvent results
                bead_xyz = np.zeros((self._aa_traj.n_frames,3))
            else:
                atom_indices = self._atom_bead_mapping[bead].atom_indices
                masses = np.array([self._aa_top.atom(i).element.mass
                                   for i in atom_indices])
                bead_xyz = (np.sum((self._aa_traj.xyz[:,atom_indices,:]
                                    * masses[None,:,None]), axis=1) /
                            np.sum(masses))

            cg_xyz.append(bead_xyz)

        # (n_beads, n_frames, 3) -> (n_frames, n_beads, 3)
        cg_xyz = np.array(cg_xyz)
        cg_xyz = np.swapaxes(cg_xyz, 0, 1)

        # Figure out at which coarse grain index the waters start
        # Perform kmeans, frame-by-frame, over all water residues
        # Workers will return centers of masses of clusters, frame index, and cg index
        # Master will assign to CG_xyz
        if self._solvent_counter > 0:
            with Pool(cpu_count()) as pool:
                chunksize = int(self._aa_traj.n_frames / cpu_count()) + 1
                args = list(zip(self._aa_traj,
                                [self._solvent_mapping]*self._aa_traj.n_frames,
                                [self._solvent_name]*self._aa_traj.n_frames))
                coms = pool.starmap(_map_solvent, args, chunksize)

            pool.join()

            coms = np.squeeze(np.array(coms))
            cg_xyz[:,self._cg_top.select(f"name {self._solvent_name}"),:] = coms

        self._cg_xyz = cg_xyz


    def _construct_traj(self):
        """
        Create an mdtraj.Trajectory from the CG topology and xyz.

        Time and unit cell information are copied from the atomistic
        trajectory.

        """

        cg_traj = Trajectory(self._cg_xyz,
                             self._cg_top,
                             time=self._aa_traj.time,
                             unitcell_lengths=self._aa_traj.unitcell_lengths,
                             unitcell_angles=self._aa_traj.unitcell_angles)

        self._cg_traj = cg_traj
Exemple #7
0
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Convert a molsysmt.Topology item into an mdtraj Topology.

    Atoms are read row by row from ``item.atoms_dataframe``.  A new mdtraj
    chain is started whenever ``chain_index`` changes between consecutive
    rows and a new residue whenever ``group_index`` changes.  Bonds are then
    copied from ``item.bonds_dataframe``.

    Parameters
    ----------
    item : molsysmt.Topology
        Source topology; must expose ``atoms_dataframe`` and
        ``bonds_dataframe``.
    atom_indices : 'all' or array-like, default='all'
        Passed through ``digest_atom_indices`` when ``check`` is true.
        NOTE(review): the value is not used afterwards, so every atom is
        always converted -- confirm whether a selection should be applied.
    check : bool, default=True
        Whether to digest (validate) the input arguments.

    Returns
    -------
    mdtraj.Topology
        The converted topology.

    Raises
    ------
    LibraryNotFound
        If mdtraj cannot be imported.
    """

    if check:

        digest_item(item, 'molsysmt.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    # Only a failed import means the library is missing; a bare `except`
    # would also swallow KeyboardInterrupt and SystemExit.
    try:
        from mdtraj import Topology
        from mdtraj.core import element
    except ImportError:
        raise LibraryNotFound('mdtraj')

    atoms_dataframe = item.atoms_dataframe

    atom_name_array = atoms_dataframe["atom_name"].to_numpy()
    atom_type_array = atoms_dataframe["atom_type"].to_numpy()

    group_index_array = atoms_dataframe["group_index"].to_numpy()
    group_name_array = atoms_dataframe["group_name"].to_numpy()
    group_id_array = atoms_dataframe["group_id"].to_numpy()

    chain_index_array = atoms_dataframe["chain_index"].to_numpy()

    bonds_atom1 = item.bonds_dataframe["atom1_index"].to_numpy()
    bonds_atom2 = item.bonds_dataframe["atom2_index"].to_numpy()

    tmp_item = Topology()

    # Sentinels: the first row always opens a new chain and a new residue
    # (assuming indices are never -1).
    former_group_index = -1
    former_chain_index = -1

    list_new_atoms = []

    rows = zip(atom_name_array, atom_type_array, group_index_array,
               group_name_array, group_id_array, chain_index_array)

    for (atom_name, atom_type, group_index,
         residue_name, residue_id, chain_index) in rows:

        if former_chain_index != chain_index:
            chain = tmp_item.add_chain()
            former_chain_index = chain_index

        if former_group_index != group_index:
            residue = tmp_item.add_residue(residue_name,
                                           chain,
                                           resSeq=str(residue_id))
            former_group_index = group_index

        elem = element.get_by_symbol(atom_type)
        atom = tmp_item.add_atom(atom_name, elem, residue)

        list_new_atoms.append(atom)

    # Bond columns hold positions into the atom list built above.
    for atom_1, atom_2 in zip(bonds_atom1, bonds_atom2):

        tmp_item.add_bond(
            list_new_atoms[atom_1],
            list_new_atoms[atom_2])  # bond type and bond order are still missing

    return tmp_item