Beispiel #1
def lindemann_per_frames(u: Universe, select_lang):
    """Calculate the running Lindemann index of every atom after each frame.

    Pairwise distances of the selected atoms are accumulated frame by frame
    with Welford's online mean/variance update.  After each frame the
    per-pair index ``sqrt(M2 / nframes) / mean`` is stored and finally
    averaged over the partners of each atom.  The squared deviations are
    always normalised by the *total* number of frames, not by the number of
    frames seen so far.

    Warning: the intermediate array has shape ``(nframes, natoms, natoms)``
    and can become extremely large for big clusters or long trajectories.

    Parameters
    ----------
    u : MDA trajectory instance.
    select_lang : selection string handed to ``u.select_atoms``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nframes, natoms)``; row ``q`` holds the index of
        every atom computed from frames ``0..q``.
    """
    natoms = len(u.select_atoms(select_lang))
    nframes = len(u.trajectory)
    array_mean = np.zeros((natoms, natoms))
    array_var = np.zeros((natoms, natoms))
    lindex_array = np.zeros((nframes, natoms, natoms))
    cluster = u.select_atoms(select_lang, updating=True)

    for q, _ts in enumerate(u.trajectory):
        coords = cluster.positions
        array_distance = distance.cdist(coords, coords)

        # Welford update of the running mean and of the summed squared
        # deviations, applied to the whole (symmetric) matrix at once.
        delta = array_distance - array_mean
        array_mean += delta / (q + 1)
        array_var += delta * (array_distance - array_mean)

        # The diagonal is 0/0 by construction; it becomes NaN and is ignored
        # by the nanmean below, so the warning carries no information.
        with np.errstate(divide="ignore", invalid="ignore"):
            lindex_array[q] = np.sqrt(array_var / nframes) / array_mean

    return np.array([np.nanmean(frame, axis=1) for frame in lindex_array])
Beispiel #2
def classifyResiduesIntoTwo(apo_pdb, holo_pdb, ligname, cutoff=4.0):
    """Classify aromatic residues as cryptic or non-cryptic.

    An aromatic residue (PHE, TRP, TYR, HIS) of the apo structure is cryptic
    when at least one of its atoms lies within ``cutoff`` of a ligand atom
    taken from the holo structure, i.e. it would clash with the ligand.
    Every other aromatic residue of the holo structure is non-cryptic.

    NOTE(review): positions of the two structures are compared directly, so
    this presumably expects apo and holo to be superimposed and to share
    residue numbering -- confirm against the callers.

    Args:
        apo_pdb: Structure file of the apo state.
        holo_pdb: Structure file of the holo (ligand-bound) state.
        ligname: Residue name of the ligand inside ``holo_pdb``.
        cutoff: Distance threshold, in the length unit of the coordinates.

    Returns:
        Tuple ``(cryptic, not_cryptic)`` of sets of ``"<resname><resid>"``
        labels.
    """
    aromatic_resnames = {'PHE', 'TRP', 'TYR', 'HIS'}
    u_holo, u_apo = Universe(holo_pdb), Universe(apo_pdb)
    ligand = u_holo.select_atoms(f'resname {ligname}')
    holo = u_holo.select_atoms(f'not resname {ligname}')
    apo = u_apo.select_atoms('protein')

    # Collect every aromatic apo residue that comes within the cutoff of any
    # ligand atom; a set makes repeated hits on the same residue harmless.
    cryptic = set()
    for lig_atom in ligand:
        for apo_atom in apo:
            if apo_atom.resname not in aromatic_resnames:
                continue
            separation = np.linalg.norm(lig_atom.position - apo_atom.position)
            if separation <= cutoff:
                cryptic.add(f'{apo_atom.resname}{apo_atom.resid}')

    # Labels are built the same way as above so the set difference lines up.
    all_aromatic = {
        f'{residue.resname}{residue.resid}' for residue in holo.residues
        if residue.resname in aromatic_resnames
    }

    return cryptic, all_aromatic - cryptic
Beispiel #3
 def test_write_selection(self):
     """Write a carbon-only selection and check the atom count after reading it back."""
     ref = Universe(mol2_molecule)
     gr0 = ref.select_atoms("name C*")
     # The file read below has to be produced from the selection under test,
     # otherwise the comparison says nothing about writing.
     gr0.write(self.outfile)
     u = Universe(self.outfile)
     gr1 = u.select_atoms("name C*")
     assert_equal(len(gr0), len(gr1))
def main():
    """Score every trajectory frame by CA-CA distances between two segment groups.

    For each frame the distances between the CA atoms of segids A/B and those
    of segids C/D/E are evaluated: pairs closer than ``lower`` are counted as
    violations, pairs within ``[lower, upper]`` as contacts, and one line per
    frame is written to ``model_no.out``.

    NOTE(review): several statements below appear truncated (see the inline
    notes); the function cannot be parsed until they are restored.
    """
    u = Universe(
    # NOTE(review): the arguments and closing parenthesis of the call above are missing.
    #u = Universe('complex_models.pdb')
    ca_integrinAB = u.select_atoms('segid A B and name CA')
    ca_lamininE8 = u.select_atoms('segid C D E and name CA')

    lower, upper = 6.0, 10.0
    with open('model_no.out', 'w') as fout:
            # NOTE(review): the header string below is presumably the argument of a
            # missing ``fout.write(`` call -- confirm.
            f'#MODEL NO, nViolations (if r<{lower}), nContacts ({lower}<=r<={upper}) \n'

        for i, frame in enumerate(tqdm(u.trajectory),
                                  1):  # Note that i starts with 1.
            # NOTE(review): the second argument of cdist (presumably
            # ``ca_lamininE8.positions``) and the closing parenthesis are missing.
            distances = distance.cdist(ca_integrinAB.positions,
            #nViolations = len(distances[distances<=cutoff])
            nViolations = len(distances[distances < lower])
            nContacts = len(distances[(distances <= upper)
                                      & (distances >= lower)])
            #            score = nContacts -nViolations
            # NOTE(review): as written the score is immediately overwritten with NaN
            # and is undefined when nViolations == 0; an ``else:`` line is
            # presumably missing before the NaN assignment.
            if nViolations != 0:
                score = -0.59 * np.log(nContacts / nViolations)
                score = np.nan
            fout.write(f'{i}, {nViolations}, {nContacts}, {score}\n')
Beispiel #5
 def test_write_selection(self):
     """Round-trip the ``name C*`` selection through the writer and compare sizes."""
     ref = Universe(mol2_molecule)
     gr0 = ref.select_atoms("name C*")
     # Produce the output file from the selection before loading it again;
     # without this step the writer is never exercised.
     gr0.write(self.outfile)
     u = Universe(self.outfile)
     gr1 = u.select_atoms("name C*")
     assert_equal(len(gr0), len(gr1))
Beispiel #6
def distanceMatrix(trajFile, sele1, sele2, ref=None):
    """Collect pairwise Euclidean distances between two selections per frame.

    Args:
        trajFile: Trajectory (or single structure) file.
        sele1: Selection string of the first atom group.
        sele2: Selection string of the second atom group.
        ref: Optional topology/reference file; when given the universe is
            built from ``(ref, trajFile)``.

    Returns:
        A list with one 1-D array per frame.  When ``sele1 == sele2`` only
        the non-zero entries of the upper triangle are kept, otherwise the
        full distance matrix is flattened.
    """
    if ref is None:
        u = Universe(trajFile)
    else:
        print("* Reference is given.")
        u = Universe(ref, trajFile)

    s1, s2 = u.select_atoms(sele1), u.select_atoms(sele2)
    print("* pair 1: \n    ", s1)
    print("* pair 2: \n    ", s2)

    self_distances = sele1 == sele2
    distances = []
    for _ in tqdm(u.trajectory):
        dist = distance.cdist(s1.positions, s2.positions, metric='euclidean')

        if self_distances:
            # The matrix is symmetric: keep one triangle and drop the zeros.
            # Note that this also drops distinct atoms at identical positions.
            dist = np.triu(dist)
            dist = dist[dist != 0]
        else:
            dist = dist.flatten()

        distances.append(dist)

    return distances
Beispiel #7
def calc_tilt_end_to_end(universe: mda.Universe,
    # NOTE(review): the remainder of the parameter list is missing.  The body
    # reads ``resid_up``, ``resid_down`` and ``fname``, so these were presumably
    # parameters -- their order and defaults must be confirmed.
    ''' Calculate tilt related to angle between zaxis and resid_down --> resid_up
        Takes COM of resids

        For every frame the angle between the z axis and the vector joining
        the centers of mass of the two residues is computed, folded so that
        its absolute value lies in [0, 90] degrees, and written together with
        the frame time to ``fname`` as a two-column table ("time", "tilt").
    '''
    fstr2 = '{: <15}{: <20}'
    fstr = '{: <15}{: <20.5f}'
    with open(fname, "w") as outf:
        print(fstr2.format("time", "tilt"), file=outf)
        for t in range(universe.trajectory.n_frames):
            time = universe.trajectory[t].time
            # NOTE(review): the next line looks like the tail of a truncated
            # logging call -- confirm.
  "At %s", time)
            zaxis = np.array([0, 0, 1])
            sel_u = universe.select_atoms("resid {}".format(resid_up))
            sel_d = universe.select_atoms("resid {}".format(resid_down))
            pos_u = sel_u.center_of_mass()
            pos_d = sel_d.center_of_mass()
            # NOTE(review): the right-hand side is truncated; presumably a dot
            # product of the inter-residue vector with zaxis -- confirm.
            costilt =
                (pos_d - pos_u), zaxis) / np.linalg.norm(pos_d - pos_u)
            angle = np.arccos(costilt) * (180 / np.pi)
            # Fold angles above 90 degrees; only the magnitude is reported.
            if angle > 90:
                angle -= 180
            print(fstr.format(time, abs(angle)), file=outf)
Beispiel #8
def cluster_coordinates(  # TODO: rewrite the method
    nvt_run: Universe,
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
    species: List[str],
    distance: float,
    basis_vectors: Optional[Union[List[np.ndarray], np.ndarray]] = None,
    cluster_center: str = "center",
) -> np.ndarray:
    """Calculates the average position of a cluster.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        species: A list of species in the cluster.
        distance: The coordination cutoff distance.
        basis_vectors: The basis vector for normalizing the coordinates of the cluster atoms.
            Either two vectors (the third is built from cross products) or three.
        cluster_center: Cluster center atom species.

    Returns:
        An array of coordinates of the cluster atoms.

    Raises:
        ValueError: If ``basis_vectors`` holds neither two nor three vectors.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    # The first atom matching the center selection defines the cluster center.
    cluster_center_atom = nvt_run.select_atoms(select_dict.get(cluster_center),
                                               periodic=True)[0]
    selection = ("(" + " or ".join(s for s in species) + ") and (around " +
                 str(distance) + " index " + str(cluster_center_atom.index) +
                 ")")
    shell = nvt_run.select_atoms(selection, periodic=True)
    cluster = []
    for atom in shell:
        coord_list = []
        for ts in trj_analysis:
            coord_list.append(atom.position)
        cluster.append(np.mean(np.array(coord_list), axis=0))
    cluster_array = np.array(cluster)

    # ``if basis_vectors:`` is ambiguous for ndarrays, so test explicitly while
    # still treating an empty sequence like "no basis given".
    if basis_vectors is not None and len(basis_vectors) > 0:
        if len(basis_vectors) == 2:
            vec1 = basis_vectors[0]
            vec2 = basis_vectors[1]
            vec3 = np.cross(vec1, vec2)
            # Re-derive vec2 so that the three vectors are mutually orthogonal.
            vec2 = np.cross(vec1, vec3)
        elif len(basis_vectors) == 3:
            vec1 = basis_vectors[0]
            vec2 = basis_vectors[1]
            vec3 = basis_vectors[2]
        else:
            raise ValueError("incorrect vector format")
        vec1 = vec1 / np.linalg.norm(vec1)
        vec2 = vec2 / np.linalg.norm(vec2)
        vec3 = vec3 / np.linalg.norm(vec3)
        basis_xyz = np.transpose([vec1, vec2, vec3])
        # Express the coordinates in the new basis and center them.
        cluster_norm = np.linalg.solve(basis_xyz, cluster_array.T).T
        cluster_norm = cluster_norm - np.mean(cluster_norm, axis=0)
        return cluster_norm
    return cluster_array
Beispiel #9
def res_dict_from_select_dict(u: Universe,
                              select_dict: Dict[str, str]) -> Dict[str, str]:
    """
    Infer res_dict (residue selection) from select_dict (atom selection) in a MDAnalysis.universe object.

    Args:
        u: The universe object to assign resnames to.
        select_dict: A dictionary of atom species, where each atom species name is a key
                and the corresponding values are the selection language.

    Returns:
        A dictionary of resnames.
    """
    saved_select = []
    res_dict = {}
    for key, val in select_dict.items():
        res_select = "same resid as (" + val + ")"
        res_group = u.select_atoms(res_select)
        # "cation"/"anion" are always kept; any other species is kept only if
        # its residues were not already covered by an earlier entry.
        if key in ["cation", "anion"] or res_group not in saved_select:
            saved_select.append(res_group)
            res_dict[key] = res_select
    if ("cation" in res_dict and "anion" in res_dict and u.select_atoms(
            res_dict.get("cation")) == u.select_atoms(res_dict.get("anion"))):
        # Cation and anion live in the same residues: report them once as "salt".
        res_dict.pop("anion")
        res_dict["salt"] = res_dict.pop("cation")
    return res_dict
Beispiel #10
def save_systems(flex: mda.Universe, protein: mda.Universe,
                 crystal: mda.Universe, dir: str):
    """Write the heavy atoms of the flexible residues to PDB files.

    For every protein residue found in ``flex`` the matching residue is
    selected (hydrogens excluded) in ``protein`` and in ``crystal`` and
    written to ``pflex-<res>.pdb`` / ``cflex-<res>.pdb``.  Afterwards all
    flexible residues together are written to ``pflex.pdb`` / ``cflex.pdb``.

    Args:
        flex: Universe containing the flexible residues.
        protein: Universe the ``pflex*`` files are extracted from.
        crystal: Universe the ``cflex*`` files are extracted from.
        dir: Output directory.

    Raises:
        AssertionError: If ``protein`` and ``crystal`` disagree on the number
            of selected atoms.
    """
    def sel(resnum, resname, segid, icode) -> str:
        return f"(resid {resnum}{icode} and resname {resname} and segid {segid})"

    flexres = flex.select_atoms("protein").residues

    residues = []
    for res in flexres:
        ressel = (sel(res.resnum, res.resname, res.segid, res.icode) +
                  " and not (type H or name H*)")

        # Select single residue
        p_res = protein.select_atoms(ressel)
        c_res = crystal.select_atoms(ressel)

        assert p_res.n_atoms == c_res.n_atoms

        pfname = os.path.join(
            dir, f"pflex-{res.resname}-{res.segid}{res.resnum}{res.icode}.pdb")
        cfname = os.path.join(
            dir, f"cflex-{res.resname}-{res.segid}{res.resnum}{res.icode}.pdb")

        # Write out PDB files
        p_res.write(pfname)
        c_res.write(cfname)

        residues.append((res.resnum, res.resname, res.segid, res.icode))

    # Check that all flexible residues are listed
    assert len(residues) == len(flexres)

    # TODO: Can be improved by using ressel
    selection = " or ".join(
        sel(resnum, resname, segid, icode)
        for resnum, resname, segid, icode in residues)

    # Remove H atoms
    # TODO: Possibly need perception for atom name, when type is not present
    selection = f"({selection}) and not (type H or name H*)"

    p_atoms = protein.select_atoms(selection)
    c_atoms = crystal.select_atoms(selection)

    # Check that the number of atoms in the two selections is equal
    assert len(p_atoms) == len(c_atoms)

    p_atoms.write(os.path.join(dir, "pflex.pdb"))
    c_atoms.write(os.path.join(dir, "cflex.pdb"))

Beispiel #11
 def test_atomgroups(self):
     """Compare altloc-based atom counts within segid B.

     The two "not altloc" selections must have equal sizes, the two altloc
     selections must have equal sizes, and the non-B plus altloc-A atoms
     must add up to the whole segment.
     """
     universe = Universe(self.filename)

     def n_atoms(selection):
         return len(universe.select_atoms(selection))

     without_b = n_atoms("segid B and (not altloc B)")
     without_a = n_atoms("segid B and (not altloc A)")
     assert_equal(without_b, without_a)

     only_a = n_atoms("segid B and (altloc A)")
     only_b = n_atoms("segid B and (altloc B)")
     assert_equal(only_a, only_b)

     total = n_atoms("segid B")
     assert_equal(total, without_b + only_a)
Beispiel #12
def num_of_neighbor_simple(
    nvt_run: Universe,
    center_atom: Atom,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
) -> Dict[str, np.ndarray]:
    """Calculates solvation structure type (1 for SSIP, 2 for CIP and 3 for AGG) with respect to the ``center_atom``
    in the specified frame range.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The solvation shell center atom.
        distance_dict: A dict of coordination cutoff distance of the neighbor species.
            Must contain exactly one (counter-ion) species.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.

    Returns:
        A dict with "total" as the key and an array of the solvation structure type in the specified frame range
        as the value.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    center_selection = "same type as index " + str(center_atom.index)
    assert len(
        distance_dict
    ) == 1, "Please only specify the counter-ion species in the distance_dict"
    species = list(distance_dict.keys())[0]
    cn_values = np.zeros(int(len(trj_analysis)))
    for time_count, _ts in enumerate(trj_analysis):
        selection = select_shell(select_dict, distance_dict, center_atom,
                                 species)
        shell = nvt_run.select_atoms(selection, periodic=True)
        shell_len = len(shell)
        if shell_len == 0:
            # No counter-ion in the shell.
            cn_values[time_count] = 1
        elif shell_len == 1:
            # One counter-ion: look at that ion's own shell of center-type atoms.
            selection_species = select_shell(center_selection, distance_dict,
                                             shell.atoms[0], species)
            shell_species = nvt_run.select_atoms(selection_species,
                                                 periodic=True)
            # One is subtracted before the comparison -- presumably to discount
            # ``center_atom`` itself; confirm against ``select_shell``.
            shell_species_len = len(shell_species) - 1
            cn_values[time_count] = 2 if shell_species_len == 0 else 3
        else:
            cn_values[time_count] = 3
    return {"total": cn_values}
Beispiel #13
def split_molecules(
    u: mda.Universe,
    keep_ions: bool = False
) -> Dict[str, Union[mda.AtomGroup, List[mda.AtomGroup]]]:
    """
    Split different molecules (protein, water, ligands, ...) within a structure.

    Args:
        u (mda.Universe): MDAnalysis universe
        keep_ions (bool, optional): Flag to keep/ignore ions

    Returns:
        A dictionary with the name of the selection and the corresponding
        group (or a list of groups if there are multiple molecules with the
        same name).  "protein" and "water" entries are only present when the
        respective selection is not empty; every other entry is keyed by
        residue name and holds the residue object(s).
    """
    import re

    split = {}

    # Select protein
    protein = u.select_atoms("protein")
    if len(protein.atoms) != 0:  # Check if protein is present
        split["protein"] = protein

    # Select water molecules: the first non-empty naming convention wins
    for water_name in ["WAT", "HOH"]:
        water = u.select_atoms(f"resname {water_name}")

        if len(water.atoms) != 0:
            break  # If selection is not empty, stop
    if len(water.atoms) != 0:  # Check if water is present
        split["water"] = water

    # Other molecules
    other = u.select_atoms("all") - protein - water
    for res in other.residues:  # Loop over all "other" residues
        name = res.resname

        if re.search(r"[A-Z]?[+-]", name) is not None and not keep_ions:
            # Skip this ion only; the remaining residues must still be visited.
            continue

        try:
            old = split[name]
        except KeyError:
            split[name] = res
        else:
            if isinstance(old, list):
                old.append(res)
            else:
                split[name] = [old, res]

    return split
Beispiel #14
def select_flexres(flex: mda.Universe, prot: mda.Universe) -> mda.AtomGroup:
    """
    Given a protein and a series of flexible residues, selects the full flexible
    residues (including backbone atoms) from the protein structure.

    Args:
        flex (mda.Universe): flexible residues
        prot (mda.Universe): protein

    Returns:
        An `mda.AtomGroup` containing the atoms corresponding to flexible residues
        extracted from the protein (including backbone atoms)
    """
    residue_terms = [
        f"(resid {res.resnum}{res.icode} and resname {res.resname} and segid {res.segid})"
        for res in flex.residues
    ]

    # Sanitize selection and remove residues without Janin dihedrals
    # Ignoring them explicitly removes a warning
    # The alternatives are parenthesised so that the exclusion applies to all
    # of them, independently of operator precedence.
    sel = (
        "(" + " or ".join(residue_terms) + ") "
        + "and not (resname ALA or resname CYS or resname GLY or resname PRO or resname SER or resname THR or resname VAL)"
    )

    return prot.select_atoms(sel)
Beispiel #15
 def test_bonds(self):
     """First atom of altloc A and of altloc B in segid B must have equally many bonds."""
     universe = Universe(self.filename, guess_bonds=True)
     selections = ("segid B and (altloc A)", "segid B and (altloc B)")
     # need to force topology to load before querying individual atom bonds
     bond_counts = [len(universe.select_atoms(s)[0].bonds) for s in selections]
     assert_equal(bond_counts[0], bond_counts[1])
Beispiel #16
def select(system: mda.Universe,
           distance: float,
           removeHs: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """
    Select binding site.

    Parameters
    ----------
    system: mda.Universe
        Protein-ligand complex
    distance: float
        Ligand-residues distance
    removeHs: bool
        Remove hydrogen atoms

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        Array of elements and array of cartesian coordinate for ligand and protein
        atoms within the binding site

    Notes
    -----
    The binding site is defined by residues with at least one atom within
    :code:`distance` from the ligand.  The ligand is identified by the
    residue name ``LIG``.
    """
    resselection = system.select_atoms(
        f"(byres (around {distance} (resname LIG))) or (resname LIG)")

    if removeHs:
        mask = resselection.elements != "H"
        # Elements from PDB file needs MDAnalysis@develop (see #2648)
        return resselection.elements[mask], resselection.positions[mask]

    return resselection.elements, resselection.positions
Beispiel #17
 def test_bonds(self):
     """Compare the bond counts of the leading altloc A and altloc B atoms of segid B."""
     universe = Universe(self.filename, guess_bonds=True)

     def leading_bonds(selection):
         # need to force topology to load before querying individual atom bonds
         return universe.select_atoms(selection)[0].bonds

     bonds_a = leading_bonds("segid B and (altloc A)")
     bonds_b = leading_bonds("segid B and (altloc B)")
     assert_equal(len(bonds_a), len(bonds_b))
Beispiel #18
def main():
    """Compute contact probabilities for a trajectory and optionally emit PyMOL input.

    Reads the command-line options, loads the PSF/DCD pair into a Universe,
    saves the contact probability matrix with ``np.savetxt`` and, when the
    ``pymol`` option equals ``'Y'``, calls ``pymol_contact_visu`` (writing
    ``forpymol.pdb`` from the two segids if no PDB file was supplied).
    """
    # get options
    options = parse_options()
    psf = options.psf_file
    dcd = options.dcd_file
    chain1 = options.segid1
    chain2 = options.segid2
    selection1 = options.selection1
    selection2 = options.selection2
    co = options.cutoff
    output = options.output_file
    visu = options.pymol
    pdbvisu = options.pymol_pdb

    # use MDAnalysis to read trajectory
    u = Universe(psf, dcd)

    # get contact probability
    cp = GetContacts(u)
    # NOTE(review): the statement below is truncated -- the call on ``cp`` (its
    # method name, first argument and closing parenthesis) is missing and cannot
    # be recovered from this file.
    contactprob, bio1, bio2 =, chain2, selection1, selection2,
    np.savetxt(output, contactprob, fmt='%4.2f', delimiter=" ")

    # generate pymol scripts if needed
    if visu == 'Y':
        # if no pdb file is supplied, write one from trajectory, first frame
        if pdbvisu == None:
            seleforpymol = u.select_atoms("segid %s or segid %s" %
                                          (chain1, chain2))
            seleforpymol.write('forpymol.pdb', remarks=None)
            pdbvisu = 'forpymol.pdb'
            # check pdb file format for weird encoding

        pymol_contact_visu(contactprob, pdbvisu, chain1, chain2, bio1, bio2)
Beispiel #19
def analyze_radgyr(u: mda.Universe) -> List[float]:
    """Extract the radius of gyration metric for each trajectory frame.

    The atoms are chosen once with ``STANDARD_SELECTION``; iterating the
    trajectory updates their coordinates, so the metric is re-evaluated on
    every frame.
    """
    atoms = u.select_atoms(STANDARD_SELECTION)
    return [atoms.radius_of_gyration() for _ in u.trajectory]
Beispiel #20
def select_dict_from_resname(u: Universe) -> Dict[str, str]:
    """
    Infer select_dict (possibly interested atom species selection) from resnames in a MDAnalysis.universe object.
    The resname must be pre-assigned already.

    For every distinct, non-empty resname the first residue is inspected.
    Charged residues are handed to ``extract_atom_from_ion``.  Neutral
    residues are split into their fragments when there is more than one;
    each fragment is then treated as cation, anion or neutral molecule
    according to its summed charge.

    Args:
        u: The universe object to work with.

    Returns:
        A dictionary of atom species.
    """
    select_dict: Dict[str, str] = {}
    resnames = np.unique(u.residues.resnames)
    for resname in resnames:
        if resname == "":
            continue
        residue = u.select_atoms("resname " + resname).residues[0]
        if np.isclose(residue.charge, 0,
                      atol=1e-5):  # np.sum(residue.atoms.charges)
            fragments = residue.atoms.fragments
            if len(fragments) == 2:
                for i, frag in enumerate(fragments):
                    charge = np.sum(frag.charges)
                    if charge > 0.001:
                        extract_atom_from_ion(True, frag, select_dict)
                    elif charge < -0.001:
                        extract_atom_from_ion(False, frag, select_dict)
                    else:
                        extract_atom_from_molecule(resname, frag, select_dict,
                                                   number=i + 1)
            elif len(fragments) >= 2:
                # More than two fragments: number each kind separately.
                cation_number = 1
                anion_number = 1
                molecule_number = 1
                for frag in fragments:
                    charge = np.sum(frag.charges)
                    if charge > 0.001:
                        extract_atom_from_ion(True, frag, select_dict,
                                              cation_number)
                        cation_number += 1
                    elif charge < -0.001:
                        extract_atom_from_ion(False, frag, select_dict,
                                              anion_number)
                        anion_number += 1
                    else:
                        extract_atom_from_molecule(resname, frag, select_dict,
                                                   molecule_number)
                        molecule_number += 1
            else:
                extract_atom_from_molecule(resname, residue, select_dict)
        elif residue.charge > 0:
            extract_atom_from_ion(True, residue, select_dict)
        else:
            extract_atom_from_ion(False, residue, select_dict)
    return select_dict
    def output_pdb_w_index(self):
        """Put the scaled sigma of selected aromatic residues into the B-factor column.

        The reference structure is loaded, all temperature factors are reset
        to zero and, for every CA atom of a PHE/TRP/TYR/HIS residue whose
        ``|DF|`` is below the alpha threshold, the temperature factor is set
        to ``sigma * 100``.  Each inspected key is printed with its DF and
        sigma.  Nothing is written to disk in this method.
        """
        # sigma is scaled because a PDB file keeps only a few significant
        # digits: the original note gives 0.011 as an example value that
        # would otherwise be stored as 0.01.
        scale_factor = 100.0
        aromatic_resnames = ['PHE', 'TRP', 'TYR', 'HIS']

        u = Universe(self.__ref)
        # initialize the b-factor column
        u.atoms.tempfactors = 0
        for icalpha in u.atoms.select_atoms('name CA'):
            if icalpha.resname not in aromatic_resnames:
                continue
            chain = icalpha.segid.replace('SYSTEM', 'A')
            key = f'{icalpha.resname}{icalpha.resid}{chain}'
            entry = self.cryptic_index[key]
            DF = entry[0]
            sigma = entry[1]
            print(key, DF, sigma)
            if np.abs(DF) < self.__alpha:
                icalpha.tempfactor = sigma * scale_factor

Beispiel #22
def check_contiguous_steps(
    nvt_run: Universe,
    center_atom: Atom,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
    checkpoints: np.ndarray,
    lag: int = 20,
) -> Dict[str, np.ndarray]:
    """Calculates the average number of neighbor atoms around the center atom
    in the checkpoint +/- lag time range.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The center atom object.
        distance_dict: A dictionary of Cutoff distance of neighbor for each species.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        checkpoints: The frame numberings of interest to check for contiguous steps.
        lag: The range (+/- lag) of the contiguous steps. Default to 20.

    Returns:
        A dict mapping each species of ``distance_dict`` to an array of length
        ``2 * lag + 1`` holding the shell size averaged over all checkpoints
        for every offset from ``-lag`` to ``+lag``.  Empty if no frame lies
        within ``lag`` of any checkpoint.
    """
    coord_num: Dict[str, Union[List[List[int]], np.ndarray]] = {
        x: [[] for _ in range(lag * 2 + 1)]
        for x in distance_dict
    }
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    has = False
    for i, ts in enumerate(trj_analysis):
        nearby = [j for j in checkpoints if abs(i - j) <= lag]
        if not nearby:
            continue
        has = True
        # When several checkpoints are within reach, the last one listed wins.
        checkpoint = nearby[-1]
        for kw in distance_dict:
            selection = select_shell(select_dict, distance_dict,
                                     center_atom, kw)
            shell = nvt_run.select_atoms(selection, periodic=True)
            coord_num[kw][i - checkpoint + lag].append(len(shell))
    one_atom_ave = {}
    if has:
        for kw, slots in coord_num.items():
            one_atom_ave[kw] = np.array(
                [np.array(time).mean() for time in slots])
    return one_atom_ave
Beispiel #23
def _list_types(coordinates_file):
    """Return the sorted, unique residue names of all atoms in a coordinate file.

    The file is first passed to ``_check_input_file`` with ``.gro`` as the
    only accepted extension.
    """
    _check_input_file(coordinates_file, extensions=[".gro"])

    every_atom = Universe(coordinates_file).select_atoms("all")
    return np.unique(every_atom.resnames)
Beispiel #24
def analyze_sasa(u: mda.Universe) -> np.ndarray:
    """Extract SASA value for each trajectory frame.

    The coordinates of the ``STANDARD_SELECTION`` atoms are pulled for the
    whole trajectory at once and the total solvent accessible surface area is
    computed per frame with freesasa, using one radius per atom from
    ``get_atom_radius``.
    """
    atoms = u.select_atoms(STANDARD_SELECTION)
    positions = u.trajectory.timeseries(asel=atoms)
    atom_radius = list(map(get_atom_radius, atoms))

    # The first two axes are swapped so that iteration goes frame by frame;
    # freesasa expects a flat coordinate vector.
    trajectory_sasa = [
        freesasa.calcCoord(frame.reshape(-1), atom_radius).totalArea()
        for frame in np.swapaxes(positions, 0, 1)
    ]

    return np.array(trajectory_sasa)
Beispiel #25
def analyze_pca(u: mda.Universe, n_dimensions=40):
    """Fetch PCA component contribution values for a single trajectory.

    Args:
        u: Universe whose backbone atoms are analysed.
        n_dimensions: Number of leading components to report.

    Returns:
        A two-element list ``[variance, cumulated_variance]``, each truncated
        to the first ``n_dimensions`` components.
    """
    pca_analysis = pca.PCA(u, select='backbone')
    space = pca_analysis.run()

    # NOTE(review): the cosine content of the first component is computed but
    # not part of the returned value -- confirm whether it should be.
    space_3 = space.transform(u.select_atoms('backbone'), 3)
    w = pca.cosine_content(space_3, 0)

    return [
        space.variance[:n_dimensions], space.cumulated_variance[:n_dimensions]
    ]
Beispiel #26
def calc_neigh_corr(
    nvt_run: Universe,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    time_step: float,
    run_start: int,
    run_end: int,
    center_atom: str = "cation",
) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
    """Calculates the neighbor auto-correlation function (ACF)
    of selected species around center atom.

    Args:
        nvt_run: An MDAnalysis ``Universe``.
        distance_dict: Cutoff distance per neighbor species; its keys are the
            species the ACF is computed for.
        select_dict: Selection strings per species; ``select_dict[center_atom]``
            selects the center atoms.
        time_step: Multiplied with the frame counter to build the time axis.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        center_atom: The center atom to calculate the ACF for. Default to "cation".

    Returns:
        A tuple containing the time series, and a dict of acf of neighbor species.
    """
    # Set up times array
    times = []
    step = 0
    center_atoms = nvt_run.select_atoms(select_dict[center_atom])
    for ts in nvt_run.trajectory[run_start:run_end]:
        times.append(step * time_step)
        step += 1
    times = np.array(times)

    acf_avg = {}
    for kw in distance_dict.keys():
        acf_all = []
        for atom in tqdm(center_atoms[::]):
            distance = distance_dict.get(kw)
            assert distance is not None
            # NOTE(review): the argument list of the call below is missing and its
            # order cannot be recovered from this file.
            adjacency_matrix = neighbors_one_atom(
            acfs = calc_acf(adjacency_matrix)
            # NOTE(review): the loop body is missing; presumably each ``acf`` is
            # appended to ``acf_all``, which is averaged below -- confirm.
            for acf in acfs:
        acf_avg[kw] = np.mean(acf_all, axis=0)
    return times, acf_avg
def generate_universe(topology, trajectory=None):
    """Build a Universe and print its box size and water count.

    Args:
        topology: Topology (or single structure) file.
        trajectory: Optional trajectory file; ``None`` or ``''`` loads the
            topology alone.

    Returns:
        The created Universe.
    """
    print('Generating Universe...')
    if trajectory is None or trajectory == '':
        u = Universe(topology)
    else:
        u = Universe(topology, trajectory)

    x, y, z = u.dimensions[:3]
    print(f'Universe with dimensions x: {x}, y: {y}, z: {z} loaded!')
    n_waters = u.select_atoms('resname WAT').n_residues
    print(f'{n_waters} water molecules detected!')

    return u
Beispiel #28
def distance_to_cnt(u: Universe, selection_cluster, cluster_size):
    """For carbon nanotube included trajectories, analyze cluster atoms.

    For every frame the distance of each cluster atom to the nanotube axis is
    measured in the xy plane: the reference point has the x/y of the center of
    geometry of all atoms named ``C`` and the z of the cluster atom itself.

    Parameters
    ----------
    u : MDA trajectory instance.
    selection_cluster : selection string of the cluster atoms.
    cluster_size : size of clusters (number of columns of the result).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nframes, cluster_size)`` with the distances.
    """
    distances = np.zeros((len(u.trajectory), cluster_size))
    cnt = u.select_atoms('name C', updating=True)
    pt = u.select_atoms(selection_cluster, updating=True)
    for q, _ts in enumerate(u.trajectory):
        cg = cnt.center_of_geometry()
        for p, t in enumerate(pt.positions):
            # Using the atom's own z removes the axial component.
            distances[q, p] = distance.euclidean(t, [cg[0], cg[1], t[2]])
    return distances
Beispiel #29
def check_inputs(selection: list, start: int, stop: int, step: int,
                 universe: mda.Universe):
    """Validate selections, group names, group pairs and the frame range.

    Args:
        selection: Sequence whose first three items are the selection strings,
            the names given to those selections and the list of group pairs.
        start: First frame.
        stop: Last frame.
        step: Frame stride.
        universe: Universe the selections and the frame range are checked against.

    Raises:
        InputError: On the first inconsistency found.
    """
    ag_sel = selection[0]
    ag_names = selection[1]
    ag_pair = selection[2]

    # Testing names and selections
    if len(ag_sel) > len(ag_names):
        raise InputError('Not all selections are named')
    elif len(ag_sel) < len(ag_names):
        raise InputError('Too many selection names for number of selections')

    for sel in ag_sel:
        try:
            universe.select_atoms(sel)
        except mda.SelectionError as err:
            raise InputError('Error in selection: {}'.format(sel)) from err

    for pair in ag_pair:
        if len(pair) != 4:
            raise InputError(
                'Pairs must be a python list of string with 4 items')
        if pair[0] not in ag_names:
            raise InputError(
                f'{pair[0]} in {pair} group_pair_selections is not in defined in atom_group_names'
            )
        if pair[1] not in ag_names:
            raise InputError(
                f'{pair[1]} in {pair} group_pair_selections is not in defined in atom_group_names'
            )

    # The frame range does not depend on the pairs, so it is validated once
    # and also when no pair is given.
    if start >= stop:
        raise InputError('Start is greater than or equal to stop')
    if step >= stop:
        raise InputError('Step is greater than or equal to stop')
    if step == 0:
        raise InputError('Step cannot be 0')

    if len(universe.trajectory) < stop:
        raise InputError(
            f'Stop exceeds length of trajectory, trajectory is {len(universe.trajectory)} frames'
        )

    print('Input Parameters Accepted')
Beispiel #30
def main():
    """Collect CA-CA distances between the two chains of heterodimer PDB files.

    Every ``*.pdb`` file under ``./interfaces/heterodimer/`` is a candidate;
    files listed in ``omited_pdbs`` are treated separately.  For a two-chain
    file the protein CA atoms of each chain are selected and passed to
    ``distances``; the collected values are meant to be stacked, pickled to
    ``whole_distances.pkl`` and summarised by mean and standard deviation.

    NOTE(review): parts of this function appear truncated (see inline notes),
    and the unconditional ``sys.exit`` makes everything after it unreachable.
    """
    # These files were downloaded by a certain rule via the advanced search in RCSB PDB,
    # but they contain more than 3 chains, which was out of scope.
    # NOTE(review): the list literal below is not closed -- its last line(s) are missing.
    omited_pdbs = ['4e7u.pdb', '4e7t.pdb', '3exx.pdb', '4fka.pdb', '5ep6.pdb',
                   '4gxv.pdb', '4uqp.pdb', '3uqy.pdb', '4dg4.pdb', '4urh.pdb',
                   '6f4j.pdb', '1xd3.pdb', '3bog.pdb', '6mee.pdb', '4pj2.pdb',
                   '5bpk.pdb', '3cjs.pdb', '4c2v.pdb', '1pid.pdb', '6fu9.pdb',
                   '2oxg.pdb', '1svf.pdb', '6fc1.pdb', '1q7l.pdb', '4kn9.pdb',
                   '4b2b.pdb', '6g6k.pdb', '4m4l.pdb', '4b2c.pdb', '1ben.pdb',
                   '3tt8.pdb', '3fq9.pdb', '5nwg.pdb', '4uql.pdb', '2xkn.pdb',

    filenames = glob.glob('./interfaces/heterodimer/*.pdb')

    whole_distances = []
    for j, file in enumerate(filenames):
        #print(j, file, file.split('/')[-1] in omited_pdbs)

        # NOTE(review): the body of this ``if`` (and presumably an ``else:`` owning
        # the indented lines that follow) is missing -- confirm.
        if file.split('/')[-1] in omited_pdbs:

            print(file.split('/')[-1].split('.')[0].upper()+",",end = '')
            u = Universe(file)

            # Only files with exactly two distinct segids are supported.
            nchains = len(set(u.segments.segids))
            if nchains != 2: sys.exit(f'The number of chains = {nchains}. that is out of scope for this program.')

            chain_objs = []
            for i, chain in enumerate(set(u.segments.segids)):
                chain_objs.append(u.select_atoms(f'protein and segid {chain} and name CA'))
                print(f'    *{chain}')
#               print(chain_objs[i].atoms)

            whole_distances.append(distances(chain_objs[0], chain_objs[1]))

    # Deliberate stop: the statements below never run while this line is present.
    sys.exit('stop! you might have already done this. so i forced you to procced.')
    whole_distances = np.hstack(whole_distances)
    filtered_dist   = whole_distances[whole_distances<=50.0]

    with open('whole_distances.pkl','wb') as f:
        pickle.dump(whole_distances, f)

    mean = np.mean(whole_distances)
    std  = np.std(whole_distances)

    print(f'mean: {mean}, std:{std}')
Beispiel #31
def assign_resname(u: Universe, res_dict: Dict[str, str]):
    """
    Assign resnames to residues in a MDAnalysis.universe object. The function will not overwrite existing resnames.

    Args:
        u: The universe object to assign resnames to.
        res_dict: A dictionary of resnames, where each resname is a key
            and the corresponding values are the selection language.
    """
    for key, val in res_dict.items():
        res_group = u.select_atoms(val)
        res_names = res_group.residues.resnames
        # Only residues whose resname is still the empty string receive the
        # new name; residues that already carry a name are left untouched.
        res_names[res_names == ""] = key
        # Write the updated array back through the attribute setter.
        res_group.residues.resnames = res_names
Beispiel #32
def neighbor_distance(
    nvt_run: Universe,
    center_atom: Atom,
    run_start: int,
    run_end: int,
    species: str,
    select_dict: Dict[str, str],
    distance: float,
) -> Dict[str, np.ndarray]:
    # NOTE(review): the prose lines below read as this function's docstring,
    # but no triple-quote delimiters are present in this copy — restore them
    # from the upstream source.
    Calculates a dictionary of distances between the ``center_atom`` and neighbor atoms.

        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The center atom object.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        species: The neighbor species in the select_dict.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        distance: The neighbor cutoff distance.

        A dictionary of distance of neighbor atoms to the ``center_atom``. The keys are atom indexes in string type .
    # Maps str(atom.index) -> per-frame array with run_end - run_start entries.
    dist_dict = {}
    time_count = 0
    # Frames run_start (inclusive) to run_end (exclusive).
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    species_selection = select_dict.get(species)
    if species_selection is None:
        raise ValueError("Invalid species selection")
    # First pass: register every atom of the requested species that is selected
    # by the `around <distance> index <center>` query in at least one frame.
    for ts in trj_analysis:
        selection = ("(" + species_selection + ") and (around " +
                     str(distance) + " index " + str(center_atom.index) + ")")
        shell = nvt_run.select_atoms(selection, periodic=True)
        for atom in shell.atoms:
            if str(atom.index) not in dist_dict:
                # NOTE(review): this np.full(...) call is cut off in this copy;
                # the fill value and closing parenthesis are missing.
                dist_dict[str(atom.index)] = np.full(run_end - run_start,
        # Incremented here but not read inside this first loop.
        time_count += 1
    # Second pass: restart the frame counter and fill in one value per frame
    # for every registered atom.
    time_count = 0
    for ts in trj_analysis:
        for atom_index, val in dist_dict.items():
            # NOTE(review): this distance_array(...) call is cut off in this
            # copy; the trailing argument(s) and closing parenthesis are missing.
            dist = distance_array(ts[center_atom.index], ts[int(atom_index)],
            val[time_count] = dist
        time_count += 1
    return dist_dict
Beispiel #33
 def test_write_read(self):
     """Check that ``self.outfile`` loads with as many atoms as ``self.filename``."""
     source_universe = Universe(self.filename)
     reloaded_universe = Universe(self.outfile)
     n_source_atoms = len(source_universe.atoms)
     n_reloaded_atoms = len(reloaded_universe.atoms)
     assert_equal(n_source_atoms, n_reloaded_atoms)
Beispiel #34
    # Parse the command-line options; `parser` is built outside this excerpt.
    args = parser.parse_args()

    # NOTE(review): both assignments below sit in the same `if not args.static:`
    # body, so the second (union_sph_sh) immediately replaces the first
    # (dyn_union_sph_sh), and `header_string` is never set when args.static is
    # truthy; an `else:` line appears to be missing — confirm.
    if not args.static:
        header_string = "; Umbrella potential for a spherical shell cavity\n"\
        "; Name    Type          Group  Kappa   Nstar    mu    width  cutoff  outfile    nstout\n"\
        "hydshell dyn_union_sph_sh   OW  0.0     0   XXX    0.01   0.02   phiout.dat   50  \\\n"
        header_string = "; Umbrella potential for a spherical shell cavity\n"\
        "; Name    Type          Group  Kappa   Nstar    mu    width  cutoff  outfile    nstout\n"\
        "hydshell union_sph_sh   OW  0.0     0   XXX    0.01   0.02   phiout.dat   50  \\\n"        

    # Single-structure mode: no trajectory given, so only the GRO file is loaded.
    if args.traj is None:
        u = Universe(args.gro)

        # NOTE(review): as written, the user-supplied selection is overwritten
        # by the default heavy-atom selection on the next statement, and
        # `prot_heavies` is undefined when args.sspec is None; an `else:` line
        # appears to be missing — confirm.
        if args.sspec is not None:
            prot_heavies = u.select_atoms(args.sspec)
            # Select peptide heavies - exclude water's and ions
            prot_heavies = u.select_atoms("not (name H* or type H or resname SOL) and not (name NA or name CL) and not (resname WAL) and not (resname DUM)")

        # NOTE(review): no matching close() for `fout` is visible in this
        # excerpt, and `header_string` is not written to it here.
        fout = open(args.outfile, 'w')

        # NOTE(review): both loops below run only when args.static is truthy,
        # so each atom gets a coordinate line and then an index line; an
        # `else:` between them appears to be missing — confirm.
        if args.static:
            # One line per atom: -0.5, args.rad / 10, and the x, y, z position
            # components / 10 (presumably an Angstrom-to-nm conversion — confirm).
            for atm in prot_heavies:
                fout.write("{:<10.1f} {:<10.1f} {:<10.3f} {:<10.3f} {:<10.3f}\\\n".format(-0.5, args.rad/10.0, atm.pos[0]/10.0, atm.pos[1]/10.0, atm.pos[2]/10.0))
            # One line per atom: -0.5, args.rad / 10, and the 1-based atom index.
            for atm in prot_heavies:
                fout.write("{:<10.1f} {:<10.1f} {:d} \\\n".format(-0.5, args.rad/10.0, atm.index+1))

At this point, I wanted to confirm whether the center of mass (COM) of s100b was canceled.

Caution: this program is specialized for s100b-CTD system.

Usage: python <this script> [ PDB file name ]

# Write the per-frame center of mass of s100b (residues 1-94) to
# "<input file>_comTraj.dat", one "x y z" line per trajectory frame.
#
# The print calls pass a single pre-built string so that the output is the
# same under Python 2 and Python 3. The doubled space reproduces the separator
# that the former Python 2 `print a, b` statements emitted.
file_name = sys.argv[1]
print("Input file name :  " + file_name)

u = Universe(file_name)
print("No of snapshots:  " + str(len(u.trajectory)))

# Select all atoms that constitute s100b. The selection depends only on
# residue ids, not on coordinates, so it is built once; the group's positions
# follow the current frame as the trajectory is iterated.
selected_atoms = u.select_atoms("resid 1-94")

# The context manager guarantees the output file is flushed and closed, even
# if reading a frame fails part-way through.
with open(file_name + "_comTraj.dat", "w") as f_out:
    for ts in u.trajectory:

        print("atom ids:  " + str(selected_atoms.ids))

        com = selected_atoms.center_of_mass()
        # NOTE(review): `cog` is computed but not used in this excerpt.
        cog = selected_atoms.center_of_geometry()

        f_out.write(" ".join(str(component) for component in com[:3]) + " \n")

.. SeeAlso:: :mod:`MDAnalysis.analysis.psa`


from MDAnalysis import Universe
from MDAnalysis.analysis.align import rotation_matrix
from MDAnalysis.analysis.psa import PSAnalysis

if __name__ == '__main__':

    print("Generating AdK CORE C-alpha reference coordinates and structure...")

    # Load the closed and open AdK structures.
    closed_universe, open_universe = [
        Universe(pdb_path)
        for pdb_path in ('structs/adk1AKE.pdb', 'structs/adk4AKE.pdb')
    ]

    # Restrict both structures to their C-alpha atoms.
    closed_calphas = closed_universe.select_atoms('name CA')
    open_calphas = open_universe.select_atoms('name CA')

    # Residue ranges that make up the CORE domain.
    core_selection = "(resid 1:29 or resid 60:121 or resid 160:214)"

    # C-alpha CORE coordinates of each structure.
    closed_core_xyz = closed_calphas.select_atoms(core_selection).positions
    open_core_xyz = open_calphas.select_atoms(core_selection).positions

    # Rotation matrix (and the accompanying rmsd value) that fits the open
    # CORE C-alphas onto the closed ones.
    rotation, core_rmsd = rotation_matrix(open_core_xyz, closed_core_xyz)

    # NOTE(review): the step that applies this rotation to the open structure
    # is not present in this excerpt.
from MDAnalysis import Universe, collection, Timeseries
from MDAnalysis.tests.datafiles import PSF, DCD

# Plotting support is optional: `have_matplotlib` records whether matplotlib
# and the pylab helpers could be imported. The `try:` line is required for the
# `except ImportError:` clause below to be valid.
try:
    import matplotlib

    matplotlib.use('agg')  # no interactive plotting, only save figures
    from pylab import errorbar, legend, xlabel, ylabel, savefig, clf, gca, draw

    have_matplotlib = True
except ImportError:
    have_matplotlib = False

# Build the phi/psi atom selections for each interior residue of the protein
# loaded from the PSF/DCD test files.
universe = Universe(PSF, DCD)
protein = universe.select_atoms("protein")

# NOTE(review): `numberOfResidues()` looks like a legacy MDAnalysis accessor —
# confirm it exists in the installed version.
numresidues = protein.numberOfResidues()

# Indices 2 .. numresidues - 2; the lowest and highest indices are not visited.
for res in range(2, numresidues - 1):
    # A parenthesised single argument prints the same text under Python 2 and
    # is valid syntax under Python 3.
    print("Processing residue {0:d}".format(res))
    # selection of the atoms involved for the phi for resid '%d' %res
    ## select_atoms("atom 4AKE %d C"%(res-1), "atom 4AKE %d N"%res, "atom %d 4AKE CA"%res, "atom 4AKE %d C" % res)
    phi_sel = universe.residues[res].phi_selection()

    #  selection of the atoms involved for the psi for resid '%d' %res
    psi_sel = universe.residues[res].psi_selection()

    # collect the timeseries of a dihedral
    # NOTE(review): the code that collects the timeseries is not present in
    # this excerpt.
import MDAnalysis
from MDAnalysis import Universe
from MDAnalysis.analysis.contacts import calculate_contacts
import numpy as np
import pandas as pd

# Reference structure (coordinates only) and the trajectory to analyse; both
# use the same compressed GRO file as topology.
ref = Universe("conf_protein.gro.bz2")
u = Universe("conf_protein.gro.bz2", "traj_protein_0.xtc")

# Number of atoms matched by the "protein" selection in the reference.
x = len(ref.select_atoms("protein"))
# Two selection strings: non-hydrogen atoms of residues 72-95, restricted by
# atom number to 1..x//2 and x//2..x respectively.
# NOTE(review): if `bynum` ranges are inclusive, atom number x//2 belongs to
# both selections — confirm that is intended.
selA = "not name H* and resid 72-95 and bynum {}:{}".format(1, x//2)
selB = "not name H* and resid 72-95 and bynum {}:{}".format(x//2, x)

# NOTE(review): presumably each row of `data` is (time, Q), judging by the
# column labels below — confirm against calculate_contacts.
data = calculate_contacts(ref, u, selA, selB)
df = pd.DataFrame(data, columns=["Time (ps)", "Q"])