Beispiel #1
0
def write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup,
                      element="O"):
    """
    Append one HETATM record to an existing structure file.

    The file is loaded, a single hetero atom is appended in chain "X" with a
    residue ID one higher than the last residue in the file, and the file is
    overwritten in place. Used to visualise conserved non-protein atom sites
    on a reference structure.

    Parameters
    ----------
    pdb_outname : str
        Filename of the reference structure. It is read and then overwritten.
    atom_location : array
        (x,y,z) coordinates of the atom location with respect to the reference
        structure.
    atom_ID : str
        A unique ID for the atom. It is written as the residue name.
    atomgroup : str
        Name of the atom. It is written as the atom name.
    element : str, optional
        Chemical element written for the atom. Defaults to "O", which was the
        previously hard-coded value.

    """
    # Read the file into Biotite's structure object (atom array)
    atom_array = strucio.load_structure(pdb_outname)

    # The residue ID is the last ID in the file + 1.
    # An empty structure has no last residue, so numbering starts at 1.
    if atom_array.array_length() > 0:
        res_id = atom_array.res_id[-1] + 1
    else:
        res_id = 1

    # Build the new HETATM
    atom = struc.Atom(
        coord=atom_location,
        chain_id="X",
        res_id=res_id,
        res_name=atom_ID,
        hetero=True,
        atom_name=atomgroup,
        element=element)
    atom_array += struc.array([atom])

    # Save edited structure
    strucio.save_structure(pdb_outname, atom_array)
Beispiel #2
0
def atom_list():
    """
    Return a list of five ``struc.Atom`` objects with fixed annotations.

    The atom at position ``k`` in the list has the coordinates ``[k, k, k]``.
    The atoms span two chains ("A" and "B"); the last atom is the only one
    flagged as hetero.
    """
    # One record per atom:
    # (chain_id, res_id, res_name, hetero, atom_name, element)
    records = (
        ("A", 1, "ALA", False, "N", "N"),
        ("A", 1, "ALA", False, "CA", "C"),
        ("B", 1, "PRO", False, "O", "O"),
        ("B", 1, "PRO", False, "CA", "C"),
        ("B", 2, "MSE", True, "SE", "SE"),
    )
    return [
        struc.Atom([k, k, k],
                   chain_id=chain,
                   res_id=number,
                   res_name=residue,
                   hetero=is_hetero,
                   atom_name=name,
                   element=symbol)
        for k, (chain, number, residue, is_hetero, name, symbol)
        in enumerate(records)
    ]
def rotate_residue(mol, bond_number, angle):
    """
    Return a copy of ``mol`` rotated about one of its rotatable bonds.

    Parameters
    ----------
    mol : AtomArray
        Structure with an associated bond list (``mol.bonds``).
    bond_number : int
        Index into the list of rotatable bonds that remain after all bonds
        to the atoms named in the module-level ``BACKBONE`` were removed.
    angle : float
        Rotation angle, handed unchanged to ``struc.rotate_about_axis``
        (presumably radians -- confirm against the Biotite documentation).

    Returns
    -------
    new_mol : AtomArray
        New array with the rotated coordinates. Only ``atom_name``,
        ``element``, ``res_id``, ``res_name`` and a copy of the bonds are
        carried over from ``mol``.
    """
    # Rotatable bonds, excluding every bond that touches a backbone atom
    candidates = struc.find_rotatable_bonds(mol.bonds)
    for name in BACKBONE:
        backbone_index = np.where(mol.atom_name == name)[0][0]
        candidates.remove_bonds_to(backbone_index)

    # The two atoms that define the chosen rotation axis
    first, second, _ = candidates.as_array()[bond_number]

    # Work on a copy so the input structure stays untouched
    new_coord = mol.coord.copy()
    direction = new_coord[second] - new_coord[first]
    pivot = new_coord[first]

    # Cutting the axis bond leaves the atoms on the far side of the bond
    # as the only ones still connected to its second atom
    severed = mol.bonds.copy()
    severed.remove_bond(first, second)
    moving = struc.find_connected(severed, root=second)

    new_coord[moving] = struc.rotate_about_axis(new_coord[moving], direction,
                                                angle, pivot)

    # Rebuild the molecule atom by atom from the new coordinates
    new_mol = struc.array([
        struc.Atom(position, atom_name=atom.atom_name, element=atom.element)
        for position, atom in zip(new_coord, mol)
    ])
    new_mol.res_id[:] = mol.res_id
    new_mol.res_name[:] = mol.res_name
    new_mol.bonds = mol.bonds.copy()

    return new_mol
def assemble_peptide(sequence):
    """
    Build an all-atom peptide chain from a one-letter amino acid sequence.

    For every residue a backbone (N, CA, C, O, H) is constructed from the
    module-level bond length constants, the reference residue obtained from
    ``info.residue`` is superimposed onto that backbone to obtain the side
    chain, and the residue is attached to the previous one via a C-N bond.
    Finally the terminal atoms H2 (N-terminus) and OXT/HXT (C-terminus) are
    added.

    Parameters
    ----------
    sequence : str or sequence of str
        One-letter amino acid codes. Each element is converted with
        ``seq.ProteinSequence.convert_letter_1to3``. The object must support
        ``len()``. An empty sequence is not supported.

    Returns
    -------
    peptide : AtomArray
        The assembled peptide in chain "A" with residue IDs starting at 1
        and an associated bond list.
    """
    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    peptide = struc.AtomArray(length=0)

    # connect_angle alternates between 120 and -120 from residue to residue
    for res_id, res_name, connect_angle in zip(
            np.arange(1,
                      len(res_names) + 1), res_names,
            itertools.cycle([120, -120])):
        # Create backbone: N at the origin, CA on the y-axis
        atom_n = struc.Atom([0.0, 0.0, 0.0], atom_name="N", element="N")

        atom_ca = struc.Atom([0.0, N_CA_LENGTH, 0.0],
                             atom_name="CA",
                             element="C")

        coord_c = calculate_atom_coord_by_z_rotation(atom_ca.coord,
                                                     atom_n.coord, 120,
                                                     CA_C_LENGTH)
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")

        coord_o = calculate_atom_coord_by_z_rotation(atom_c.coord,
                                                     atom_ca.coord, 120,
                                                     C_O_DOUBLE_LENGTH)
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")

        coord_h = calculate_atom_coord_by_z_rotation(atom_n.coord,
                                                     atom_ca.coord, -120,
                                                     N_H_LENGTH)
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)])
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they are already
        # existing in the backbone created previously
        side_chain = residue[~np.isin(
            residue.
            atom_name, ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"])]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # Filtering out CA dropped the bonds between CA and its substituents,
        # so they are restored here. Only atoms that actually exist are
        # bonded: glycine has HA2/HA3 instead of CB/HA, and an unconditional
        # lookup of "CB" would raise an IndexError for it.
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for substituent in ("CB", "HA", "HA2", "HA3"):
            matches = np.where(residue.atom_name == substituent)[0]
            if len(matches) > 0:
                residue.bonds.add_bond(index_ca, matches[0],
                                       struc.BondType.SINGLE)
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where((peptide.res_id == res_id - 1)
                                     & (peptide.atom_name == "CA"))[0][0]
            index_prev_c = np.where((peptide.res_id == res_id - 1)
                                    & (peptide.atom_name == "C"))[0][0]
            index_curr_n = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "N"))[0][0]
            index_curr_c = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "C"))[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry: translate the residue so that its N sits at
            # the position computed from the previous residue's C and CA
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH)
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n)

            # Add bond between previous C and current N
            peptide.bonds.add_bond(index_prev_c, index_curr_n,
                                   struc.BondType.SINGLE)

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(atom_n.coord, atom_h.coord,
                                                  -120, N_H_LENGTH)
    atom_h2 = struc.Atom(coord_h2,
                         chain_id="A",
                         res_id=1,
                         res_name=atom_h.res_name,
                         atom_name="H2",
                         element="H")
    # H2 is prepended, so it gets index 0 and the first N moves to index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group.
    # connect_angle still holds the value used for the last residue.
    last_id = len(sequence)
    index_c = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "C"))[0][0]
    index_o = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "O"))[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(peptide.coord[index_c],
                                                   peptide.coord[index_o],
                                                   connect_angle, C_O_LENGTH)
    coord_hxt = calculate_atom_coord_by_z_rotation(coord_oxt,
                                                   peptide.coord[index_c],
                                                   connect_angle, O_H_LENGTH)
    atom_oxt = struc.Atom(coord_oxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="OXT",
                          element="O")
    atom_hxt = struc.Atom(coord_hxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="HXT",
                          element="H")
    # OXT and HXT are appended, so they are the last two atoms (-2 and -1)
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide
Beispiel #5
0
.. Note::
    The universal length unit in *Biotite* is Å.
    This includes coordinates, distances, surface areas, etc.

Creating structures
-------------------

Let's begin by constructing some atoms:
"""

import biotite.structure as struc
# First atom: the nitrogen of residue GLY 1 in chain "A", at the origin
atom1 = struc.Atom([0, 0, 0],
                   chain_id="A",
                   res_id=1,
                   res_name="GLY",
                   hetero=False,
                   atom_name="N",
                   element="N")
# Second atom: the "CA" carbon of the same residue
atom2 = struc.Atom([0, 1, 1],
                   chain_id="A",
                   res_id=1,
                   res_name="GLY",
                   hetero=False,
                   atom_name="CA",
                   element="C")
atom3 = struc.Atom([0, 0, 2],
                   chain_id="A",
                   res_id=1,
                   res_name="GLY",
                   hetero=False,
def get_atom_features(structure_input,
                      xtc_input,
                      atomgroup,
                      element,
                      grid_input=None,
                      top_atoms=None,
                      write=None,
                      pdb_vis=True,
                      grid_write=None):
    """
    Find the most probable binding sites of an atom type and their occupancy.

    The local maxima of the atom density are taken as sites. For each site
    the fraction of trajectory frames in which at least one selected atom
    lies within 2 of the site is reported.

    Parameters
    ----------
    structure_input : str
        Structure/topology file for the MDAnalysis universe. The last four
        characters are stripped to build output file names.
    xtc_input : str
        Trajectory file for the MDAnalysis universe.
    atomgroup : str
        Atom name; atoms are selected with ``"name " + atomgroup``.
    element : str
        Element symbol. Used as prefix of the site IDs and as element of the
        atoms written to the visualisation PDB.
    grid_input : str, optional
        Density grid file to load. If None, the density is computed from the
        trajectory with a grid spacing of 1.0.
    top_atoms : int, optional
        Maximum number of sites to featurize. None (default) or a value larger
        than the number of maxima means all maxima.
    write : bool, optional
        If True, the result is written to
        ``atom_features/PocketFrequencies.txt``.
    pdb_vis : bool, optional
        If True, the protein plus one HETATM per site is written to
        ``<structure_input without extension>_IonSites.pdb`` with the
        occupation frequency as temperature factor.
    grid_write : optional
        If not None and the density was computed here, the density is
        exported as a .dx file.

    Returns
    -------
    atom_frequencies : list
        One entry ``[atom_ID, atom_location, pocket_occupation_frequency]``
        per site, ordered from highest to lowest density.
    """
    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_IonSites.pdb"
        u.trajectory[0]
        protein.write(pdb_outname)

    ## The density will be obtained from the universe which depends on the .xtc and .gro
    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_write is not None:
            D.density.convert_density("Angstrom^{-3}")
            D.density.export(structure_input[:-4] + atomgroup + "_density.dx",
                             type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    ## converting the density to a probability
    atom_number = len(u.select_atoms('name ' + atomgroup))
    grid_data = np.array(g.grid) * atom_number / np.sum(np.array(g.grid))

    ## the average probability over all populated grid cells is the threshold
    average_probability_density = atom_number / np.count_nonzero(grid_data)
    ## mask all grid centers with density less than threshold density
    grid_data[grid_data <= average_probability_density] = 0.0

    xyz, val = local_maxima_3D(grid_data)
    ## negate the array to get descending order from most prob to least prob
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]

    atom_frequencies = []

    if top_atoms is None or top_atoms > len(coords):
        top_atoms = len(coords)

    n_frames = len(u.trajectory)

    print('\n')
    print('Featurizing ', top_atoms, ' Atoms')
    for atom_no in range(top_atoms):
        print('\n')
        print('Atom no: ', atom_no)
        print('\n')

        # The maxima are grid indices; shifting them by the grid origin gives
        # the location in the coordinate frame of the trajectory. The same
        # location is used for the selection sphere and for the reported
        # site, so both refer to the same point in space.
        atom_location = coords[atom_no] + g.origin
        point_str = ' '.join(str(float(c)) for c in atom_location)
        selection = ('name ' + atomgroup + ' and point ' + point_str + ' 2')

        ## count the frames without any selected atom within the sphere of
        ## radius 2 centered on the density maximum
        empty_frames = 0
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            if len(u.select_atoms(selection)) == 0:
                empty_frames += 1

        atom_ID = element + str(atom_no + 1)
        pocket_occupation_frequency = 1 - empty_frames / n_frames

        atom_frequencies.append(
            [atom_ID, atom_location, pocket_occupation_frequency])

        ## PDB_VISUALISATION
        ## add the sites to the pdb file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=atom_ID,
                hetero=True,
                atom_name=atomgroup,
                element=element)
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)

        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file.
        # The sites are the last top_atoms residues of the file.
        first_site = len(u_pdb.residues) - top_atoms
        for offset, entry in enumerate(atom_frequencies):
            u_pdb.residues[first_site + offset].atoms.tempfactors = entry[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        os.makedirs('atom_features/', exist_ok=True)
        filename = 'atom_features/PocketFrequencies.txt'
        with open(filename, 'w') as output:
            for row in atom_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return atom_frequencies
def get_water_features(structure_input,
                       xtc_input,
                       atomgroup,
                       grid_wat_model=None,
                       grid_input=None,
                       top_waters=30,
                       write=None,
                       pdb_vis=True):
    """
    Featurize the most probable water sites by the orientation of the water.

    The local maxima of the density of the selected atoms are taken as water
    sites. For every site and frame, the water found within 3.5 of the site
    is passed to ``get_dipole`` to obtain its (psi, phi) angles. A value of
    10000.0 for both angles marks a frame without a water at the site.

    Parameters
    ----------
    structure_input : str
        Structure/topology file for the MDAnalysis universe. The last four
        characters are stripped to build output file names.
    xtc_input : str
        Trajectory file for the MDAnalysis universe.
    atomgroup : str
        Atom name; atoms are selected with ``"name " + atomgroup``.
    grid_wat_model : str, optional
        If not None and the density is computed here, the density is
        converted with ``convert_density(grid_wat_model)`` and exported as a
        .dx file.
    grid_input : str, optional
        Density grid file to load. If None, the density is computed from the
        trajectory with a grid spacing of 1.0.
    top_waters : int, optional
        Maximum number of sites to featurize (default 30). None or a value
        larger than the number of maxima means all maxima.
    write : bool, optional
        If True, the angles of each site and a summary of the occupation
        frequencies are written below ``water_features/``.
    pdb_vis : bool, optional
        If True, the protein plus one HETATM per site is written to
        ``<structure_input without extension>_WaterSites.pdb`` with the
        occupation frequency as temperature factor.

    Returns
    -------
    water_frequencies : list
        One entry ``[water_ID, atom_location, occupation_frequency]`` per
        site, ordered from highest to lowest density.
    """
    # Imported locally so the function does not rely on a file-level import
    from collections import Counter

    u = mda.Universe(structure_input, xtc_input)

    if pdb_vis is True:
        protein = u.select_atoms("protein")
        pdb_outname = structure_input[0:-4] + "_WaterSites.pdb"
        u.trajectory[0]
        protein.write(pdb_outname)

    if grid_input is None:
        density_atomgroup = u.select_atoms("name " + atomgroup)
        # With delta=1.0 a step of one grid index corresponds to one length
        # unit, so grid indices only need to be shifted by the grid origin
        D = DensityAnalysis(density_atomgroup, delta=1.0)
        D.run()
        if grid_wat_model is not None:
            D.density.convert_density(grid_wat_model)
            D.density.export(structure_input[:-4] + atomgroup + "_density.dx",
                             type="double")
        g = D.density
    else:
        g = Grid(grid_input)

    xyz, val = local_maxima_3D(g.grid)
    ## negate the array to get descending order from most prob to least prob
    val_sort = np.argsort(-1 * val.copy())
    coords = [xyz[i] for i in val_sort]
    water_frequencies = []

    if top_waters is None or top_waters > len(coords):
        top_waters = len(coords)

    n_frames = len(u.trajectory)
    radius = ' 3.5'

    print('\n')
    print('Featurizing ', top_waters, ' Waters')
    for wat_no in range(top_waters):
        print('\n')
        print('Water no: ', wat_no)
        print('\n')
        philist = []
        psilist = []

        # The maxima are grid indices; shifting them by the grid origin gives
        # the location in the coordinate frame of the trajectory. The same
        # location is used for the selection sphere and for the reported
        # site, so both refer to the same point in space.
        atom_location = coords[wat_no] + g.origin
        point_str = ' '.join(str(float(c)) for c in atom_location)
        selection = ('name ' + atomgroup + ' and point ' + point_str + radius)

        ## first pass: list the selected atoms within the sphere around the
        ## density maximum for every frame
        counting = []
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            counting.append(u.select_atoms(selection).indices)

        ## in how many frames does each atom index appear at this site
        occupancy_count = Counter(
            index for frame_indices in counting for index in frame_indices)

        ## second pass: extract (psi,phi) for the water bound in each frame
        for frame_no in tqdm(range(n_frames)):
            u.trajectory[frame_no]
            waters_resid = counting[frame_no]

            ## 10000.0 = no waters bound
            if len(waters_resid) == 0:
                psilist.append(10000.0)
                philist.append(10000.0)
                continue

            ## if multiple waters are in the pocket, use the one with the
            ## largest frequency of pocket occupation; iterating in reverse
            ## makes the last index win a tie
            chosen = max(waters_resid[::-1], key=occupancy_count.__getitem__)
            ## (x,y,z) positions of all atoms of the chosen water residue
            water_atom_positions = [
                list(pos)
                for pos in u.select_atoms('byres index ' +
                                          str(chosen)).positions
            ]
            psi, phi = get_dipole(water_atom_positions)
            psilist.append(psi)
            philist.append(phi)

        water_out = [psilist, philist]
        water_ID = "O" + str(wat_no + 1)
        water_pocket_occupation_frequency = 1 - psilist.count(10000.0) / len(
            psilist)

        water_frequencies.append(
            [water_ID, atom_location, water_pocket_occupation_frequency])

        ## WRITE OUT WATER FEATURES INTO SUBDIRECTORY
        if write is True:
            filename = 'water_features/' + structure_input[
                0:-4] + water_ID + '.txt'
            # Also creates intermediate directories that stem from a
            # directory part in structure_input
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'w') as output:
                for row in water_out:
                    output.write(str(row)[1:-1] + '\n')

        ## PDB_VISUALISATION
        ## add waters to the pdb file one by one as they are processed
        if pdb_vis is True:
            # Read the file into Biotite's structure object (atom array)
            atom_array = strucio.load_structure(pdb_outname)
            # Add an HETATM
            atom = struc.Atom(
                coord=atom_location,
                chain_id="W",
                # The residue ID is the last ID in the file +1
                res_id=atom_array.res_id[-1] + 1,
                res_name=water_ID,
                hetero=True,
                atom_name=atomgroup,
                element="O")
            atom_array += struc.array([atom])
            # Save edited structure
            strucio.save_structure(pdb_outname, atom_array)

    if pdb_vis is True:
        u_pdb = mda.Universe(pdb_outname)
        u_pdb.add_TopologyAttr('tempfactors')
        # Write values as beta-factors ("tempfactors") to a PDB file.
        # The water sites are the last top_waters residues of the file.
        first_site = len(u_pdb.residues) - top_waters
        for offset, entry in enumerate(water_frequencies):
            u_pdb.residues[first_site + offset].atoms.tempfactors = entry[2]
        u_pdb.atoms.write(pdb_outname)

    if write is True:
        filename = 'water_features/' + structure_input[
            0:-4] + 'WaterPocketFrequencies.txt'
        # The directory may not exist yet if no site was featurized
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w') as output:
            for row in water_frequencies:
                output.write(str(row)[1:-1] + '\n')

    return water_frequencies