Esempio n. 1
0
def read_pdb_file(file_name, name=None):
    """
    Parse a PDB file and flatten it into atom and residue lists
        file_name: path of pdb file
        name: name of the structure (default: file_name with its extension
            stripped, as returned by splitext)
        return:: (structure, atoms, residues)

            structure: structure object built by PDBParser
            atoms: list obtained by unfolding the structure to atom level
            residues: list obtained by unfolding the structure to residue level

        raises ValueError when len(structure) is not exactly 1
    """
    structure_id = splitext(file_name)[0] if name is None else name
    structure = PDBParser().get_structure(structure_id, file_name)

    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " + structure_id)

    residue_list = Selection.unfold_entities(structure, 'R')
    atom_list = Selection.unfold_entities(structure, 'A')
    return structure, atom_list, residue_list
def get_end_chain(structure, normal, axis, res_in_slice):
    """
    To get end-chain factor in this slice

    For every chain, the CA atoms of its first and last standard residue are
    tested with ``between_planes``; the number of accepted termini is then
    divided by the number of residues in the slice.

    :param structure: a pdb structure object
    :param normal: forwarded unchanged to ``between_planes``
    :param axis: forwarded unchanged to ``between_planes``
    :param res_in_slice: sized collection of the residues in the slice; only
        its length is used here
    :return: end_chain factor
    :raises ZeroDivisionError: if ``res_in_slice`` is empty
    :raises KeyError: if a terminal residue has no 'CA' atom
    """
    end_chain_counter = 0
    for chain in slt.unfold_entities(structure, 'C'):
        # Standard residues have a blank hetero flag in the first field of
        # their id; waters and hetero groups do not.
        standard_residues = [
            res for res in slt.unfold_entities(chain, 'R')
            if res.id[0][0] == " "
        ]
        if not standard_residues:
            # A chain made only of waters/ligands has no terminus to test.
            continue
        # A single-residue chain is deliberately tested twice (first == last),
        # as in the original implementation.
        for terminus in (standard_residues[0], standard_residues[-1]):
            if between_planes(normal, axis, terminus['CA']):
                end_chain_counter += 1
    return end_chain_counter / len(res_in_slice)
Esempio n. 3
0
def pdb_neighbors(pdb_f, pdb_id):
    """Find residues lying close to phosphorylated residues in a PDB file.

    The search is centred on atoms named O1P/O2P/O3P/OH/OG/OG1 that belong to
    residues named PTR, SEP or TPO. Every residue owning an atom within
    BOND_CUTOFF of one of those atoms is reported, and the hits of all centre
    atoms of one residue are merged.

    Args:
        pdb_f: PDB file passed to ``PDBParser.get_structure``.
        pdb_id: structure id passed to ``PDBParser.get_structure``.

    Returns:
        ``''`` when the structure has no centre atom; otherwise a dict mapping
        ``RESNAME_resseq_chain`` of each phospho-residue to the list of
        ``RESNAME_resseq_chain`` labels of its neighbours (arbitrary order).
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)
    ns = NeighborSearch(Selection.unfold_entities(structure, 'A'))
    center = [a for a in structure.get_atoms()
              if a.get_parent().get_resname() in ('PTR', 'SEP', 'TPO')
              and a.get_name() in ('O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1')]
    # if there are no phospho atoms, return
    if not center:
        return ''

    neighbors = {}
    for atom in center:
        phos_res = atom.get_parent()
        # set neighbor distance cutoff
        close_atoms = ns.search(atom.get_coord(), BOND_CUTOFF)
        close_residues = set(Selection.unfold_entities(close_atoms, 'R'))
        # Accumulate instead of assigning: a residue has several centre atoms
        # and assigning kept only the neighbours of the last one visited.
        neighbors.setdefault(phos_res, set()).update(
            res for res in close_residues if not res == phos_res)

    def _label(res):
        # residue name, residue number and chain id joined by underscores
        return '_'.join([res.get_resname(),
                         str(res.get_id()[1]),
                         str(res.get_parent().get_id())])

    # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
    return {_label(res): [_label(other) for other in others]
            for res, others in neighbors.items()}
Esempio n. 4
0
def within(resnum, angstroms, chain_id, model, use_ca=False, custom_coord=None):
    """Return the residues that have an atom within a radius of a point.

    See: https://www.biostars.org/p/1816/ https://www.biostars.org/p/269579/

    The search point is ``custom_coord`` when one is given; otherwise it is
    the coordinate of an atom of residue ``model[chain_id][resnum]``.

    Args:
        resnum (int): Residue key looked up in the chain; ignored when
            ``custom_coord`` is given
        angstroms (float): Search radius passed to ``NeighborSearch.search``
        chain_id (str): Chain key looked up in ``model``; ignored when
            ``custom_coord`` is given
        model (Model): Model whose atoms are searched
        use_ca (bool): If the alpha-carbon atom should be used for the search, otherwise use the last atom of the residue
        custom_coord (list): Custom XYZ coordinate to get within

    Returns:
        list: List of Bio.PDB.Residue.Residue objects

    Raises:
        KeyError: if the chain, the residue or (with ``use_ca``) its 'CA'
            atom cannot be found in ``model``.

    """
    # TODO: should have separate method for within a normal residue (can use "resnum" with a int) or a custom coord,
    # where you don't need to specify resnum
    atom_list = Selection.unfold_entities(model, 'A')
    ns = NeighborSearch(atom_list)

    # Test against None and length explicitly: truth-testing a numpy array
    # raises ValueError, while an empty sequence still falls back to the
    # residue lookup exactly as before.
    if custom_coord is not None and len(custom_coord) > 0:
        target_atom_coord = np.array(custom_coord, 'f')
    else:
        target_residue = model[chain_id][resnum]
        if use_ca:
            target_atom = target_residue['CA']
        else:
            target_atom = target_residue.child_list[-1]
        target_atom_coord = np.array(target_atom.get_coord(), 'f')

    neighbors = ns.search(target_atom_coord, angstroms)
    return Selection.unfold_entities(neighbors, 'R')
Esempio n. 5
0
def read_pdb_file(file_name, name=None):
    """
    Parse a PDB file and flatten it into atom and residue lists
        file_name: path of pdb file
        name: name of the structure (default: file_name with its extension
            stripped, as returned by splitext)
        return:: (structure, atoms, residues)

            structure: structure object built by PDBParser
            atoms: list obtained by unfolding the structure to atom level
            residues: list obtained by unfolding the structure to residue level

        raises ValueError when len(structure) is not exactly 1
    """
    structure_id = splitext(file_name)[0] if name is None else name
    structure = PDBParser().get_structure(structure_id, file_name)

    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " + structure_id)

    residue_list = Selection.unfold_entities(structure, 'R')
    atom_list = Selection.unfold_entities(structure, 'A')
    return structure, atom_list, residue_list
    def __init__(self, fasta_align, m1, m2, si=0, sj=1):
        """Initialise.

        Walks the alignment column by column and pairs up the residues of
        the two models that ``is_aa`` accepts.

        Attributes:

        - fasta_align - Alignment object
        - m1, m2 - two models
        - si, sj - the sequences in the Alignment object that
          correspond to the structures

        Sets ``self.map12`` and ``self.map21`` (residue -> aligned residue,
        or None opposite a gap) and ``self.duos`` (one ``(r1, r2)`` tuple per
        alignment column).

        """
        def next_aa(residues, pos):
            # Step forward from pos until is_aa accepts a residue; hand back
            # that residue and the position right after it.
            while True:
                candidate = residues[pos]
                pos += 1
                if is_aa(candidate):
                    return candidate, pos

        n_columns = fasta_align.get_alignment_length()
        # Residues of both models, in structure order
        residues1 = Selection.unfold_entities(m1, 'R')
        residues2 = Selection.unfold_entities(m2, 'R')
        # Read positions in the two residue lists
        pos1 = pos2 = 0
        forward, backward, pairs = {}, {}, []
        for col in range(n_columns):
            column = fasta_align[:, col]
            aa1, aa2 = column[si], column[sj]
            # A gap ("-") leaves the corresponding residue as None
            r1 = r2 = None
            if aa1 != "-":
                r1, pos1 = next_aa(residues1, pos1)
                self._test_equivalence(r1, aa1)
            if aa2 != "-":
                r2, pos2 = next_aa(residues2, pos2)
                self._test_equivalence(r2, aa2)
            if r1:
                # Residue of model 1 -> its partner in model 2
                forward[r1] = r2
            if r2:
                # Residue of model 2 -> its partner in model 1
                backward[r2] = r1
            pairs.append((r1, r2))
        self.map12 = forward
        self.map21 = backward
        self.duos = pairs
def _get_contacts(pdb_path, chain_rec, chain_lig, contact_dist):
    """Collect residue contacts between receptor and ligand chains.

    The first model of the structure parsed from ``pdb_path`` is used. For
    each ligand residue, every receptor residue owning an atom within
    ``contact_dist`` of one of its atoms counts as a contact.

    Residues are described as ``(one_letter_name, residue_number, chain_id)``.

    Returns a tuple of three sets:
        contacts: receptor description + ligand description (6-tuples)
        contacts_rec: receptor descriptions taking part in a contact
        contacts_lig: ligand descriptions taking part in a contact
    """
    model = pdb_parser.get_structure('X', pdb_path)[0]

    receptor_chains = [model[chain_id] for chain_id in chain_rec]
    ligand_chains = [model[chain_id] for chain_id in chain_lig]

    searcher = NeighborSearch(Selection.unfold_entities(receptor_chains, 'A'))

    pair_hits, receptor_hits, ligand_hits = set(), set(), set()
    for lig_res in Selection.unfold_entities(ligand_chains, 'R'):
        lig_key = (
            dindex_to_1[d3_to_index[lig_res.get_resname()]],
            lig_res.get_id()[1],
            lig_res.get_parent().get_id(),
        )

        # Receptor residues near any atom of this ligand residue
        # (duplicates are harmless: the results go into sets).
        nearby = []
        for lig_atom in lig_res:
            close_atoms = searcher.search(lig_atom.get_coord(), contact_dist)
            nearby.extend(Selection.unfold_entities(close_atoms, 'R'))

        for rec_res in nearby:
            rec_key = (
                dindex_to_1[d3_to_index[rec_res.get_resname()]],
                rec_res.get_id()[1],
                rec_res.get_parent().get_id(),
            )
            pair_hits.add(rec_key + lig_key)
            ligand_hits.add(lig_key)
            receptor_hits.add(rec_key)

    return pair_hits, receptor_hits, ligand_hits
def pdb_neighbors(pdb_f, pdb_id):
    """List residues with N/O atoms close to phospho-residue oxygens.

    For every residue named PTR, SEP or TPO, the atoms within BOND_CUTOFF of
    its selected oxygens are collected; only atoms whose name contains 'N' or
    'O' are kept and mapped to their residues (the phospho-residue itself is
    excluded).

    Returns a list of ``(pdb_id, residue_label, [neighbour_label, ...])``
    tuples, one per phospho-residue that has at least one neighbour. Labels
    have the form ``RESNAME_resseq_chain``.
    """
    # Oxygen atom names searched from, per phospho-residue type
    phospho_oxygens = {
        'PTR': ['O1P', 'O2P', 'O3P', 'OH'],
        'SEP': ['O1P', 'O2P', 'O3P', 'OG'],
        'TPO': ['O1P', 'O2P', 'O3P', 'OG1'],
    }

    def label(residue):
        return (residue.get_resname() + '_' + str(residue.get_id()[1]) + '_'
                + residue.get_parent().get_id())

    structure = PDBParser().get_structure(pdb_id, pdb_f)
    searcher = NeighborSearch(Selection.unfold_entities(structure, 'A'))

    neighbors = []
    for res in structure.get_residues():
        resname = res.get_resname()
        if resname not in phospho_oxygens:
            continue

        hits = []
        for atom in res.child_list:
            if atom.get_name() in phospho_oxygens[resname]:
                hits.extend(searcher.search(atom.get_coord(), BOND_CUTOFF))
        unique_hits = list(set(hits))
        polar_hits = [
            hit for hit in unique_hits
            if 'N' in hit.get_name() or 'O' in hit.get_name()
        ]
        close_residues = list(set(Selection.unfold_entities(polar_hits, 'R')))
        close_residues = [other for other in close_residues if other != res]

        if close_residues:
            neighbors.append(
                (pdb_id, label(res), [label(other) for other in close_residues]))

    return neighbors
Esempio n. 9
0
    def __init__(self, fasta_align, m1, m2, si=0, sj=1):
        """Initialize.

        Walks the alignment column by column and pairs up the residues of
        the two models that ``is_aa`` accepts.

        Attributes:
         - fasta_align - Alignment object
         - m1, m2 - two models
         - si, sj - the sequences in the Alignment object that
           correspond to the structures

        Sets ``self.map12`` and ``self.map21`` (residue -> aligned residue,
        or None opposite a gap) and ``self.duos`` (one ``(r1, r2)`` tuple per
        alignment column).

        """
        def next_aa(residues, pos):
            # Step forward from pos until is_aa accepts a residue; hand back
            # that residue and the position right after it.
            while True:
                candidate = residues[pos]
                pos += 1
                if is_aa(candidate):
                    return candidate, pos

        n_columns = fasta_align.get_alignment_length()
        # Residues of both models, in structure order
        residues1 = Selection.unfold_entities(m1, 'R')
        residues2 = Selection.unfold_entities(m2, 'R')
        # Read positions in the two residue lists
        pos1 = pos2 = 0
        forward, backward, pairs = {}, {}, []
        for col in range(n_columns):
            column = fasta_align[:, col]
            aa1, aa2 = column[si], column[sj]
            # A gap ("-") leaves the corresponding residue as None
            r1 = r2 = None
            if aa1 != "-":
                r1, pos1 = next_aa(residues1, pos1)
                self._test_equivalence(r1, aa1)
            if aa2 != "-":
                r2, pos2 = next_aa(residues2, pos2)
                self._test_equivalence(r2, aa2)
            if r1:
                # Residue of model 1 -> its partner in model 2
                forward[r1] = r2
            if r2:
                # Residue of model 2 -> its partner in model 1
                backward[r2] = r1
            pairs.append((r1, r2))
        self.map12 = forward
        self.map21 = backward
        self.duos = pairs
Esempio n. 10
0
def secondary_struc_cmap(chain,
                         sequence,
                         structure,
                         cutoff_distance=4.5,
                         cutoff_numcontacts=10,
                         exclude_neighbour=3,
                         ss_elements=['H', 'E', 'B', 'b', 'G']):
    """Build a contact map between secondary-structure segments of a chain.

    Consecutive positions of ``structure`` that carry the same code from
    ``ss_elements`` form one segment (numbered from 1; 0 means "no segment").
    Atom pairs closer than ``cutoff_distance`` are counted per pair of
    distinct segments, ignoring residues whose list positions differ by
    ``exclude_neighbour`` or less.

    Args:
        chain: chain entity whose atoms and residues are analysed.
        sequence: only its length is used, in the mismatch message.
        structure: per-residue secondary-structure codes; must have one entry
            per residue of ``chain``.
        cutoff_distance: radius passed to ``NeighborSearch.search_all``.
        cutoff_numcontacts: minimum number of atom contacts for a segment
            pair to be reported.
        exclude_neighbour: residue pairs at most this far apart are skipped.
        ss_elements: codes that count as secondary-structure elements (only
            read, never modified).

    Returns:
        (index, segment): ``index`` is an array of zero-based segment pairs
        with at least ``cutoff_numcontacts`` contacts, ``segment`` the
        per-residue segment number array.

    Raises:
        AssertionError: if ``structure`` and the chain differ in length.
    """
    atom_list = Selection.unfold_entities(chain, 'A')
    res_list = Selection.unfold_entities(chain, 'R')

    numbering = [res.get_id()[1] for res in res_list]

    assert len(structure) == len(
        numbering
    ), f'PDB file and Secondary structure map do not match!\n {chain.get_parent().get_parent().id} - PDB: {len(res_list)} Residues VS. STRIDE: {len(sequence)} Residues. '

    # Residue number -> position of its first occurrence in the chain
    first_position = {}
    for position, resseq in enumerate(numbering):
        first_position.setdefault(resseq, position)

    ns = NeighborSearch(atom_list)
    all_neighbours = ns.search_all(cutoff_distance, 'A')

    struc_length = len(structure)
    segment = np.zeros([struc_length], dtype='int')
    nseg = 1
    for i in range(struc_length):
        if structure[i] in ss_elements:
            segment[i] = nseg
            # A segment closes at the last position or where the code
            # changes. The last index is struc_length - 1; comparing against
            # struc_length never matched and let structure[i + 1] run off
            # the end.
            if i == struc_length - 1 or structure[i + 1] != structure[i]:
                nseg += 1

    index_list = []
    for atompair in all_neighbours:
        res1 = first_position[atompair[0].get_parent().id[1]]
        res2 = first_position[atompair[1].get_parent().id[1]]

        if abs(res1 - res2) > exclude_neighbour:
            if segment[res1] != 0 and segment[res2] != 0 and segment[
                    res1] != segment[res2]:
                index_list.append((segment[res1] - 1, segment[res2] - 1))

    index_list.sort()
    count = Counter(index_list)
    index = [values for values in count if count[values] >= cutoff_numcontacts]

    return np.array(index), segment
Esempio n. 11
0
def pp_getcgeom(pp, sidechains=0):
    """Return the mean coordinate of the atoms of ``pp``.

    With a truthy ``sidechains`` every atom is used; otherwise only atoms
    whose id is CA, CB, N or O. Raises ZeroDivisionError when no atom is
    selected.
    """
    selected = Selection.unfold_entities(pp, "A")
    if not sidechains:
        # NOTE(review): this "backbone" set contains CB and lacks C - confirm
        # whether that is intended before relying on it.
        selected = [
            atom for atom in selected
            if atom.get_id() in ["CA", "CB", "N", "O"]
        ]
    return sum(atom.get_coord() for atom in selected) / len(selected)
Esempio n. 12
0
def get_res_dist(struct1, struct2, alignment):
    """Return the summed CA-CA distance of aligned residues over len(alignment).

    ``alignment`` is iterated as pairs of indices into the residue lists of
    ``struct1`` and ``struct2``. Pairs where either residue lacks a 'CA' atom
    add nothing to the sum.

    NOTE(review): skipped pairs still count in the denominator, so the result
    is lower than the mean over measured pairs - confirm this is intended.
    Raises ZeroDivisionError for an empty alignment.
    """
    residues_a = Selection.unfold_entities(struct1, 'R')
    residues_b = Selection.unfold_entities(struct2, 'R')
    total = 0.0
    for idx_a, idx_b in alignment:
        if 'CA' not in residues_a[idx_a] or 'CA' not in residues_b[idx_b]:
            continue
        # Subtracting two atoms yields their distance
        total += residues_a[idx_a]['CA'] - residues_b[idx_b]['CA']

    return total / float(len(alignment))
Esempio n. 13
0
 def interaction(self, pdb_id, filename, domain_1, domain_2):
     """Returns a dict with informations (atoms, residues...) if two domains
     interact with each other, and returns False if not.

     Two domains are considered interacting when at least one atom of
     domain_2 lies within 5 angstroms of an atom of domain_1.

     On success the dict has the keys '1' and '2' (one per domain), each
     holding a dict with the keys 'atoms' and 'residues'.
     """
     # Function-call form prints the same text on Python 2 and Python 3.
     print("Searching for interactions in "+pdb_id+"...")
     # creates a structure object/class to extract atoms of the two domains
     model = structure(pdb_id).get_model(pdb_id, filename)
     residues_1 = structure(pdb_id).get_residues(model, domain_1)
     residues_2 = structure(pdb_id).get_residues(model, domain_2)
     atoms_1 = Selection.unfold_entities(residues_1, 'A')
     atoms_2 = Selection.unfold_entities(residues_2, 'A')
     # gets the serial numbers of the atoms
     numbers_1 = structure(pdb_id).serial_numbers(atoms_1)
     numbers_2 = structure(pdb_id).serial_numbers(atoms_2)
     # the search starts here !
     atoms = Selection.unfold_entities(model, 'A')
     nsearch = NeighborSearch(atoms)
     interacting_atoms_1 = []
     interacting_atoms_2 = []
     for atom in atoms:
         if atom.get_serial_number() not in numbers_1:
             continue
         point = atom.get_coord()
         # This is how we detect an interaction, we put 5 angstroms
         # here.
         # This is the simplest method we can use, and we're not sure
         # that it is correct.
         # Originally we have planned to go further by doing a surface
         # and accession analysis, but we had no time.
         neighbors = nsearch.search(point, 5)
         for neighbor in neighbors:
             if neighbor.get_serial_number() in numbers_2:
                 # A domain_2 atom is appended once per domain_1 atom it is
                 # close to, so this list may contain repeats.
                 interacting_atoms_2.append(neighbor)
                 if atom not in interacting_atoms_1:
                     interacting_atoms_1.append(atom)
     if not interacting_atoms_2:
         return False
     # returns a dict with all residues and atoms
     # just get the parent residues for the list of atoms
     interacting_residues_1 = structure(pdb_id).atoms2residues(
             interacting_atoms_1)
     interacting_residues_2 = structure(pdb_id).atoms2residues(
             interacting_atoms_2)
     infos = {
         '1': {'atoms': interacting_atoms_1,
               'residues': interacting_residues_1},
         '2': {'atoms': interacting_atoms_2,
               'residues': interacting_residues_2},
     }
     return infos
Esempio n. 14
0
    def __init__(self, align, m1, m2):
        """Produces a structural alignment of two models
        Input:
        - align - indexable alignment record: align[0] and align[1] are the
          two gapped sequences, and align[4] - align[3] is the number of
          columns processed (presumably a pairwise2-style
          (seqA, seqB, score, begin, end) tuple - TODO confirm)
        - m1, m2 - two models

        Sets self.map12 / self.map21 (residue -> aligned residue, or None
        opposite a gap) and self.residue_pairs (one (r1, r2) tuple per
        column).
        """
        # NOTE(review): columns are read from index 0 even when align[3] is
        # non-zero - confirm this is intended for local alignments.
        length = align[4]-align[3]
        # Get the residues in the models
        rl1 = Selection.unfold_entities(m1, 'R')
        rl2 = Selection.unfold_entities(m2, 'R')
        # Residue positions
        p1 = 0
        p2 = 0
        # Map equivalent residues to each other
        map12 = {}
        map21 = {}
        residue_pairs = []
        for i in range(length):
            aa1 = align[0][i]
            aa2 = align[1][i]
            if aa1 != "-":
                # Loop until an aa is found
                while True:
                    r1 = rl1[p1]
                    p1 = p1 + 1
                    if is_aa(r1):
                        break
                # Check the residue that was found; inside the loop this
                # call was only reached for the residues being skipped.
                self._test_equivalence(r1, aa1)
            else:
                r1 = None
            if aa2 != "-":
                # Loop until an aa is found
                while True:
                    r2 = rl2[p2]
                    p2 = p2 + 1
                    if is_aa(r2):
                        break
                self._test_equivalence(r2, aa2)
            else:
                r2 = None
            if r1:
                map12[r1] = r2
            if r2:
                map21[r2] = r1

            residue_pairs.append((r1, r2))
        # Assigned after the loop so the attributes exist even when there
        # are no columns to process.
        self.map12 = map12
        self.map21 = map21
        self.residue_pairs = residue_pairs
Esempio n. 15
0
def read_pdb_file(file_name, name=None):
    """
    Extract info from a PDB file
        file_name: path of pdb file
        name: name of the structure (default: file_name with its extension
            stripped, as returned by splitext)
        return:: (protein_name, structure, residues, sequence, atoms)

            protein_name: base name of file_name with every ".pdb" removed
            structure: structure object
            residues: list of the residues of all polypeptides, in order
            sequence: combined sequence (for all polypeptides)
            atoms: list obtained by unfolding the structure to atom level

        Polypeptides are built with PPBuilder; CaPPBuilder is used instead
        when PPBuilder finds none.
        raises ValueError when len(structure) is not exactly 1
    """
    structure_id = splitext(file_name)[0] if name is None else name
    structure = PDBParser().get_structure(structure_id, file_name)

    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " + structure_id)

    atom_list = Selection.unfold_entities(structure, 'A')

    peptides = PPBuilder().build_peptides(structure)
    if not peptides:
        peptides = CaPPBuilder().build_peptides(structure)

    sequence_parts = []
    residue_list = []
    for peptide in peptides:
        sequence_parts.append(str(peptide.get_sequence()))
        residue_list.extend(peptide)
    full_sequence = ''.join(sequence_parts)

    protein_name = os.path.basename(file_name).replace(".pdb", "")
    return protein_name, structure, residue_list, full_sequence, atom_list
Esempio n. 16
0
def pdb_dist(pdb_f, pdb_id):
    """Return minimum distances between phospho-oxygens and histidine nitrogens.

    Centre atoms are those named O1P/O2P/O3P/OH/OG/OG1 in residues named
    PTR, SEP or TPO; partner atoms are ND1/NE2 of residues named HIS. For
    every (centre residue, partner residue) pair the smallest atom-atom
    distance is kept.

    Args:
        pdb_f: PDB file passed to ``PDBParser.get_structure``.
        pdb_id: structure id passed to ``PDBParser.get_structure``.

    Returns:
        list of minimum distances, one per residue pair, in order of first
        appearance. Empty when there are no centre or no partner atoms.
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)
    all_atoms = list(structure.get_atoms())
    center = [a for a in all_atoms
              if a.get_parent().get_resname() in ('PTR', 'SEP', 'TPO')
              and a.get_name() in ('O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1')]

    # Earlier variants of this function paired against ARG (NE, NH1, NH2)
    # or LYS (NZ) instead of HIS.
    neighbors = [a for a in all_atoms
                 if a.get_parent().get_resname() in ('HIS',)
                 and a.get_name() in ('ND1', 'NE2')]

    def calc_dist(a, b):
        vector = a.coord - b.coord
        return np.sqrt(np.sum(vector * vector))

    # Track the running minimum per residue pair instead of storing every
    # distance and reducing afterwards.
    # NOTE(review): the key is built from str(residue); if that text omits
    # the chain id, same-numbered residues of different chains would share
    # an entry - verify.
    closest = {}
    for c in center:
        center_label = str(c.get_parent())
        for n in neighbors:
            key = center_label + '_' + str(n.get_parent())
            value = calc_dist(c, n)
            if key not in closest or value < closest[key]:
                closest[key] = value

    return list(closest.values())
Esempio n. 17
0
def read_pdb_file(file_name, name=None):
    """
    Extract info from a PDB file
        file_name: path of pdb file
        name: name of the structure (default: file_name with its extension
            stripped, as returned by splitext)
        return:: (protein_name, structure, residues, sequence, atoms)

            protein_name: base name of file_name with every ".pdb" removed
            structure: structure object
            residues: list of the residues of all polypeptides, in order
            sequence: combined sequence (for all polypeptides)
            atoms: list obtained by unfolding the structure to atom level

        Polypeptides are built with PPBuilder; CaPPBuilder is used instead
        when PPBuilder finds none.
        raises ValueError when len(structure) is not exactly 1
    """
    structure_id = splitext(file_name)[0] if name is None else name
    structure = PDBParser().get_structure(structure_id, file_name)

    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " + structure_id)

    atom_list = Selection.unfold_entities(structure, 'A')

    peptides = PPBuilder().build_peptides(structure)
    if not peptides:
        peptides = CaPPBuilder().build_peptides(structure)

    sequence_parts = []
    residue_list = []
    for peptide in peptides:
        sequence_parts.append(str(peptide.get_sequence()))
        residue_list.extend(peptide)
    full_sequence = ''.join(sequence_parts)

    protein_name = os.path.basename(file_name).replace(".pdb", "")
    return protein_name, structure, residue_list, full_sequence, atom_list
Esempio n. 18
0
    def __init__(self, model, pdb_file=None):
        """Compute residue depth and CA depth for the amino acids of ``model``.

        ``pdb_file`` is not used for the computation; passing a value only
        triggers a deprecation warning.

        For every residue accepted by ``is_aa`` the pair
        ``(residue_depth, ca_depth)`` is stored under the key
        ``(chain_id, residue_id)``, and the two values are also written to
        ``residue.xtra['EXP_RD']`` and ``residue.xtra['EXP_RD_CA']``.
        """
        if pdb_file is not None:
            message = ("ResidueDepth no longer requires a pdb file."
                       " This argument will be removed in a future release"
                       " of Biopython.")
            warnings.warn(message, BiopythonDeprecationWarning)

        by_key = {}
        entries = []
        keys = []
        residues = Selection.unfold_entities(model, 'R')
        # Surface of the whole model, shared by every depth calculation
        surface = get_surface(model)
        for res in residues:
            if is_aa(res):
                depths = (residue_depth(res, surface), ca_depth(res, surface))
                key = (res.get_parent().get_id(), res.get_id())
                by_key[key] = depths
                entries.append((res, depths))
                keys.append(key)
                # Mirror the values on the residue itself
                res.xtra['EXP_RD'] = depths[0]
                res.xtra['EXP_RD_CA'] = depths[1]
        AbstractPropertyMap.__init__(self, by_key, keys, entries)
Esempio n. 19
0
def GetResidueDepPDB(pdb, pdbfile):
    """Append residue-depth rows for one structure to the file named by OUT.

    One tab-separated line is written per residue accepted by ``is_aa``:
    pdb, chain id, residue number, residue name, residue depth, CA depth.
    Residues whose ``residue_depth`` call fails are skipped. If the surface
    cannot be computed, a message is printed and nothing is written.

    Args:
        pdb: identifier passed to ``GetStructure`` and written in column 1.
        pdbfile: file passed to ``get_surface``.

    Returns:
        None
    """
    s = GetStructure(pdb)
    model = s[0]
    residuelist = Selection.unfold_entities(model, 'R')
    try:
        surface = get_surface(pdbfile, PDBTOXYZ, MSMS)
    except Exception:
        # Function-call form prints the same text on Python 2 and Python 3.
        print("cannot get surface for " + pdbfile)
        return

    lines = []
    for residue in residuelist:
        if not is_aa(residue):
            continue
        resid = residue.get_id()
        resname = residue.get_resname()
        chainid = residue.get_parent().get_id()
        try:
            rd = residue_depth(residue, surface)
        except Exception:
            # Best effort: leave out residues whose depth cannot be computed.
            continue
        ca_rd = ca_depth(residue, surface)
        info = [pdb, chainid, resid[1], resname, str(rd), str(ca_rd)]
        lines.append("\t".join(map(str, info)) + "\n")

    # Hold the lock only while appending, and release it (and close the
    # file) even if the write fails.
    mutex_writefile.acquire()
    try:
        with open(OUT, "a") as outobj:
            outobj.write("".join(lines))
    finally:
        mutex_writefile.release()
Esempio n. 20
0
def get_surface(model, MSMS="msms"):
    """Represent molecular surface as a vertex list array.

    Return a Numpy array that represents the vertex list of the
    molecular surface.

    The atoms of ``model`` are written to a temporary x/y/z/radius file,
    the MSMS command is run on it with a probe radius of 1.5, and the
    resulting ``.vert`` file is read back.

    Arguments:
     - model - entity whose atoms define the surface
     - MSMS - msms executable (used as argument to subprocess.call)

    Raises RuntimeError if MSMS does not produce a vertex file; in that case
    the temporary files are left in place so that the command quoted in the
    message can be re-run.

    """
    # Replace pdb_to_xyzr
    # Make x,y,z,radius file
    atom_list = Selection.unfold_entities(model, "A")

    # mkstemp creates the files atomically, unlike the race-prone mktemp.
    xyz_fd, xyz_tmp = tempfile.mkstemp()
    with os.fdopen(xyz_fd, "w") as pdb_to_xyzr:
        for atom in atom_list:
            x, y, z = atom.coord
            radius = _get_atom_radius(atom, rtype="united")
            pdb_to_xyzr.write(f"{x:6.3f}\t{y:6.3f}\t{z:6.3f}\t{radius:1.2f}\n")

    # make surface
    surface_fd, surface_tmp = tempfile.mkstemp()
    os.close(surface_fd)
    log_fd, msms_log = tempfile.mkstemp()
    os.close(log_fd)
    command_template = MSMS + " -probe_radius 1.5 -if %s -of %s > " + msms_log
    make_surface = command_template % (xyz_tmp, surface_tmp)
    # NOTE: the command goes through the shell, so MSMS must come from a
    # trusted source.
    subprocess.call(make_surface, shell=True)
    surface_file = surface_tmp + ".vert"
    face_file = surface_tmp + ".face"
    if not os.path.isfile(surface_file):
        raise RuntimeError(
            f"Failed to generate surface file using command:\n{make_surface}")

    try:
        # read surface vertices from vertex file
        surface = _read_vertex_array(surface_file)
    finally:
        # Remove the temporary files; missing ones are ignored.
        for path in (xyz_tmp, surface_tmp, msms_log, face_file, surface_file):
            try:
                os.remove(path)
            except OSError:
                pass
    return surface
Esempio n. 21
0
 def interchain_residue_contacts(self, chain_ids_1, chain_ids_2, radius):
     """ Generate a list of residue contacts between two chains.

     Residue pairs within ``radius`` are searched among the atoms of the
     chains named in ``chain_ids_1`` or ``chain_ids_2``. A pair is kept when
     one residue belongs to a chain of the first group and the other to a
     chain of the second group.

     Returns a defaultdict mapping ``(chain of group 1, chain of group 2)``
     to a list of ``{chain_id: residue}`` dicts, one per contact.
     """
     chains_by_id = {}
     for chain in self.get_chains():
         chains_by_id[chain.get_id()] = chain

     atoms = []
     for chain_id, chain in chains_by_id.items():
         if chain_id in chain_ids_1 + chain_ids_2:
             atoms.extend(Selection.unfold_entities(chain, "A"))

     classified_contacts = defaultdict(list)
     for res_a, res_b in NeighborSearch(atoms).search_all(radius, "R"):
         id_a = res_a.get_parent().get_id()
         id_b = res_b.get_parent().get_id()
         if id_a in chain_ids_1 and id_b in chain_ids_2:
             key = (id_a, id_b)
         elif id_b in chain_ids_1 and id_a in chain_ids_2:
             key = (id_b, id_a)
         else:
             # neither orientation links group 1 with group 2
             continue
         classified_contacts[key].append({id_a: res_a, id_b: res_b})
     return classified_contacts
def volume_delaunay(model):
    """Returns dictionary containing volume for each residue in `model`.

    The atom coordinates are triangulated with Delaunay; the volume of every
    simplex whose atoms all belong to a single residue is added to that
    residue's total.

    Parameters
    ----------
    model: Bio.PDB.Model.Model
        Model of the protein structure.

    Returns
    -------
    dict
        Maps ``label_residue(residue)`` to the summed simplex volume.
        Residues without any fully contained simplex are absent.

    """

    volume_dict = {}
    atoms = np.array([atom for atom in model.get_atoms()])
    delaunay = Delaunay([atom.coord for atom in atoms])

    for simplex in delaunay.simplices:
        simplex_atoms = atoms[simplex]
        parent_residues = Selection.get_unique_parents(simplex_atoms)
        # Simplex is taken into account only if is totally contained in one
        # residue. Compare by value: "is 1" tests object identity.
        if len(parent_residues) == 1:
            unique_parent = label_residue(parent_residues[0])
            cv_simplex = ConvexHull([atom.coord for atom in simplex_atoms])
            volume_dict[unique_parent] = (
                volume_dict.get(unique_parent, 0) + cv_simplex.volume)

    return volume_dict
Esempio n. 23
0
def calculateNeighbors(filename, radius):
    """Return a burial value per residue number for a PDB file.

    For each residue, the atoms found within ``radius`` of each of its atoms
    are counted (an atom is counted once per search that finds it) and the
    total is divided by the number of atoms of the residue.

    The result is a dict keyed by the residue number as a string; residues
    sharing a number overwrite each other.
    """
    structure = parser.get_structure(filename.split(".pdb")[0], filename)
    all_atoms = Selection.unfold_entities(structure, 'A')  # A for atoms
    all_residues = Selection.unfold_entities(structure, 'R')  # R for residues
    searcher = NeighborSearch(all_atoms)

    burial_by_resid = {}
    for res in all_residues:
        res_key = str(res.get_id()[1])
        res_atoms = res.get_list()
        n_contacts = sum(
            len(searcher.search(atom.get_coord(), radius, level="A"))
            for atom in res_atoms
        )
        burial_by_resid[res_key] = n_contacts / len(res_atoms)

    return burial_by_resid
Esempio n. 24
0
def chain2pos_scan_str(chain, pdb, mutation_set='a'):
    """
    Takes a chain ID and a model.PDBFile object, returns a string
    suitable as the PositionScan line for FoldX.

    The result is a comma-separated list with one entry per residue of the
    requested chain (first model only), each of the form
    ``<one-letter aa><chain><residue number><mutation_set>``. Residues that
    ``three_to_one`` cannot convert are left out. An empty string is returned
    when nothing matches.
    """
    parser = PDBParser(PERMISSIVE=1)
    pdbfn = pdb.fullpath()
    struct = parser.get_structure(pdb.uuid, pdbfn)[0]

    entries = []
    for c in Selection.unfold_entities(struct, 'C'):
        if c.id != chain:
            continue
        for r in c:
            try:
                aa = three_to_one(r.get_resname())
                entries.append(
                    '%s%s%i%s' % (aa, chain, r.id[1], mutation_set))
            except Exception:
                # non-native amino acid or water: skipped on purpose.
                # Exception (not a bare except) keeps KeyboardInterrupt and
                # SystemExit propagating.
                continue

    return ','.join(entries)
Esempio n. 25
0
def compute_interactions(pdb_name):
    """Return the classified residue-residue interactions of a structure.

    The PDB name is lower-cased, the matching ``Structure`` record is looked
    up, and the Biopython structure is loaded with ``pdb_get_structure``.
    Residue pairs with atoms within 4.5 of each other are collected from the
    first chain of that structure. A pair is kept when both residues pass
    ``is_aa`` and their sequence numbers differ by more than
    ``NUM_SKIP_RESIDUES``. Each remaining pair is wrapped in an
    ``InteractingPair``; only pairs with at least one detected interaction
    are returned.
    """
    pdb_name = pdb_name.lower()

    # The database lookup is kept so that an unknown entry name still fails
    # here, before any structure file is loaded.
    # NOTE(review): the record's preferred_chain was read but never used by
    # the original code; the first chain of the parsed structure is used
    # instead - confirm that this is intended.
    Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)

    bio_structure = pdb_get_structure(pdb_name)

    # Atoms of the first chain only.
    first_chain = next(bio_structure.get_chains())
    atom_list = Selection.unfold_entities(first_chain, 'A')

    # All residue pairs that have atoms within 4.5 of each other.
    all_neighbors = NeighborSearch(atom_list).search_all(4.5, "R")

    # Keep amino-acid pairs that are MORE than NUM_SKIP_RESIDUES positions
    # apart in sequence (close sequence neighbours are dropped).
    candidate_pairs = [
        (res_a, res_b) for res_a, res_b in all_neighbors
        if is_aa(res_a) and is_aa(res_b)
        and abs(res_a.id[1] - res_b.id[1]) > NUM_SKIP_RESIDUES
    ]

    # Determine the interaction types of every candidate pair.
    interactions = [
        InteractingPair(res_a, res_b, get_interactions(res_a, res_b))
        for res_a, res_b in candidate_pairs
    ]

    # Pairs for which no interaction type was found are discarded.
    return [pair for pair in interactions if len(pair.get_interactions()) > 0]
Esempio n. 26
0
 def collectHbondsNumber(self, foldxIndivFile):
     """Count candidate hydrogen bonds made by the mutated residue.

     The mutation is read from ``foldxIndivFile`` with ``whatMutation``;
     element 3 of its result is used as the chain id and element 2 as the
     residue number. Every atom of that residue whose name is in ``donors``
     is paired with atoms within 3.2 whose name is in ``acceptors`` or
     ``main_chain_acceptors``; every atom whose name is in ``acceptors`` is
     paired with nearby ``donors`` / ``main_chain_donors`` atoms. Atoms of
     any residue carrying the mutated residue's number are ignored (the
     chain is not compared).

     Returns the total number of such donor/acceptor contacts.
     """
     wt_mut_chain_id = whatMutation(foldxIndivFile)
     prot_chain = wt_mut_chain_id[3]
     prot_mut_id = int(wt_mut_chain_id[2])
     model = self.structure[0]
     myres = model[prot_chain][prot_mut_id]
     ns = NeighborSearch(Selection.unfold_entities(model, 'A'))

     # Loop-invariant partner name lists, built once instead of per atom.
     acceptor_names = acceptors + main_chain_acceptors
     donor_names = donors + main_chain_donors

     def count_partners(atom, partner_names):
         """Count nearby atoms of other residue numbers named in partner_names."""
         # Cutoff of 3.2 angstroms D-A, strong mostly covalent according to
         # Jeffrey, George A.; An introduction to hydrogen bonding,
         # Oxford University Press, 1997.
         hits = 0
         for close_atom in ns.search(atom.coord, 3.2):
             full_atom_id = close_atom.get_full_id()
             # full id: [3][1] is the residue number, [4][0] the atom name.
             if (full_atom_id[3][1] != prot_mut_id
                     and full_atom_id[4][0] in partner_names):
                 hits += 1
         return hits

     h_bond_count = 0
     for atom in myres:
         if atom.name in donors:
             h_bond_count += count_partners(atom, acceptor_names)
         # An atom can be listed both as donor and as acceptor, so this is
         # a second independent test rather than an elif.
         if atom.name in acceptors:
             h_bond_count += count_partners(atom, donor_names)
     return h_bond_count
Esempio n. 27
0
    def __init__(self, model, msms_exec=None):
        """Compute residue depth and C-alpha depth for the residues of ``model``.

        ``msms_exec`` is the MSMS executable handed to ``get_surface``; it
        defaults to ``"msms"``. Residues that do not pass ``is_aa`` are
        skipped. Results are keyed by ``(chain_id, residue_id)`` and are
        also stored on each residue as ``xtra["EXP_RD"]`` and
        ``xtra["EXP_RD_CA"]``.
        """
        executable = "msms" if msms_exec is None else msms_exec

        residues = Selection.unfold_entities(model, "R")
        # One surface for the whole model, shared by all depth queries.
        surface = get_surface(model, MSMS=executable)

        by_key = {}
        keys = []
        entries = []
        for res in residues:
            if is_aa(res):
                depths = (residue_depth(res, surface), ca_depth(res, surface))
                key = (res.get_parent().get_id(), res.get_id())
                by_key[key] = depths
                entries.append((res, depths))
                keys.append(key)
                # Expose the values on the residue itself as well.
                res.xtra["EXP_RD"], res.xtra["EXP_RD_CA"] = depths

        AbstractPropertyMap.__init__(self, by_key, keys, entries)
Esempio n. 28
0
    def __init__(self, model, pdb_file=None):
        """Compute residue depth and C-alpha depth for the residues of ``model``.

        ``pdb_file`` is not used for the computation; passing anything other
        than ``None`` only triggers a ``BiopythonDeprecationWarning``.
        Residues that do not pass ``is_aa`` are skipped. Results are keyed
        by ``(chain_id, residue_id)`` and mirrored into ``residue.xtra``
        under ``'EXP_RD'`` and ``'EXP_RD_CA'``.
        """
        if pdb_file is not None:
            message = ("ResidueDepth no longer requires a pdb file. "
                       "This argument will be removed in a future release "
                       "of Biopython.")
            warnings.warn(message, BiopythonDeprecationWarning)

        residues = Selection.unfold_entities(model, 'R')
        # The surface is built from the model itself (via MSMS).
        surface = get_surface(model)

        property_dict = {}
        property_keys = []
        property_list = []
        for res in residues:
            if not is_aa(res):
                continue
            depth_pair = (residue_depth(res, surface), ca_depth(res, surface))
            map_key = (res.get_parent().get_id(), res.get_id())

            property_dict[map_key] = depth_pair
            property_list.append((res, depth_pair))
            property_keys.append(map_key)

            # Keep the values reachable from the residue object too.
            res.xtra['EXP_RD'] = depth_pair[0]
            res.xtra['EXP_RD_CA'] = depth_pair[1]

        AbstractPropertyMap.__init__(
            self, property_dict, property_keys, property_list)
Esempio n. 29
0
    def collectSaltBridge(self, position, chain):
        """Return 1 if the residue takes part in a candidate salt bridge, else 0.

        The residue ``position`` of ``chain`` in the first model is examined.
        Only ARG, LYS, ASP, GLU and HIS are considered; any other residue
        yields 0. For each atom named NH1/NH2/NZ/NE2 the method looks for an
        atom named OE1/OE2/OD1/OD2 within 4.5, and vice versa.

        Unlike the previous implementation, every charged atom of the
        residue is checked (it used to stop after the first one, found or
        not), and 0 is returned instead of failing with an unbound local
        when the residue has none of the listed atoms.

        NOTE(review): partners are matched by atom name only, so e.g. the
        OD1 of an ASN also counts - confirm whether the partner residue
        type should be checked as well.
        """
        model = self.structure[0]
        myres = model[chain][position]

        # Cheap rejection first: no need to index all atoms for residues
        # that cannot form a salt bridge.
        if myres.get_resname() not in ('ARG', 'LYS', 'ASP', 'GLU', 'HIS'):
            return 0

        ns = NeighborSearch(Selection.unfold_entities(model, 'A'))

        positive_names = ('NH1', 'NH2', 'NZ', 'NE2')
        negative_names = ('OE1', 'OE2', 'OD1', 'OD2')

        for atom in myres:
            atom_name = atom.get_full_id()[4][0]
            if atom_name in positive_names:
                partner_names = negative_names
            elif atom_name in negative_names:
                partner_names = positive_names
            else:
                continue
            # Cutoff of 4 fixed by Barlow & Thornton (PMID 6887253), +0.5 A
            # to account for the unoptimised side chain.
            close_atoms = ns.search(atom.coord, 4.5)
            if any(close_atom.id in partner_names for close_atom in close_atoms):
                return 1
        return 0
Esempio n. 30
0
def structure_filtered_dca_get_pairwise_distances_and_sequence(
        pdb_id, chain_id, start, end, pdb_directory='./pdbs/'):
    """Return the residue-residue distance matrix and sequence of a chain.

    The file ``<pdb_id><chain_id>_<start>-<end>.pdb`` is read from
    ``pdb_directory`` and chain ``chain_id`` of its first model is used.
    Residues flagged by ``structure_filtered_dca_is_hetero`` are dropped
    before distances are computed.

    Returns ``(distances, sequence)``: ``distances`` is a NumPy array built
    from one row per residue, each entry being
    ``structure_filtered_dca_get_interaction_distance(res_i, res_j)``;
    ``sequence`` comes from
    ``structure_filtered_dca_get_sequence_from_structure``.
    """
    parser = PDBParser()
    file_name = "%s%s_%s-%s.pdb" % (pdb_id, chain_id, str(start), str(end))
    chain = parser.get_structure(
        pdb_id, os.path.join(pdb_directory, file_name))[0][chain_id]

    # The sequence is taken from the chain before hetero residues are
    # filtered out below.
    sequence = structure_filtered_dca_get_sequence_from_structure(chain)

    kept_residues = [
        res for res in Selection.unfold_entities(chain, 'R')
        if not structure_filtered_dca_is_hetero(res)
    ]

    # Full square matrix: every ordered pair is evaluated, including the
    # diagonal.
    distance_rows = [
        [structure_filtered_dca_get_interaction_distance(row_res, col_res)
         for col_res in kept_residues]
        for row_res in kept_residues
    ]

    return np.array(distance_rows), sequence
Esempio n. 31
0
def get_PDB_indices(chain_obj):
    """Return an array with one index triple per accepted residue.

    Every residue of ``chain_obj`` that passes ``is_residue`` contributes
    ``(full_id[1], full_id[2], residue_number)``, where ``full_id`` is the
    residue's full id and ``residue_number`` is element 1 of its id.
    (In Bio.PDB, elements 1 and 2 of a full id are presumably the model id
    and the chain id.) The triples are returned as a NumPy array.
    """
    triples = []
    for res in Selection.unfold_entities(chain_obj, 'R'):
        if not is_residue(res):
            continue
        full_id = res.get_full_id()
        triples.append((full_id[1], full_id[2], res.get_id()[1]))
    return np.array(triples)
Esempio n. 32
0
def extract_beads(pdb_path):
    """Write the CA / CB bead coordinates of a PDB file to ``<pdb_path>_bead.csv``.

    Residue names are mapped through the amino-acid table read from
    ``/home/hyang/bio/erf/data/amino_acids.csv`` (columns ``AA3C`` and
    ``AA``); residues whose name is not in that table are skipped. For GLY
    the CA coordinate is used in place of the missing CB.

    Returns 1 on success and 0 when the structure cannot be parsed, when an
    accepted residue lacks its CA (or CB for non-GLY) atom, or when no
    residue is accepted at all. In the 0 cases no file is written.
    """
    amino_acids = pd.read_csv('/home/hyang/bio/erf/data/amino_acids.csv')
    # Upper-cased three-letter code -> value of the AA column. The dict is
    # also used for the membership test (O(1) instead of a list scan).
    vocab_dict = {
        x.upper(): y
        for x, y in zip(amino_acids.AA3C, amino_acids.AA)
    }

    p = PDBParser()
    try:
        structure = p.get_structure('X', pdb_path)
    except Exception:
        # Unreadable or malformed file: report failure through the return
        # code, but let KeyboardInterrupt / SystemExit propagate.
        return 0
    residue_list = Selection.unfold_entities(structure, 'R')

    ca_center_list = []
    cb_center_list = []
    res_name_list = []
    res_num_list = []
    chain_list = []

    for res in residue_list:
        res_name = res.get_resname()
        if res_name not in vocab_dict:
            continue
        try:
            ca_coord = res['CA'].get_coord()
            # Glycine has no CB; its CA stands in for the side-chain bead.
            cb_coord = ca_coord if res_name == 'GLY' else res['CB'].get_coord()
        except KeyError:
            # Incomplete residue: the whole structure is rejected.
            return 0
        chain_list.append(res.parent.id)
        res_name_list.append(vocab_dict[res_name])
        res_num_list.append(res.id[1])
        ca_center_list.append(ca_coord)
        cb_center_list.append(cb_coord)

    if not ca_center_list:
        # np.vstack raises on an empty list; treat "nothing to write" as a
        # failure like the other rejection paths.
        return 0

    ca_center = np.vstack(ca_center_list)
    cb_center = np.vstack(cb_center_list)

    df = pd.DataFrame({
        'chain_id': chain_list,
        'group_num': res_num_list,
        'group_name': res_name_list,
        'x': ca_center[:, 0],
        'y': ca_center[:, 1],
        'z': ca_center[:, 2],
        'xcb': cb_center[:, 0],
        'ycb': cb_center[:, 1],
        'zcb': cb_center[:, 2]
    })

    df.to_csv(f'{pdb_path}_bead.csv', index=False)
    return 1
Esempio n. 33
0
    def __init__(self, pdb, j_input):
        """Load the structure in ``pdb`` and parse the J list.

        Stores the current working directory, the file name with every
        ``".pdb"`` occurrence removed, the parsed structure (read quietly,
        using the file name as its id), the flat list of all its atoms and
        ``j_input``, then calls ``parse_j_list()``.
        """
        self.wkdir = Path.cwd()
        self.pdb = pdb.replace(".pdb", "")

        quiet_parser = PDBParser(QUIET=True)
        self.struct = quiet_parser.get_structure(pdb, pdb)
        self.atom_list = Selection.unfold_entities(self.struct, 'A')

        # j_input has to be set before parse_j_list() runs.
        self.j_input = j_input
        self.parse_j_list()
    def test_on_pdb(self):
        """Align a PDB to itself."""
        pdb_path = "PDB/1A8O.pdb"
        parser = PDBParser()

        # Two independent copies of the same structure.
        fixed_structure = parser.get_structure("FIXED", pdb_path)
        fixed = Selection.unfold_entities(fixed_structure, "A")
        moving_structure = parser.get_structure("MOVING", pdb_path)
        moving = Selection.unfold_entities(moving_structure, "A")

        # Shift the moving copy by a known translation, no rotation.
        identity = np.eye(3, dtype=np.float64)
        shift = np.array([1.0, 2.0, 3.0], dtype=np.float64)
        for atom in moving:
            atom.transform(identity, shift)

        sup = QCPSuperimposer()
        sup.set_atoms(fixed, moving)

        # The fit must undo the shift: identity rotation, opposite
        # translation, zero RMS.
        self.assertEqual(
            self._arr_to_list(sup.rotran[0]), self._arr_to_list(identity))
        self.assertEqual(
            self._arr_to_list(sup.rotran[1]), self._arr_to_list(-shift))
        self.assertAlmostEqual(sup.rms, 0.0, places=6)
Esempio n. 35
0
 def test_fragment_mapper(self):
     """Self test for FragmentMapper module."""
     structure = PDBParser().get_structure("X", "PDB/1A8O.pdb")
     model = structure[0]
     mapper = FragmentMapper(model, 10, 5, "PDB")
     expected_prefix = "<Fragment length=5 id="
     # Only residues that received a fragment assignment are checked.
     mapped_residues = (
         res for res in Selection.unfold_entities(model, "R") if res in mapper
     )
     for res in mapped_residues:
         self.assertTrue(str(mapper[res]).startswith(expected_prefix))
 def test_fragment_mapper(self):
     """Self test for FragmentMapper module."""
     pdb_path = "PDB/1A8O.pdb"
     first_model = PDBParser().get_structure("X", pdb_path)[0]
     fragment_map = FragmentMapper(first_model, 10, 5, "PDB")
     for residue in Selection.unfold_entities(first_model, "R"):
         # Residues without a fragment assignment are not checked.
         if residue not in fragment_map:
             continue
         description = str(fragment_map[residue])
         self.assertTrue(description.startswith("<Fragment length=5 id="))
Esempio n. 37
0
    def atom_distance(x, y):
        '''Return the smallest atom-to-atom distance between entities x and y.

        Both arguments are unfolded to their atoms and ``distance`` is
        evaluated on the coordinates of every (x atom, y atom) pair; the
        minimum of those values is returned. ``min`` raises ``ValueError``
        when either entity has no atoms.'''
        x_atoms = Selection.unfold_entities(x, 'A')
        y_atoms = Selection.unfold_entities(y, 'A')
        return min(
            distance(x_atom.get_coord(), y_atom.get_coord())
            for x_atom in x_atoms
            for y_atom in y_atoms
        )
 def get_bfactors(self, chain_id):
     '''
     Return the standardised per-residue B-factors of one chain.

     Input:
         chain_id : id of the chain (e.g. 'A') looked up in the first
                    model of ``self.structure``.
     Return:
         NumPy integer array with one entry per standard amino acid of
         the chain (``is_aa(..., standard=True)``).

     For every residue the B-factors of its atoms are averaged; a residue
     without atoms gets ``np.nan``. The values are then converted to
     standard scores (zero mean, unit variance) using NaN-aware statistics,
     so a single missing residue no longer turns the whole result into NaN.
     Finally the scores are cast to integers (truncation towards zero).

     NOTE(review): casting a remaining NaN to an integer has no defined
     value in NumPy; callers needing to detect missing residues should
     confirm how such entries must be reported.
     '''
     chain = self.structure.child_list[0].child_dict[chain_id]
     aacids = [
         residue for residue in Selection.unfold_entities(chain, 'R')
         if is_aa(residue, standard=True)
     ]

     # Start from NaN so residues without atoms need no special casing.
     b_factors = np.full(len(aacids), np.nan, dtype=np.float32)
     for i, residue in enumerate(aacids):
         atoms = Selection.unfold_entities(residue, 'A')
         if atoms:
             b_factors[i] = np.mean([atom.get_bfactor() for atom in atoms])

     # nanmean / nanstd ignore the missing residues; plain np.var would
     # return NaN as soon as one entry is NaN.
     mean = np.nanmean(b_factors)
     sd = np.nanstd(b_factors)
     normalized = (b_factors - mean) / sd

     # np.int was only an alias of the builtin int and has been removed
     # from NumPy; the builtin gives the same dtype.
     return normalized.astype(int)
Esempio n. 39
0
	def __call__(self,structure):
		"""Return the CA-CA distance matrix of the residues in ``structure``.

		Only residues whose hetero flag (``res.id[0]``) is a blank are used.
		Entry ``[i, j]`` is the distance between the CA atoms of residues
		``i`` and ``j``; it stays 0 when either residue has no CA atom.
		"""
		# A real list is required: len() is taken below and the residues
		# are iterated once per row (a filter object supports neither on
		# Python 3).
		res_list = [
			res for res in Selection.unfold_entities(structure,'R')
			if res.id[0] == " "
		]
		distance_matrix = np.zeros([len(res_list),len(res_list)])
		for i,res1 in enumerate(res_list):
			for j,res2 in enumerate(res_list):
				try:
					# Subtracting two atoms yields their distance.
					distance_matrix[i,j] = res1['CA'] - res2['CA']
				except KeyError:
					# Missing CA on either side: keep the 0 placeholder.
					distance_matrix[i,j] = 0

		return distance_matrix
Esempio n. 40
0
def get_surface(model, PDB_TO_XYZR=None, MSMS="msms"):
    """Represent molecular surface as a vertex list array.

    Return a Numpy array that represents the vertex list of the
    molecular surface.

    Arguments:
     - model - entity whose atoms define the surface.
     - PDB_TO_XYZR - deprecated, ignore this.
     - MSMS - msms executable (used as argument to os.system)

    Raises RuntimeError when MSMS does not produce a ``.vert`` file.

    Temporary files are created with ``tempfile.mkstemp`` (no name race,
    unlike ``mktemp``) and are removed again before returning, on success
    and on failure.
    """
    # Issue warning if PDB_TO_XYZR is given
    if PDB_TO_XYZR is not None:
        warnings.warn(("PDB_TO_XYZR argument will be deprecated soon"
                       " in favor of an internal mapping algorithm."),
                      BiopythonDeprecationWarning)

    atom_list = Selection.unfold_entities(model, 'A')

    def _new_tmp_path():
        """Create an empty temporary file and return its path."""
        handle, path = tempfile.mkstemp()
        os.close(handle)
        return path

    xyz_tmp = _new_tmp_path()       # x, y, z, radius input for MSMS
    surface_tmp = _new_tmp_path()   # output prefix handed to MSMS
    msms_log = _new_tmp_path()      # captured stdout of MSMS
    surface_file = surface_tmp + ".vert"
    # MSMS is expected to write a .face file next to the .vert file;
    # removing it is harmless if it does not exist.
    face_file = surface_tmp + ".face"

    try:
        # Replace pdb_to_xyzr: make the x, y, z, radius file ourselves.
        with open(xyz_tmp, 'w') as pdb_to_xyzr:
            for atom in atom_list:
                x, y, z = atom.coord
                radius = _get_atom_radius(atom, rtype='united')
                pdb_to_xyzr.write(
                    '{:6.3f}\t{:6.3f}\t{:6.3f}\t{:1.2f}\n'.format(
                        x, y, z, radius))

        # make surface
        make_surface = "%s -probe_radius 1.5 -if %s -of %s > %s" % (
            MSMS, xyz_tmp, surface_tmp, msms_log)
        os.system(make_surface)
        if not os.path.isfile(surface_file):
            raise RuntimeError("Failed to generate surface file using "
                               "command:\n%s" % make_surface)

        # read surface vertices from vertex file
        return _read_vertex_array(surface_file)
    finally:
        # Best-effort cleanup; a file that was never created is not an
        # error.
        for path in (xyz_tmp, surface_tmp, msms_log, surface_file,
                     face_file):
            try:
                os.remove(path)
            except OSError:
                pass
Esempio n. 41
0
 def __init__(self, model, pdb_file):
     """Compute residue depth and C-alpha depth for the residues of ``model``.

     The surface is obtained from ``get_surface(pdb_file)``. Residues that
     do not pass ``is_aa`` are skipped. Results are keyed by
     ``(chain_id, residue_id)`` and are also written to ``residue.xtra``
     under ``"EXP_RD"`` and ``"EXP_RD_CA"``.
     """
     residues = Selection.unfold_entities(model, "R")
     # Here the surface is derived from the PDB file, not from the model.
     surface = get_surface(pdb_file)

     values_by_key = {}
     ordered_keys = []
     ordered_values = []
     for res in residues:
         if is_aa(res):
             depth = residue_depth(res, surface)
             calpha_depth = ca_depth(res, surface)
             key = (res.get_parent().get_id(), res.get_id())

             values_by_key[key] = (depth, calpha_depth)
             ordered_values.append((res, (depth, calpha_depth)))
             ordered_keys.append(key)

             # Make the values available on the residue as well.
             res.xtra["EXP_RD"] = depth
             res.xtra["EXP_RD_CA"] = calpha_depth

     AbstractPropertyMap.__init__(
         self, values_by_key, ordered_keys, ordered_values)
Esempio n. 42
0
    def _build_interface(self, model, id, threshold, rsa_calculation, rsa_threshold, include_waters=False, *chains):
        """
        Build the interface of a model.

        Atoms of the chains listed in ``self.chain_list`` are searched for
        residue pairs closer than ``threshold``. A pair is kept when its
        residues are on different chains, when it involves no water (unless
        ``include_waters`` is true) and, if ``chains`` are given, when its
        ``(chain_a, chain_b)`` tuple is one of them. Residues of kept pairs
        are added to ``self.interface``.

        When ``rsa_calculation`` is true and ``naccess`` is found on the
        system, residues returned by ``self._rsa_calculation`` are added as
        well; otherwise that step is skipped (it used to fail with an
        unbound ``rsa_pairs``).

        Side effects: sets ``self.threshold`` and
        ``self.interface.uniq_pairs`` and calls
        ``self._secondary_structure(model)``.

        Raises ValueError when no residue pair lies within ``threshold``.
        """

        self.threshold = threshold

        # Recover chain list from initial unpacking
        chain_list = self.chain_list

        # Unfold atom list
        atom_list = []
        for c in model:
            if c.id in chain_list:
                atom_list.extend(Selection.unfold_entities(c, 'A'))

        # All residue pairs closer than the threshold distance
        ns = NeighborSearch(atom_list)
        pairs = ns.search_all(threshold, 'R')

        if not pairs:
            raise ValueError("No atoms found in the interface")

        # Selection of residue pairs
        # 1. Exclude water contacts
        # 2. Filter same-chain contacts
        # 3. Filter user-defined chain pairs
        uniq_pairs = []
        for pair in pairs:
            pair_resnames = (pair[0].resname, pair[1].resname)
            pair_chains = (pair[0].parent.id, pair[1].parent.id)

            if not include_waters and 'HOH' in pair_resnames:
                continue
            if pair_chains[0] == pair_chains[1]:
                continue
            if chains and pair_chains not in chains:
                continue
            uniq_pairs.append(pair)

        # Build the interface from the selected pairs
        for resA, resB in uniq_pairs:
            if resA not in self.interface:
                self._add_residue(resA)
            if resB not in self.interface:
                self._add_residue(resB)

        # Accessible surface area is only evaluated when requested and
        # when naccess is installed; default to "no extra residues".
        rsa_pairs = []
        if rsa_calculation and os.system('which naccess') == 0:
            rsa_pairs = self._rsa_calculation(model, chain_list, rsa_threshold)

        for res in rsa_pairs:
            if res not in self.interface:
                self._add_residue(res)
        self._secondary_structure(model)
        self.interface.uniq_pairs = uniq_pairs
Esempio n. 43
0
 def _process_structure(self):
     """Wrap every chain of ``self.structure`` in a ``Chain`` built with ``self.charges``."""
     wrapped = []
     for entity in Selection.unfold_entities(self.structure, 'C'):
         wrapped.append(Chain(entity, self.charges))
     return wrapped
Esempio n. 44
0
def compute_interactions(pdb_name,save_to_db = False):
    """Compute CA-CA distances and classified interactions for a structure.

    The ``Structure`` record matching the lower-cased ``pdb_name`` is
    loaded, its stored PDB text is parsed, and the first model's preferred
    chain (first comma-separated entry of ``preferred_chain``) is used.
    Residues whose sequence number has no database residue with a generic
    number are detached from the chain, except waters (``HOH``).

    Returns ``(classified, distances)``:
      - ``classified``: ``InteractingPair`` objects with at least one
        detected interaction.
      - ``distances``: tuples ``(db_res1, db_res2, distance, label1, label2)``
        for every unordered pair of non-water residues.

    When ``save_to_db`` is true, previous ``InteractingResiduePair`` rows of
    the structure are deleted, the classified pairs are saved and the
    distances are stored as ``Distance`` rows in batches.
    """

    # Both stages are always enabled; the flags only structure the code.
    do_distances = True
    do_interactions = True
    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Remove residues without a generic number; keep only those matching
    # the receptor residues known to the database.
    residues = struc.protein_conformation.residue_set.exclude(generic_number=None).all().prefetch_related('generic_number')
    # Sequence number -> database residue / generic number label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Ids are collected first so the chain is not modified while iterating.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1,res1 in enumerate(chain,1):
            if not is_water(res1):
                for i2,res2 in enumerate(chain,1):
                    if i2>i1 and not is_water(res2):
                        # Do not calculate twice.
                        # NOTE(review): presumably raises KeyError when a
                        # residue has no CA atom - confirm.
                        distance = res1['CA']-res2['CA']
                        distances.append((dbres[res1.id[1]],dbres[res2.id[1]],distance,dblabel[res1.id[1]],dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')
        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(4.5, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])]

        # Only include contacts between residues MORE than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [pair for pair in all_aa_neighbors if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [InteractingPair(res_pair[0], res_pair[1], get_interactions(res_pair[0], res_pair[1]),dbres[res_pair[0].id[1]], dbres[res_pair[1].id[1]],struc) for res_pair in all_aa_neighbors]

        # Split into classified and unclassified.
        classified = [interaction for interaction in interactions if len(interaction.get_interactions()) > 0]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed by
            # "<chain><resnum>_<chain><resnum>"
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key =  res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules (a set of water atoms)
            water_list = { water for residue in s[preferred_chain] if residue.get_resname() == "HOH" for water in residue.get_atoms() }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list
                water_neighbors = [(water, match_res) for water in water_list
                                for match_res in ns.search(water.coord, 3.5, "R") if not is_water(match_res)]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    for index_two in [ index for index in range(index_one+1, len(water_neighbors)) if water_pair_one[0]==water_neighbors[index][0] ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]
                        key =  res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

                        # Check if interaction is polar - NOTE: this is not capturing every angle
                        if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
                            # NOTE: Is splitting of sidechain and backbone-mediated interactions desired?
                            if key in interaction_pairs:
                                interaction_pairs[key].interactions.append(WaterMediated())
                            else:
                                interaction_pairs[key] = InteractingPair(res_1, res_2, [WaterMediated()], dbres[res_1.id[1]], dbres[res_2.id[1]], struc)

            # NOTE(review): only the pairs in `classified` are saved. Pairs
            # that exist solely in `interaction_pairs` (water-mediated only)
            # are neither saved nor returned - confirm whether intended.
            for p in classified:
                p.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            # Distances are stored as int(100 * value) and written in
            # batches; a batch is flushed once it holds more than 1000 rows.
            bulk_distances = []
            for i,d in enumerate(distances):
                distance = Distance(distance=int(100*d[2]),res1=d[0], res2=d[1],gn1=d[3], gn2=d[4], gns_pair='_'.join([d[3],d[4]]), structure=struc)
                bulk_distances.append(distance)
                if len(bulk_distances)>1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Flush whatever is left in the last, partial batch.
            pairs = Distance.objects.bulk_create(bulk_distances)



    return classified, distances
Esempio n. 45
0
SCALAR_EXPRESSION %s_startenergy = %s
SCALAR_EXPRESSION %s_o = %s_startenergy + 4.91
SCALAR_EXPRESSION %s_s = %s
SCALAR_EXPRESSION %s_exp = exp(%s_s*(%s_currentenergy-%s_o))
SCALAR_EXPRESSION %s_k = %s
SCALAR_EXPRESSION %s_sig = (1-%s_k)+(%s_k/(1+%s_exp))

""") %(structure_id, structurefilename, correspondencefilename, resfilename, structure_id, startenergy, structure_id, structure_id, structure_id, s_value, structure_id, structure_id, structure_id, structure_id, structure_id, k_value, structure_id, structure_id, structure_id, structure_id)
		fitnessfile.write(outstring)

		fitnessstring = fitnessstring + str("*%s_sig") % (structure_id)
		

		nucleosome = structure[0]

		atom_list = Selection.unfold_entities(nucleosome, 'A') # A for atoms
		neighbor_search = NeighborSearch(atom_list)

		contacts_list = neighbor_search.search_all(radius, level = 'R')
		
		repack_residues = []
		for contact in contacts_list:
			res1 = contact[0]
			res2 = contact[1]
			res1id = int(res1.get_id()[1])
			chain1 = res1.get_parent()
			chain1id = chain1.get_id()
			res2id = int(res2.get_id()[1])
			chain2 = res2.get_parent()
			chain2id = chain2.get_id()
			
Esempio n. 46
0
 def test_Superimposer(self):
     """Test on module that superimpose two protein structures.

     The same PDB file is parsed twice; the second copy is translated by
     (1, 2, 3) and then fitted back onto the first. The fit must report an
     identity rotation, the opposite translation and a zero RMS. After
     applying the fit, the element symbols of the moved atoms are compared,
     in order, with the expected list.
     """
     pdb1 = "PDB/1A8O.pdb"
     p = PDBParser()
     s1 = p.get_structure("FIXED", pdb1)
     fixed = Selection.unfold_entities(s1, "A")
     s2 = p.get_structure("MOVING", pdb1)
     moving = Selection.unfold_entities(s2, "A")
     # Pure translation: identity rotation plus a (1, 2, 3) shift.
     rot = numpy.identity(3).astype('f')
     tran = numpy.array((1.0, 2.0, 3.0), 'f')
     for atom in moving:
         atom.transform(rot, tran)
     sup = Superimposer()
     sup.set_atoms(fixed, moving)
     # The computed transformation has to undo the shift exactly.
     self.assertTrue(numpy.allclose(sup.rotran[0], numpy.identity(3)))
     self.assertTrue(numpy.allclose(sup.rotran[1], numpy.array([-1.0, -2.0, -3.0])))
     self.assertAlmostEqual(sup.rms, 0.0, places=3)
     # Expected element symbol of every atom, in unfold order.
     atom_list = ['N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N', 'C', 'C',
                  'O', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C',
                  'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C',
                  'N', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N',
                  'N', 'C', 'C', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C',
                  'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'N', 'C',
                  'C', 'O', 'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O',
                  'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
                  'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N',
                  'C', 'N', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O',
                  'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
                  'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'C',
                  'O', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C',
                  'C', 'N', 'C', 'N', 'N', 'N', 'C', 'C', 'O', 'C', 'C',
                  'C', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C',
                  'C', 'C', 'C', 'C', 'C', 'O', 'N', 'C', 'C', 'O', 'C',
                  'C', 'C', 'C', 'N', 'N', 'C', 'C', 'O', 'C', 'O', 'C',
                  'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C',
                  'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N', 'N', 'C', 'C',
                  'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O',
                  'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C',
                  'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'O', 'N', 'C',
                  'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O',
                  'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C',
                  'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'N',
                  'C', 'C', 'O', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O',
                  'C', 'C', 'C', 'C', 'N', 'C', 'C', 'C', 'C', 'C', 'N',
                  'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N', 'C', 'C', 'O',
                  'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O',
                  'O', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C', 'C',
                  'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C',
                  'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C',
                  'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O',
                  'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C', 'N', 'C',
                  'C', 'O', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C',
                  'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O', 'N',
                  'C', 'C', 'O', 'C', 'S', 'N', 'C', 'C', 'O', 'C', 'C',
                  'C', 'C', 'N', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N',
                  'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O',
                  'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C',
                  'C', 'N', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C', 'O',
                  'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'N', 'C', 'C',
                  'O', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'N', 'C', 'C',
                  'O', 'C', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C',
                  'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C',
                  'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C',
                  'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N',
                  'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N', 'C', 'C', 'O',
                  'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C',
                  'O', 'C', 'S', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O',
                  'N', 'N', 'C', 'C', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                  'O', 'O', 'O', 'O', 'O', 'O']
     # Apply the fitted transformation, then check that atom order and
     # element assignment are untouched.
     sup.apply(moving)
     atom_moved = []
     for aa in moving:
         atom_moved.append(aa.element)
     self.assertEqual(atom_moved, atom_list)
Esempio n. 47
0
# --resnum: one or more integer residue numbers; used below to pick the
# "center" residues out of the chain named by args.chain1.
parser.add_argument("--resnum", nargs='+', required=True, type=int)
# --chain2: one or more chain names whose residues are searched for neighbours.
parser.add_argument("--chain2", nargs='+', required=True)

# NOTE(review): args.pdb and args.chain1 are read below but their
# add_argument calls are not part of this snippet -- presumably declared
# earlier in the script; confirm.
args = parser.parse_args()


from Bio.PDB import NeighborSearch, PDBParser, Selection

# Parse the input file; 'X' is just the id given to the structure object.
structure = PDBParser().get_structure('X', args.pdb)

pymol_command = ""

chain = structure[0][args.chain1]  # Supply chain name for "center residues"
center_residues = [chain[resi] for resi in args.resnum]

# Flatten the selected residues into their atoms ('A' = atom level).
center_atoms = Selection.unfold_entities(center_residues, 'A')


for j in args.chain2 :
    # Only atoms named 'CA' of chain j are indexed for the search.
    atom_list = [atom for atom in structure[0][j].get_atoms() if atom.name == 'CA' ]
    ns = NeighborSearch(atom_list)

    # Set of residues of chain j that own an indexed atom within a radius of
    # 8.5 of at least one center atom ('R' asks for residue-level results).
    nearby_residues = {res for center_atom in center_atoms
                        for res in ns.search(center_atom.coord, 8.5, 'R')}

    # Python 2 print statements: report the sorted residue numbers (res.id[1]).
    print "\nNeighbor residues in chain ", j, ": \n"
    print sorted(res.id[1] for res in nearby_residues)

    # The command string is reassigned (not appended to) for every chain.
    pymol_command = "show spheres, chain " + j + " and resi "

    # NOTE(review): the body of the loop below is missing from this snippet.
    for m in sorted(res.id[1] for res in nearby_residues):
Esempio n. 48
0
    def __getitem__(self, res):
        """
        Look up the fragment stored for a residue in C{self.fd}.

        @param res: residue used as the lookup key
        @type res: L{Residue}

        @return: fragment classification
        @rtype: L{Fragment}
        """
        fragment = self.fd[res]
        return fragment


if __name__ == "__main__":
    # Demo driver: map every residue of the first model of the PDB file given
    # as the first command-line argument onto the fragment library and print
    # one line per residue.

    import sys

    p = PDBParser()
    s = p.get_structure("X", sys.argv[1])

    # Only the first model is used.
    m = s[0]
    fm = FragmentMapper(m, 10, 5, "levitt_data")

    for r in Selection.unfold_entities(m, "R"):
        # Single-argument print(...) calls produce the same text under
        # Python 2 (print statement with a parenthesised expression) and
        # Python 3 (print function): "<residue> <fragment>" when the residue
        # was mapped, otherwise just "<residue>".
        if r in fm:
            print("%s %s" % (r, fm[r]))
        else:
            print(r)


Esempio n. 49
0
def get_residue_depth(pdb_fh,msms_fh):
    """
    Extracts residue depth from a PDB structure (amino acids of chain "A").

    If the MSMS vertex file for the structure does not exist yet, it is
    generated first by running ``pdb_to_xyzr`` and MSMS through the shell.
    All intermediate files are written next to ``msms_fh``.

    :param pdb_fh: path to PDB structure file
    :param msms_fh: path to the MSMS executable; ``pdb_to_xyzr`` is expected
        in the same directory
    :returns data_depth: pandas table indexed by residue number ("aasi") with
        columns "Residue depth" and "Residue (C-alpha) depth"
    """
    from Bio.PDB import Selection,PDBParser
    from Bio.PDB.Polypeptide import is_aa
    from Bio.PDB.ResidueDepth import _read_vertex_array,residue_depth,ca_depth

    msms_dir=dirname(msms_fh)
    pdb_name=basename(pdb_fh)
    surface_fh="%s/%s.msms.vert" % (msms_dir,pdb_name)
    if not exists(surface_fh):
        pdb_to_xyzr_fh="%s/pdb_to_xyzr" % msms_dir
        xyzr_fh="%s/%s.xyzr" % (msms_dir,pdb_name)
        surface_prefix=splitext(surface_fh)[0]
        pdb_to_xyzr_com="%s %s > %s" % (pdb_to_xyzr_fh,pdb_fh,xyzr_fh)
        msms_com="%s -probe_radius 1.5 -if %s -of %s > %s.log" % (msms_fh,xyzr_fh,surface_prefix,surface_prefix)
        log_fh="%s.log" % msms_fh
        # NOTE(review): the commands are assembled by string formatting and run
        # with shell=True, so paths containing spaces or shell metacharacters
        # will break (or be interpreted by the shell). Only pass trusted paths.
        with open(log_fh,'a') as log_f:
            log_f.write("%s;\n%s\n" % (pdb_to_xyzr_com,msms_com))
            # Flush so the header lands in the log before the child processes
            # write to the same file descriptor.
            log_f.flush()
            subprocess.call("%s;%s" % (pdb_to_xyzr_com,msms_com) , shell=True,stdout=log_f, stderr=subprocess.STDOUT)

    surface =_read_vertex_array(surface_fh)

    pdb_parser=PDBParser()
    pdb_data=pdb_parser.get_structure("pdb_name",pdb_fh)
    # Only the first model is analysed.
    model = pdb_data[0]

    depth_dict = {}
    for residue in Selection.unfold_entities(model, 'R'):
        if not is_aa(residue):
            continue
        chain_id = residue.get_parent().get_id()
        # Skip (rather than stop at) residues of other chains, so chain "A"
        # is still found when it is not the first chain in the file; checking
        # before the depth calculation also avoids needless work.
        if chain_id!="A":
            continue
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        depth_dict[(chain_id, residue.get_id())] = (rd, ca_rd)
        # Update xtra information
        residue.xtra['EXP_RD'] = rd
        residue.xtra['EXP_RD_CA'] = ca_rd

    # The (chain_id, res_id) tuple keys become a two-level column index; after
    # transposing and resetting the index they appear as the columns
    # "level_0" (chain id) and "level_1" (residue id tuple).
    depth_df=pd.DataFrame(depth_dict).T.reset_index()
    depth_df=depth_df.drop("level_0",axis=1)
    # Keep only the first row of every run of identical residue numbers
    # (res_id[1]); later rows of the run get no "aasi" and are dropped below.
    # Starting from None (not 0) keeps a leading residue numbered 0.
    aasi_prev=None
    for i in range(len(depth_df)):
        aasi=depth_df.loc[i,"level_1"][1]
        if aasi!=aasi_prev:
            depth_df.loc[i,"aasi"]=aasi
            aasi_prev=aasi

    depth_df=depth_df.drop("level_1",axis=1)
    depth_df=depth_df.loc[~pd.isnull(depth_df.loc[:,"aasi"]),:]
    depth_df=depth_df.set_index("aasi",drop=True)
    depth_df.columns=["Residue depth","Residue (C-alpha) depth"]
    return depth_df
Esempio n. 50
0
            raise PDBException("No transformation has been calculated yet")
        rot, tran = self.rotran
        rot = rot.astype('f')
        tran = tran.astype('f')
        for atom in atom_list:
            atom.transform(rot, tran)


if __name__ == "__main__":
    # Self-test: load the same PDB file twice, shift the second copy by a
    # known translation, then let the Superimposer recover the transformation.
    import sys

    from Bio.PDB import PDBParser, Selection

    pdb_parser = PDBParser()

    # Reference atoms.
    fixed_structure = pdb_parser.get_structure("FIXED", sys.argv[1])
    fixed = Selection.unfold_entities(fixed_structure, "A")

    # Atoms that get displaced before superimposing.
    moving_structure = pdb_parser.get_structure("MOVING", sys.argv[1])
    moving = Selection.unfold_entities(moving_structure, "A")

    # Identity rotation combined with a (1, 2, 3) translation.
    identity_rotation = numpy.identity(3).astype('f')
    shift = numpy.array((1.0, 2.0, 3.0), 'f')
    for moved_atom in moving:
        moved_atom.transform(identity_rotation, shift)

    superimposer = Superimposer()
    superimposer.set_atoms(fixed, moving)

    print(superimposer.rotran)
Esempio n. 51
0
import networkx as nx
import matplotlib.pyplot as plt

from pprint import pprint as pp

import numpy as np

# Cut-off placeholder; meant to come from the command line eventually.
distanceThreshold = 1  # sys.argv[2]

# Downloader and parser for PDB entries.
pdbList = PDBList()
pdbParser = PDBParser()

# Fetch the entry and parse the retrieved file.
proteinName = "1MBN"
pdbFilePath = pdbList.retrieve_pdb_file(proteinName)
structure = pdbParser.get_structure(proteinName, pdbFilePath)

# Flat list of all residues and a square zero matrix with one row and one
# column per residue.
resList = Selection.unfold_entities(structure, "R")
residueCount = len(resList)
distanceMatrix = np.zeros([residueCount, residueCount])


def genDistanceMatrix(dMatrix, rList):
    """Pretty-print the residue-number -> CA-atom map and its size.

    dMatrix: accepted but not used by this function
    rList: iterable of residues; only those containing a "CA" entry are kept,
        keyed by ``res.id[1]``
    """
    caMap = {}
    for residue in rList:
        if "CA" not in residue:
            continue
        resNumber = residue.id[1]
        caMap[resNumber] = residue["CA"]

    pp(caMap)
    pp(len(caMap))


# Build and print the CA map for every residue of the structure.
genDistanceMatrix(distanceMatrix, resList)
# Disabled code kept from an earlier experiment:
# for atm in atom_cords.keys():
# 	genDistances(atm)

# pp(distanceMatrix)