Ejemplo n.º 1
0
 def collectHbondsNumber(self, foldxIndivFile):
     """Count potential hydrogen-bond partners of the mutated residue.

     The mutation is read with ``whatMutation(foldxIndivFile)``; element
     ``[2]`` of its result is used as the residue number and element ``[3]``
     as the chain identifier. For every atom of that residue whose name is
     in ``donors`` (resp. ``acceptors``), all atoms of model 0 within 3.2 A
     are inspected and those named like an acceptor (resp. donor) that do
     not belong to the mutated residue itself are counted.

     Args:
         foldxIndivFile: Argument forwarded unchanged to ``whatMutation``.

     Returns:
         int: Number of (residue atom, partner atom) pairs found, donor and
         acceptor roles added together.
     """
     wt_mut_chain_id = whatMutation(foldxIndivFile)
     prot_chain = wt_mut_chain_id[3]
     prot_mut_id = int(wt_mut_chain_id[2])
     model = self.structure[0]
     myres = model[prot_chain][prot_mut_id]
     ns = NeighborSearch(Selection.unfold_entities(model, 'A'))  # 'A' = atoms

     # cutoff of 3.2 angstroms D-A, strong mostly covalent according to
     # Jeffrey, George A.; An introduction to hydrogen bonding,
     # Oxford University Press, 1997.
     cutoff = 3.2
     # Built once instead of on every inner-loop iteration.
     partner_acceptors = acceptors + main_chain_acceptors
     partner_donors = donors + main_chain_donors

     def count_partners(atom, partner_names):
         """Count atoms near `atom` named in `partner_names`, outside myres."""
         count = 0
         for close_atom in ns.search(atom.coord, cutoff):
             full_atom_id = close_atom.get_full_id()
             # Compare chain AND residue number: a residue carrying the same
             # number in another chain is a legitimate partner.
             in_mutated_residue = (full_atom_id[2] == prot_chain
                                   and full_atom_id[3][1] == prot_mut_id)
             if not in_mutated_residue and full_atom_id[4][0] in partner_names:
                 count += 1
         return count

     n_hbonds = 0
     for atom in myres:
         # An atom name may appear in both lists, so both roles are checked.
         if atom.name in donors:
             n_hbonds += count_partners(atom, partner_acceptors)
         if atom.name in acceptors:
             n_hbonds += count_partners(atom, partner_donors)
     return n_hbonds
Ejemplo n.º 2
0
    def collectSaltBridge(self, position, chain):
        """Report whether the residue at `position` takes part in a salt bridge.

        Only ARG, LYS, ASP, GLU and HIS are considered. Each charged
        side-chain atom of the residue (NH1/NH2/NZ/NE2 or OE1/OE2/OD1/OD2) is
        used as the centre of a 4.5 A search over all atoms of model 0, and a
        salt bridge is reported as soon as an atom with an oppositely charged
        atom name is found.

        Args:
            position: Residue key used to index ``model[chain]``.
            chain: Chain identifier used to index the model.

        Returns:
            int: 1 if a partner atom was found, otherwise 0.
        """
        basic_atoms = ('NH1', 'NH2', 'NZ', 'NE2')
        acidic_atoms = ('OE1', 'OE2', 'OD1', 'OD2')

        model = self.structure[0]
        myres = model[chain][position]
        if myres.get_resname() not in ('ARG', 'LYS', 'ASP', 'GLU', 'HIS'):
            return 0

        ns = NeighborSearch(Selection.unfold_entities(model, 'A'))  # 'A' = atoms
        for atom in myres:
            atom_name = atom.get_full_id()[4][0]
            if atom_name in basic_atoms:
                partner_names = acidic_atoms
            elif atom_name in acidic_atoms:
                partner_names = basic_atoms
            else:
                continue
            # cutoff of 4 A, criterion fixed by Barlow & Thornton
            # (PMID 6887253), +0.5 A to account for the unoptimised side chain
            close_names = set(close.id for close in ns.search(atom.coord, 4.5))
            # NOTE(review): partners are matched by atom name only, so e.g.
            # ASN OD1 or GLN OE1/NE2 also qualify - confirm this is intended.
            if close_names.intersection(partner_names):
                return 1
            # Keep scanning: another charged atom of the residue (e.g. NH2
            # after NH1) may still form the bridge.
        # Also reached when the residue lacks its charged atoms, which used
        # to leave the result variable unbound.
        return 0
Ejemplo n.º 3
0
def check_clash(str_name, v=False):
    """Return the fraction of interface atoms that sterically clash.

    ATOM records of chains "A" and "B" are read from the PDB file. Every atom
    of the chain with fewer atoms (chain A on a tie) is tested against the
    other chain: a neighbour within 2.0 A counts as both a clash and a
    contact, otherwise a neighbour within 4.0 A counts as a contact only.

    Example of a reported result::

        Problem, contacts, str_name: 311 505 na-prot_13536.pdb
        Sterical clashes  0.615841584158

    Args:
        str_name: Path of the PDB file to read.
        v: When true, print progress and diagnostic information.

    Returns:
        float: clashes / contacts. When there are no contacts at all the
        clash counter (necessarily 0) is returned instead of dividing.
    """
    if v: print('fn:', str_name)
    atoms_A = []
    atoms_B = []
    # The context manager closes the file, which was previously left open.
    with open(str_name) as structure:
        for line in structure:
            if line[:4] != "ATOM":
                continue
            at_nam = line[12:16].strip()
            coor = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
            at = Atom.Atom(at_nam, coor, 0.0, 1.0, ' ', at_nam, 1, at_nam[0])
            chain_id = line[21]
            if chain_id == "A":
                atoms_A.append(at)
            elif chain_id == "B":
                atoms_B.append(at)

    # Index the larger chain and probe it with the atoms of the smaller one.
    if len(atoms_A) > len(atoms_B):
        less, more = atoms_B, atoms_A
    else:
        less, more = atoms_A, atoms_B

    problem = 0
    contacts = 0
    ns = NeighborSearch(more)
    for at in less:
        coord = array(at.get_coord())
        if ns.search(coord, 2.0, 'A'):
            problem += 1
            contacts += 1
        elif ns.search(coord, 4.0, 'A'):
            contacts += 1
    if v:
        print('problem:', float(problem))
        print('contacts:', float(contacts))

    if contacts == 0:
        # No atom pair within 4.0 A: nothing to divide by (or skip this
        # structure); problem is 0 here because a clash is also a contact.
        if v: print('ZeroDivison -- skip:', problem, contacts, str_name)
        return problem
    return float(problem) / float(contacts)
Ejemplo n.º 4
0
def check_clash(str_name, v=True):
    """Return the fraction of interface atoms that sterically clash.

    ATOM records of chains "A" and "B" are read from the PDB file. Every atom
    of the chain with fewer atoms (chain A on a tie) is tested against the
    other chain: a neighbour within 2.0 A counts as both a clash and a
    contact, otherwise a neighbour within 4.0 A counts as a contact only.
    The file name is always printed.

    Example of a reported result::

        Problem, contacts, str_name: 311 505 na-prot_13536.pdb
        Sterical clashes  0.615841584158

    Args:
        str_name: Path of the PDB file to read.
        v: When true, also print the clash and contact counters.

    Returns:
        float: clashes / contacts. When there are no contacts at all the
        clash counter (necessarily 0) is returned instead of dividing.
    """
    print(str_name)
    atoms_A = []
    atoms_B = []
    # The context manager closes the file, which was previously left open.
    with open(str_name) as structure:
        for line in structure:
            if line[:4] != "ATOM":
                continue
            at_nam = line[12:16].strip()
            coor = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
            at = Atom.Atom(at_nam, coor, 0.0, 1.0, ' ', at_nam, 1, at_nam[0])
            chain_id = line[21]
            if chain_id == "A":
                atoms_A.append(at)
            elif chain_id == "B":
                atoms_B.append(at)

    # Index the larger chain and probe it with the atoms of the smaller one.
    if len(atoms_A) > len(atoms_B):
        less, more = atoms_B, atoms_A
    else:
        less, more = atoms_A, atoms_B

    problem = 0
    contacts = 0
    ns = NeighborSearch(more)
    for at in less:
        coord = array(at.get_coord())
        if ns.search(coord, 2.0, 'A'):
            problem += 1
            contacts += 1
        elif ns.search(coord, 4.0, 'A'):
            contacts += 1
    if v:
        print('problem:', float(problem))
        print('contacts:', float(contacts))

    if contacts == 0:
        # No atom pair within 4.0 A: nothing to divide by (or skip this
        # structure); problem is 0 here because a clash is also a contact.
        print('ZeroDivison -- skip:', problem, contacts, str_name)
        return problem
    return float(problem) / float(contacts)
Ejemplo n.º 5
0
def pdb_neighbors(pdb_f, pdb_id):
    """List residues with N/O atoms close to phosphorylated residues.

    For every PTR, SEP or TPO residue the phosphate oxygens (O1P, O2P, O3P)
    and the side-chain oxygen carrying the phosphate (OH, OG or OG1) are used
    as search centres. Atoms within ``BOND_CUTOFF`` of a centre are kept, as
    are positively charged side-chain nitrogens (NE2, ND1, NZ, NE, NH2, NH1)
    within ``POSITIVE_BOND_CUTOFF``. Only atoms whose name contains ``N`` or
    ``O`` are retained, and they are reported by residue.

    Args:
        pdb_f: PDB file passed to ``PDBParser().get_structure``.
        pdb_id: Structure identifier; echoed in every result tuple.

    Returns:
        list: ``(pdb_id, centre, neighbours)`` tuples, one per phospho-residue
        that has at least one neighbour. Residues are written as
        ``RESNAME_number_chain`` strings; the order of ``neighbours`` is not
        defined.
    """
    # Side-chain oxygen bonded to the phosphate, per modified residue name.
    bridging_oxygen = {'PTR': 'OH', 'SEP': 'OG', 'TPO': 'OG1'}
    phosphate_oxygens = ('O1P', 'O2P', 'O3P')
    positive_names = ('NE2', 'ND1', 'NZ', 'NE', 'NH2', 'NH1')

    def label(residue):
        """Format a residue as RESNAME_number_chain."""
        return (residue.get_resname() + '_' + str(residue.get_id()[1]) + '_' +
                residue.get_parent().get_id())

    structure = PDBParser().get_structure(pdb_id, pdb_f)
    ns = NeighborSearch(Selection.unfold_entities(structure, 'A'))

    neighbors = []
    for res in structure.get_residues():
        resname = res.get_resname()
        if resname not in bridging_oxygen:
            continue
        center_names = phosphate_oxygens + (bridging_oxygen[resname],)
        centers = [
            atom for atom in res.child_list
            if atom.get_name() in center_names
        ]

        close_atoms = set()
        for center in centers:
            coord = center.get_coord()
            close_atoms.update(ns.search(coord, BOND_CUTOFF))
            # The name filter must apply to the atoms FOUND. It used to be
            # applied to the centre atoms, which are always oxygens, so this
            # search never contributed anything.
            close_atoms.update(
                atom for atom in ns.search(coord, POSITIVE_BOND_CUTOFF)
                if atom.get_name() in positive_names)

        polar_atoms = [
            atom for atom in close_atoms
            if 'N' in atom.get_name() or 'O' in atom.get_name()
        ]
        close_residues = [
            r for r in set(Selection.unfold_entities(polar_atoms, 'R'))
            if r != res
        ]

        if close_residues:
            neighbors.append(
                (pdb_id, label(res), [label(r) for r in close_residues]))

    return neighbors
Ejemplo n.º 6
0
def CheckClashes(structure, chain):
    """
    Tell whether `chain` clashes too much with model 0 of `structure`.

    Every atom of `chain` is used as the centre of a radius-2 residue search
    over all atoms of ``structure[0]``. Each (residue of the atom, nearby
    residue) pair is recorded, ignoring the atom's own residue.
    Arguments:
        -structure: PDB structure whose first model is searched.
        -chain: PDB chain whose atoms are tested against the structure.
    Returns True as soon as more than 25 distinct pairs have been recorded,
    False otherwise.
    """
    # One search tree over every atom of model 0 (all of its chains).
    searcher = NeighborSearch(unfold_entities(structure[0], 'A'))

    clash_pairs = set()
    for atom in chain.get_atoms():
        own_residue = atom.get_parent()
        nearby_residues = searcher.search(atom.get_coord(), radius=2, level="R")

        # An atom is always close to its own residue; that is not a clash.
        if own_residue in nearby_residues:
            nearby_residues.remove(own_residue)

        for nearby in nearby_residues:
            clash_pairs.add((own_residue, nearby))
            if len(clash_pairs) > 25:
                return True

    return False
Ejemplo n.º 7
0
def get_residues_distance_distribution(data, r):
    """
    Plot the distribution of the number of residues around each residue.

    Every file matching ``data + "*_b.pdb"`` is read with ``read_pdb_file``.
    For each residue the centroid of its atoms is computed and the residues
    found within a radius of 100 around it are counted. The histogram of
    these counts (``np.bincount``) is plotted and the file list is printed.

    Args:
        data: Path prefix of the ``*_b.pdb`` files to process.
        r: Currently unused; the search radius is fixed at 100.
    """
    files = glob.glob(data + "*_b.pdb")
    files.sort()
    neighbour_counts = []
    for file_counter, bound_pbd in enumerate(files, 1):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Protein " + str(file_counter) + "/" + str(len(files)))
        _structure, atoms, residues = read_pdb_file(bound_pbd)
        ns = NeighborSearch(atoms)
        for res in residues:
            # Start from a zero vector with the shape/dtype of a coordinate.
            coord_sum = 0 * res.child_list[0].get_coord()
            for atom in res.child_list:
                coord_sum += atom.get_coord()
            center = coord_sum / len(res.child_list)
            neighbour_counts.append(len(ns.search(center, 100, "R")))
    plot(np.bincount(neighbour_counts))
    show()
    print(files)
Ejemplo n.º 8
0
def get_neighbor_chains(structure, options):
    """
    Map every chain of a structure to the chains it is in contact with.

    A contact is a pair of backbone reference atoms (alpha carbon ``CA`` or
    phosphorus ``P``) lying within 8 angstroms of each other. A chain is
    reported as a neighbor when it has more than 8 such contacts with the
    key chain. Chains that were already processed as keys are skipped, so
    each pair of chains is reported in one direction only.

    :param structure: structure whose chains are compared.
    :param options: object whose ``verbose`` attribute enables printing of
        the per-chain contact counts.
    :return: dictionary with chains as keys and sets of neighbor chains as
        values.
    """
    reference_atoms = ('CA', 'P')
    neighbor_chains = {}
    ns = NeighborSearch(list(structure.get_atoms()))
    for chain in structure.get_chains():
        neighbor_chains[chain] = set()

        # Number of reference-atom contacts with every other chain.
        neighbor_dict = {}
        for atom in chain.get_atoms():
            if atom.get_id() not in reference_atoms:
                continue
            for atom2 in ns.search(atom.get_coord(), 8, level='A'):
                if atom2.get_id() not in reference_atoms:
                    continue
                chain2 = atom2.get_parent().get_parent()  # atom -> residue -> chain
                # Ignore the chain itself and chains that were already keys.
                if chain2 != chain and chain2 not in neighbor_chains:
                    neighbor_dict[chain2] = neighbor_dict.get(chain2, 0) + 1

        if options.verbose:
            print('\n%s' % chain)
        for close_chain, contacts in neighbor_dict.items():
            if options.verbose:
                print('%s: %s' % (close_chain, contacts))
            # Recorded regardless of verbosity; this used to happen only
            # when options.verbose was set.
            if contacts > 8:
                neighbor_chains[chain].add(close_chain)
    return neighbor_chains
Ejemplo n.º 9
0
def within(resnum, angstroms, chain_id, model, use_ca=False, custom_coord=None):
    """Return the residues that have an atom within a distance of a point.

    See: https://www.biostars.org/p/1816/ https://www.biostars.org/p/269579/

    The search centre is ``custom_coord`` when one is given; otherwise it is
    an atom of residue ``model[chain_id][resnum]``: its ``CA`` atom when
    ``use_ca`` is true, else the last atom in the residue's atom list.

    Args:
        resnum (int): Residue key in the chain; ignored when ``custom_coord``
            is given.
        angstroms (float): Search radius.
        chain_id (str): Chain key in the model; ignored when ``custom_coord``
            is given.
        model (Model): Model whose atoms are searched.
        use_ca (bool): If the alpha-carbon atom should be used for the search,
            otherwise use the last atom of the residue
        custom_coord (list): Custom XYZ coordinate to get within. ``None`` or
            an empty sequence selects the residue-based centre.

    Returns:
        list: List of Bio.PDB.Residue.Residue objects

    """
    # TODO: should have separate method for within a normal residue (can use
    # "resnum" with a int) or a custom coord, where you don't need to specify
    # resnum
    atom_list = Selection.unfold_entities(model, 'A')
    ns = NeighborSearch(atom_list)

    # Explicit None/length test: plain truthiness raises for numpy arrays.
    if custom_coord is not None and len(custom_coord) > 0:
        target_atom_coord = np.array(custom_coord, 'f')
    else:
        target_residue = model[chain_id][resnum]
        if use_ca:
            target_atom = target_residue['CA']
        else:
            target_atom = target_residue.child_list[-1]
        target_atom_coord = np.array(target_atom.get_coord(), 'f')
    neighbors = ns.search(target_atom_coord, angstroms)
    residue_list = Selection.unfold_entities(neighbors, 'R')

    return residue_list
Ejemplo n.º 10
0
    def search_connectivity(self, atom_center, rank, NS=None, previous=None):
        """Explore the atoms connected to `atom_center`, `rank` bonds deep.

        Atoms within ``parametres.cutoff_bonds`` of `atom_center` are taken
        as its direct neighbours, leaving out `atom_center` itself and
        `previous` (the atom the search came from).

        Args:
            atom_center: Atom whose neighbours are searched.
            rank: Depth of the exploration; 1 returns direct neighbours only.
            NS: Search tree to reuse; built from ``self.model.atoms`` when
                omitted.
            previous: Atom excluded from the result, if any.

        Returns:
            For ``rank == 1`` the list of neighbouring atoms. Otherwise a dict
            mapping each neighbour to the result of the search at
            ``rank - 1`` from that neighbour, or to ``None`` when that result
            is empty.
        """
        if NS is None:
            NS = NeighborSearch(self.model.atoms)

        excluded = (atom_center, previous)
        bonded = []
        for candidate in NS.search(center=atom_center.get_coord(),
                                   radius=parametres.cutoff_bonds,
                                   level="A"):
            if candidate not in excluded:
                bonded.append(candidate)

        if rank == 1:
            return bonded

        connectivity = {}
        for neighbour in bonded:
            deeper = self.search_connectivity(atom_center=neighbour,
                                              rank=rank - 1,
                                              NS=NS,
                                              previous=atom_center)
            # Dead ends are stored as None rather than as an empty container.
            connectivity[neighbour] = deeper if len(deeper) != 0 else None
        return connectivity
 def extract_feature(self):
     """Attach the D2 category shape distribution to every residue.

     For the unbound ligand and receptor of each complex in the database, a
     ``(number of residues, self.number_of_bins)`` array is computed from the
     residues found within ``self.radius`` of each residue centre, and cached
     as ``<dir><protein name>.npy``. An existing cache file is reused instead
     of recomputing. Each row is then stored on its residue under
     ``Features.D2_CATEGORY_SHAPE_DISTRIBUTION``.
     """
     seed(self.seed)
     counter = 0
     print_info_nn(" >>> Adding D2 category based shape distribution for database {0} ... ".format(self._database.name))
     overall_time = datetime.now()
     if not os.path.exists(self._get_dir_name()):
         os.makedirs(self._get_dir_name())
     for complex_name in self._database.complexes.keys():
         protein_complex = self._database.complexes[complex_name]
         proteins = [protein_complex.unbound_formation.ligand, protein_complex.unbound_formation.receptor]
         for protein in proteins:
             shape_dist_file = self._get_dir_name() + protein.name
             if not os.path.exists(shape_dist_file + ".npy"):
                 # Progress output: print_info is used for every 16th name.
                 counter += 1
                 if counter <= 15:
                     print_info_nn("{0}, ".format(protein.name))
                 else:
                     counter = 0
                     print_info("{0}".format(protein.name))
                 neighbour_search = NeighborSearch(protein.atoms)
                 distributions = np.zeros((len(protein.residues), self.number_of_bins))
                 for i, residue in enumerate(protein.residues):
                     nearby_residues = neighbour_search.search(residue.center, self.radius, "R")
                     distributions[i, :] = self._compute_distribution(nearby_residues)
                 np.save(shape_dist_file, distributions)
             # Always read back from disk so cached and fresh runs agree.
             distributions = np.load(shape_dist_file + ".npy")
             for i, residue in enumerate(protein.residues):
                 residue.add_feature(Features.D2_CATEGORY_SHAPE_DISTRIBUTION, distributions[i, :])
     # total_seconds() covers the whole duration; .seconds alone drops days.
     elapsed = int((datetime.now() - overall_time).total_seconds())
     print_info("took {0} seconds.".format(elapsed))
Ejemplo n.º 12
0
def pdb_neighbors(pdb_f, pdb_id):
    """Map each phosphorylated residue to the residues around its phosphate.

    The atoms O1P, O2P, O3P, OH, OG and OG1 of PTR, SEP and TPO residues are
    used as search centres. The residues owning an atom within
    ``BOND_CUTOFF`` of any centre of a phospho-residue are collected for
    that residue, excluding the phospho-residue itself.

    Args:
        pdb_f: PDB file passed to ``PDBParser().get_structure``.
        pdb_id: Structure identifier passed to the parser.

    Returns:
        dict: ``RESNAME_number_chain`` of each phospho-residue mapped to the
        list of ``RESNAME_number_chain`` strings of its neighbours. An empty
        string is returned when the structure has no phospho-atoms.
    """
    phospho_residues = ('PTR', 'SEP', 'TPO')
    phospho_oxygens = ('O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1')

    def label(residue):
        """Format a residue as RESNAME_number_chain."""
        return '_'.join([
            residue.get_resname(),
            str(residue.get_id()[1]),
            str(residue.get_parent().get_id()),
        ])

    structure = PDBParser().get_structure(pdb_id, pdb_f)
    ns = NeighborSearch(Selection.unfold_entities(structure, 'A'))
    centers = [
        a for a in structure.get_atoms()
        if a.get_parent().get_resname() in phospho_residues
        and a.get_name() in phospho_oxygens
    ]
    # if there are no phospho-atoms, return (empty string kept for callers)
    if not centers:
        return ''

    neighbors = {}
    for atom in centers:
        residue = atom.get_parent()
        # Merge the neighbours of all centres of the same residue; each
        # centre used to overwrite what the previous ones had found.
        found = neighbors.setdefault(residue, [])
        close_atoms = ns.search(atom.get_coord(), BOND_CUTOFF)
        for close_residue in Selection.unfold_entities(close_atoms, 'R'):
            if close_residue != residue and close_residue not in found:
                found.append(close_residue)

    # add residue id and chain id; .items() works on Python 2 and 3
    return dict(
        (label(residue), [label(close) for close in close_residues])
        for residue, close_residues in neighbors.items())
Ejemplo n.º 13
0
	def __init__(self, structure, chain_id, threshold):
		"""Count the atoms surrounding every standard residue of one chain.

		For each residue of chain `chain_id` (model 0) whose hetero flag is
		blank, the atoms within `threshold` of ``get_center(residue)`` are
		classified twice: by chain (same chain id as the residue -> intra,
		otherwise inter) and by distance (closer than ``threshold/2`` ->
		short, otherwise long). The tuple ``(inter, intra, long, short)`` is
		stored in ``self.contacts`` under the residue's full id.
		"""
		searcher = NeighborSearch(list(structure.get_atoms()))

		for chain in structure[0]:
			if chain.id != chain_id:
				continue
			for residue in chain:
				if residue.id[0] != ' ':
					continue

				center = get_center(residue)
				nearby_atoms = searcher.search(center, threshold)
				total = len(nearby_atoms)

				# Chain classification: element 2 of the full id is compared.
				intra = sum(
					1 for a in nearby_atoms
					if a.get_full_id()[2] == residue.get_full_id()[2])
				inter = total - intra

				# Distance classification relative to half the threshold.
				shor = sum(
					1 for a in nearby_atoms
					if np.linalg.norm(a.get_coord()-center) < threshold/2)
				lon = total - shor

				self.contacts[residue.get_full_id()] = (inter, intra, lon, shor)
Ejemplo n.º 14
0
 def __calculate_separate_simbox_statistics(self, component, radius):
     """
     Calculate the number of grid cells describing one complex component.

     The calculation is done when all components are located in the center
     of the simulation box. A cubic grid is generated around the component's
     structure; for every atom the grid cells within reach of the atom are
     collected and passed to ``__calculate_simbox_statistics``, whose return
     value becomes ``self.cells_taken_by_component``.

     Args:
         component: Object exposing ``pyrystruct.struct`` (the structure) and
             ``pyrystruct.chain``.
         radius: Value used to build the grid and to size the per-atom search.

     Returns:
         int: ``len(self.cells_taken_by_component)`` after all atoms.
     """
     self.cells_taken_by_component = {}
     grid = Grid("cubic", radius, 10*radius, -1.)
     grid.calculate_boundaries(component.pyrystruct.struct)
     grid.generate_cubic_grid([])

     ns = NeighborSearch(grid.grid_cells)
     # presumably the diagonal of a cubic cell of edge `radius` - TODO confirm
     base_reach = radius * sqrt(3)

     for atom in component.pyrystruct.struct.get_atoms():
         center = array([atom.coord[0], atom.coord[1], atom.coord[2]])
         # Atoms larger than `radius` extend the search by their vdw value.
         if atom.vdw <= radius:
             rad = base_reach
         else:
             rad = base_reach + atom.vdw
         neighbours = ns.search(center, rad, 'A')
         self.cells_taken_by_component = self.__calculate_simbox_statistics(neighbours)
         if len(neighbours) == 0:
             # Single formatted argument: same output on Python 2 and 3.
             print("%s %s %s" % ("OMGOMG"*10, component.pyrystruct.chain, atom.serial_number))

     return len(self.cells_taken_by_component)
Ejemplo n.º 15
0
    def search_subs(
        self,
        hx_atom,
        cx_atom,
        cy_atom,
        hy_atom,
        j_atoms,
        chosen_j
    ):
        """ Description:
                Record the substituents found around the CY atom of a
                coupling. Atoms of ``self.atom_list`` within 1.7 of the
                coordinates stored for `cy_atom` whose serial number is not
                in `j_atoms` are written to
                ``self.j_dict[chosen_j]["substituents"]``.

            Usage:
                self.search_subs(hx, cx, cy, hy, j_atoms, chosen_j)

            Parameters:
                hx_atom, cx_atom, cy_atom: keys of ``self.atom_dict``.
                hy_atom: not used by this method.
                j_atoms: serial numbers to leave out.
                chosen_j: key of ``self.j_dict`` to update.
        """

        cy_position = self.atom_dict[cy_atom][2]
        nearby = NeighborSearch(self.atom_list).search(cy_position, 1.7)
        substituents = [
            candidate for candidate in nearby
            if candidate.serial_number not in j_atoms
        ]

        for substituent in substituents:
            serial = substituent.serial_number
            entry = {
                "SY": self.atom_dict[serial][0],
                "HX": self.atom_dict[hx_atom][0],
                "CX": self.atom_dict[cx_atom][0],
                "CY": self.atom_dict[cy_atom][0],
                "element": self.atom_dict[serial][1]
            }
            self.j_dict[chosen_j]["substituents"][serial] = entry
def _get_contacts(pdb_path, chain_rec, chain_lig, contact_dist):
    """Collect receptor/ligand residue contacts from model 0 of a PDB file.

    A ligand residue and a receptor residue are in contact when any atom of
    the ligand residue lies within `contact_dist` of any receptor atom.
    Residues are described as ``(one-letter name, number, chain id)``.

    Args:
        pdb_path: File parsed with the module-level ``pdb_parser``.
        chain_rec: Iterable of receptor chain ids.
        chain_lig: Iterable of ligand chain ids.
        contact_dist: Search radius around every ligand atom.

    Returns:
        tuple: three sets - the contacts as 6-tuples (receptor triple followed
        by ligand triple), the receptor residues involved, and the ligand
        residues involved.
    """
    model = pdb_parser.get_structure('X', pdb_path)[0]

    receptor_chains = [model[chain_id] for chain_id in chain_rec]
    ligand_chains = [model[chain_id] for chain_id in chain_lig]

    searcher = NeighborSearch(Selection.unfold_entities(receptor_chains, 'A'))

    contacts = set()
    contacts_lig = set()
    contacts_rec = set()
    for ligand_res in Selection.unfold_entities(ligand_chains, 'R'):
        lig_key = (
            dindex_to_1[d3_to_index[ligand_res.get_resname()]],
            ligand_res.get_id()[1],
            ligand_res.get_parent().get_id(),
        )

        # Receptor residues touched by any atom of this ligand residue;
        # repeats are harmless because the results are sets.
        touched = []
        for lig_atom in ligand_res:
            close_atoms = searcher.search(lig_atom.get_coord(), contact_dist)
            touched.extend(Selection.unfold_entities(close_atoms, 'R'))

        for receptor_res in touched:
            rec_key = (
                dindex_to_1[d3_to_index[receptor_res.get_resname()]],
                receptor_res.get_id()[1],
                receptor_res.get_parent().get_id(),
            )
            contacts.add(rec_key + lig_key)
            contacts_lig.add(lig_key)
            contacts_rec.add(rec_key)

    return contacts, contacts_rec, contacts_lig
Ejemplo n.º 17
0
def get_residues_distance_distribution(data, r):
    """
    Plot the distribution of the number of residues around each residue.

    Every file matching ``data + "*_b.pdb"`` is read with ``read_pdb_file``.
    For each residue the centroid of its atoms is computed and the residues
    found within a radius of 100 around it are counted. The histogram of
    these counts (``np.bincount``) is plotted and the file list is printed.

    Args:
        data: Path prefix of the ``*_b.pdb`` files to process.
        r: Currently unused; the search radius is fixed at 100.
    """
    files = glob.glob(data + "*_b.pdb")
    files.sort()
    neighbour_counts = []
    for file_counter, bound_pbd in enumerate(files, 1):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Protein " + str(file_counter) + "/" + str(len(files)))
        _structure, atoms, residues = read_pdb_file(bound_pbd)
        ns = NeighborSearch(atoms)
        for res in residues:
            # Start from a zero vector with the shape/dtype of a coordinate.
            coord_sum = 0 * res.child_list[0].get_coord()
            for atom in res.child_list:
                coord_sum += atom.get_coord()
            center = coord_sum / len(res.child_list)
            neighbour_counts.append(len(ns.search(center, 100, "R")))
    plot(np.bincount(neighbour_counts))
    show()
    print(files)
Ejemplo n.º 18
0
def get_interactions_between_chains(model, chain_id_1, chain_id_2, r_cutoff=6):
    """Calculate interactions between the residues of the two chains.

    An interaction is defined as a pair of residues where at least one pair
    of atoms is closer than r_cutoff. Only residues whose name is in
    ``AMINO_ACIDS`` and whose hetero flag is blank are considered, on both
    chains.

    .. deprecated:: 1.0
        Use python:fn:`get_interacting_residues` instead.
        It gives you both the residue index and the resnum.

    Parameters
    ----------
    model
        Model whose children are searched for the two chains.
    chain_id_1, chain_id_2
        Identifiers of the chains; residues of the first are the keys.
    r_cutoff
        Atom-atom distance cutoff.

    Returns
    -------
    OrderedDict
        Keys are (residue_number, residue_amino_acid) tuples
        (e.g. ('0', 'M'), ('1', 'Q'), ...).
        Values are lists of (residue_number, residue_amino_acid) tuples
        (e.g. [('0', 'M'), ('1', 'Q'), ...]), sorted by residue number and
        then insertion code.

    Raises
    ------
    Exception
        If either chain is not found in the model.
    """
    try:
        from Bio.PDB import NeighborSearch
    except ImportError as e:
        logger.warning('Importing Biopython NeighborSearch returned an error: {}'.format(e))
        logger.warning('Using the the slow version of the neighbour-finding algorithm...')
        return get_interactions_between_chains_slow(model, chain_id_1, chain_id_2, r_cutoff)

    def is_standard(residue):
        """True for amino-acid residues that are not hetero/water records."""
        return residue.resname in AMINO_ACIDS and residue.id[0] == ' '

    def describe(residue):
        """Return the (number + insertion code, one-letter name) tuple."""
        resnum = str(residue.id[1]) + residue.id[2].strip()
        return (resnum, convert_aa(residue.get_resname(), quiet=True))

    # Extract the chains of interest from the model
    chain_1 = None
    chain_2 = None
    for child in model.get_list():
        if child.id == chain_id_1:
            chain_1 = child
        if child.id == chain_id_2:
            chain_2 = child
    if chain_1 is None or chain_2 is None:
        raise Exception('Chains %s and %s were not found in the model' % (chain_id_1, chain_id_2))

    ns = NeighborSearch(list(chain_2.get_atoms()))
    interactions_between_chains = OrderedDict()
    for residue_1 in chain_1:
        if not is_standard(residue_1):
            continue
        key = describe(residue_1)
        interacting_residues = set()
        for atom_1 in residue_1:
            interacting_residues.update(ns.search(atom_1.get_coord(), r_cutoff, 'R'))
        partners = [res for res in interacting_residues if is_standard(res)]
        if not partners:
            continue
        # Sort on the numeric id itself: rebuilding the number from the
        # digits of its string form dropped the sign of negative numbers.
        partners.sort(key=lambda res: (res.id[1], res.id[2]))
        interactions_between_chains[key] = [describe(res) for res in partners]
    return interactions_between_chains
Ejemplo n.º 19
0
def compute_interaction_center(pdb_file, mutation_site):
    """Computes the geometric center of all heavy atoms interacting with the mutated residue.

    The first residue whose sequence number equals `mutation_site` is used.
    Heavy atoms within 5 A of any of its side-chain heavy atoms (the CA atom
    for a residue with exactly four heavy atoms, i.e. glycine) are the
    interacting atoms.

    Parameters
    ----------
    pdb_file : str
        Path to the PDB file containing the structure of the protein.

    mutation_site : int
        An integer designating the residue sequence ID.

    Returns
    -------
    NumPy ndarray
        The Cartesian coordinates of the geometric center; zeros if no
        interacting atom was found.

    Notes
    -----
    Prints a message and calls ``sys.exit(1)`` when no residue carries the
    requested sequence number.
    """
    pdb_parser = PDBParser(PERMISSIVE=1)
    model = pdb_parser.get_structure(id='tmp', file=pdb_file)

    # get all heavy atoms of the protein
    all_heavy_atoms = [a for a in model.get_atoms() if a.element != 'H']

    # get the mutated residue (first match over all chains)
    site = int(mutation_site)
    mutation_res = next(
        (res for res in model.get_residues() if res.get_id()[1] == site), None)
    if mutation_res is None:
        print('Invalid mutation site: {}'.format(mutation_site))
        sys.exit(1)

    # get sidechain atoms; the first four heavy atoms are taken to be the
    # backbone (N, CA, C, O order assumed - TODO confirm for unusual files)
    heavy_atoms = [a for a in mutation_res.get_list() if a.element != 'H']
    if len(heavy_atoms) == 4:
        # if only four heavy atoms, aka, GLY, use CA
        side_chain_atoms = [heavy_atoms[1]]
    else:
        # Slice the hydrogen-free list: in the full atom list hydrogens can
        # shift the side-chain offset and end up as search centres.
        side_chain_atoms = heavy_atoms[4:]

    # search for neighboring atoms
    ns = NeighborSearch(atom_list=all_heavy_atoms, bucket_size=10)
    all_interaction_atoms = set()
    for a in side_chain_atoms:
        all_interaction_atoms.update(
            ns.search(center=a.coord, radius=5, level='A'))

    # compute geometric center of all interaction atoms
    if not all_interaction_atoms:
        return np.zeros((3,))
    coords = np.array([a.coord for a in all_interaction_atoms], dtype=float)
    return coords.mean(axis=0)
Ejemplo n.º 20
0
    def make_neighbors(self, fl_struct):
        """Build the NeighborsNet of `fl_struct`.

        For every residue of every chain that is not flagged
        ``rna_dna_chain``, the residues within
        ``self.config["neighbors_range"]`` of any of its atom coordinates are
        looked up in ``self.bio_struct`` and registered as short-range,
        long-range (same chain) or inter-chain neighbours.

        Args:
            fl_struct: Structure wrapper providing ``get_chains()`` and
                ``chains``.

        Returns:
            NeighborsNet: the populated network.
        """
        net = NeighborsNet()
        # Bio.PDB NeighborSearch does the distance queries
        searcher = NeighborSearch(list(self.bio_struct.get_atoms()))

        for fl_chain in fl_struct.get_chains():
            if fl_chain.rna_dna_chain:
                continue
            for fl_res in fl_chain.residues:
                net.add_default(fl_res)
                # A residue is searched once per atom; remember handled hits.
                handled = set()
                for atom_coord in fl_res.atoms_coord:
                    hits = searcher.search(atom_coord,
                                           self.config["neighbors_range"],
                                           level='R')
                    for res in hits:
                        # Must be a good residue of the same model (the
                        # search tree may span several models) and new.
                        if not (is_good_res(res)
                                and fl_res.model_id == res_model_id(res)):
                            continue
                        if res.get_full_id() in handled:
                            continue

                        # Map the Bio.PDB residue onto its wrapper, if any.
                        partner_chain = fl_struct.chains[res_chain_id(res)]
                        pos_2 = partner_chain.string_index_map.get(
                            res_string_index(res))
                        if pos_2 is None:
                            continue
                        fl_res_2 = partner_chain.residues[pos_2]

                        if fl_res.chain_id != fl_res_2.chain_id:
                            # different chain: inter-chain neighbour
                            net.add_inter(fl_res, fl_res_2)
                            handled.add(res.get_full_id())
                            continue

                        handled.add(res.get_full_id())
                        if fl_res.pos_in_chain == fl_res_2.pos_in_chain:
                            # the residue itself
                            continue
                        separation = abs(fl_res.pos_in_chain -
                                         fl_res_2.pos_in_chain)
                        # Close in sequence without gaps in between: short
                        # range; anything else in the chain is long range.
                        if (separation < self.config["long_short_threshold"]
                                and not fl_chain.have_gaps(fl_res, fl_res_2)):
                            net.add_short(fl_res, fl_res_2)
                        else:
                            net.add_long(fl_res, fl_res_2)

        return net
Ejemplo n.º 21
0
def is_H_bond_old(atom_A, atom_D):
    """
    Angle-based hydrogen bond test between an acceptor and a donor atom
    (criteria attributed to RDOCK in the original notes).

    Geometry: X-A - - - D-Y
      atom_A: acceptor atom
      atom_D: donor atom
      X / Y: atoms of the parent of A / D found within
             ``parametres.cutoff_bonds`` of A / D

    The pair is reported as a hydrogen bond when:
      * A and D do not have the same parent,
      * some X satisfies 100 < angle(X, A, D) < 180,
      * some Y satisfies 60 < angle(Y, D, A) < 120.

    The A-D distance itself is not checked in this function.

    Returns True or False.
    """

    def _angle_in_window(pivot, partner, lower, upper):
        # Candidates are the other atoms sharing the pivot's parent.
        siblings = [atom for atom in pivot.get_parent() if pivot != atom]
        finder = NeighborSearch(siblings)
        for candidate in finder.search(center=pivot.get_coord(),
                                       radius=parametres.cutoff_bonds):
            theta = calcul_angle(candidate.get_coord(), pivot.get_coord(),
                                 partner.get_coord())
            # Fold the angle so that it never exceeds 180 degrees.
            theta = min(theta, 360 - theta)
            if lower < theta < upper:
                return True
        return False

    if atom_A.get_parent() != atom_D.get_parent():
        # The donor side is only inspected once the acceptor side passed.
        if _angle_in_window(atom_A, atom_D, 100, 180):
            return _angle_in_window(atom_D, atom_A, 60, 120)

    return False
    def extract_feature(self):
        """
        Attach the D1 surface shape distribution to every residue of the database.

        For the ligand and the receptor of each complex's unbound formation, a
        ``(len(protein.residues), number_of_bins + 1)`` array is computed and
        saved with ``np.save`` under ``self._get_dir_name() + protein.name``.
        When the matching ``.npy`` file already exists the computation is
        skipped and the file is loaded instead. Row ``i`` is then stored on
        ``protein.residues[i]`` as ``Features.D1_SURFACE_SHAPE_DISTRIBUTION``.

        NOTE: ``.npy`` files written by earlier runs are reused unchanged.
        """
        seed(self.seed)
        print_info_nn(
            " >>> Adding D1 surface shape distribution for database {0} ... ".
            format(self._database.name))
        overall_time = datetime.now()
        counter = 0
        if not os.path.exists(self._get_dir_name()):
            os.makedirs(self._get_dir_name())
        for complex_name in self._database.complexes.keys():
            protein_complex = self._database.complexes[complex_name]
            proteins = [
                protein_complex.unbound_formation.ligand,
                protein_complex.unbound_formation.receptor
            ]
            for protein in proteins:
                shape_dist_file = self._get_dir_name() + protein.name
                if not os.path.exists(shape_dist_file + ".npy"):
                    # Progress output: names are printed on one line and a
                    # line break is emitted on every 16th computed protein.
                    counter += 1
                    if counter <= 15:
                        print_info_nn("{0}, ".format(protein.name))
                    else:
                        counter = 0
                        print_info("{0}".format(protein.name))
                    atoms = protein.atoms
                    neighbour_search = NeighborSearch(atoms)
                    distributions = np.zeros(
                        (len(protein.residues), self.number_of_bins + 1))
                    for i in range(len(protein.residues)):
                        central_residue = protein.residues[i]
                        # The residue itself always opens the list.
                        nearby_residues = [protein.biopython_residues[i]]
                        temp_nearby_residues = neighbour_search.search(
                            central_residue.center, self.radius, "R")
                        for nearby_residue in temp_nearby_residues:
                            if nearby_residue not in protein.biopython_residues:
                                continue
                            residues_index = protein.biopython_residues.index(
                                nearby_residue)
                            # Use a separate name for the neighbour: rebinding
                            # the central residue here made the distribution
                            # below use the last neighbour's centre.
                            neighbour = protein.residues[residues_index]

                            if neighbour.get_feature(
                                    Features.RELATIVE_ACCESSIBLE_SURFACE_AREA
                            ) >= self.rASA_threshold:
                                nearby_residues.append(nearby_residue)
                        distributions[i, :] = self._compute_distribution(
                            nearby_residues, central_residue.center)
                    np.save(shape_dist_file, distributions)
                distributions = np.load(shape_dist_file + ".npy")
                for i in range(len(protein.residues)):
                    protein.residues[i].add_feature(
                        Features.D1_SURFACE_SHAPE_DISTRIBUTION,
                        distributions[i, :])
        print_info("took {0} seconds.".format(
            (datetime.now() - overall_time).seconds))
Ejemplo n.º 23
0
def check_clash(model, chain_to_add, clash_distance=2.5):
    """
    Look for clashes between a chain about to be added and an existing structure.

    model: previous structure; all of its atoms are indexed for the search.
    chain_to_add: new chain (Chain object from PDB.Chain).
    clash_distance: distance threshold (in Angstroms) under which an existing
        atom counts as clashing with an atom of the new chain.

    Returns a tuple ``(clashing_chains, clashes_with_itself)``:
      * ``(None, False)`` when 20 or fewer atom-atom clashes were counted;
      * ``(set_of_chain_ids, True)`` when exactly one chain clashes, the
        second character of its id equals the second character of
        ``chain_to_add.id`` and ``superimpose`` yields an RMSD <= 3.0;
      * ``(set_of_chain_ids, False)`` in every other case.
    """
    searcher = NeighborSearch(list(model.get_atoms()))

    hit_chain_ids = set()
    n_hits = 0

    # Every (new atom, existing atom) pair within the threshold counts once.
    for new_atom in chain_to_add.get_atoms():
        for hit in searcher.search(new_atom.get_coord(), clash_distance):
            hit_chain_ids.add(hit.get_parent().get_parent().id)
            n_hits += 1

    # Too few contacts to be considered a clash at all.
    if n_hits <= 20:
        return None, False

    if len(hit_chain_ids) == 1:
        only_id = list(hit_chain_ids)[0]
        if chain_to_add.id[1] == only_id[1]:
            # Same kind of chain: decide via superimposition whether the new
            # chain sits on top of the conflicting one.
            rmsd = superimpose(model[0][only_id], chain_to_add)
            if rmsd <= 3.0:
                return hit_chain_ids, True

    # Clashing with several chains, or with a different chain.
    return hit_chain_ids, False
Ejemplo n.º 24
0
    def check_if_collide(self, component, point, radius):
        """
        Count the atoms of ``component`` that collide with ``point``.

        The search is centred on ``(point.x, point.y, point.z)``. Its radius
        is ``radius`` plus 1.5, the value assumed here for the radius of a
        pseudoresidue (in Angstroms) when detecting collisions.

        Returns the number of atoms found; 0 means no collision.
        """
        searcher = NeighborSearch(list(component.get_atoms()))
        centre = array([point.x, point.y, point.z])
        colliding_atoms = searcher.search(centre, radius + 1.5, "A")
        return len(colliding_atoms)
Ejemplo n.º 25
0
 def interaction(self, pdb_id, filename, domain_1, domain_2):
     """Returns a dict with informations (atoms, residues...) if two domains
     interact with each other, and returns False if not.

     An atom of domain_1 and an atom of domain_2 are treated as interacting
     when they lie within 5 angstroms of each other.

     The returned dict has the keys '1' and '2' (one per domain), each
     mapping to a dict with the keys 'atoms' and 'residues'. Atoms of
     domain_2 are appended once per interacting domain_1 atom, so the same
     atom may appear several times in infos['2']['atoms'].
     """
     # Parenthesised single-argument form: prints the same text on Python 2
     # and is valid syntax on Python 3.
     print("Searching for interactions in "+pdb_id+"...")
     # creates a structure object/class to extract atoms of the two domains
     model = structure(pdb_id).get_model(pdb_id, filename)
     residues_1 = structure(pdb_id).get_residues(model, domain_1)
     residues_2 = structure(pdb_id).get_residues(model, domain_2)
     atoms_1 = Selection.unfold_entities(residues_1, 'A')
     atoms_2 = Selection.unfold_entities(residues_2, 'A')
     # Serial numbers of the atoms, kept in sets because they are only used
     # for membership tests inside the loops below.
     numbers_1 = frozenset(structure(pdb_id).serial_numbers(atoms_1))
     numbers_2 = frozenset(structure(pdb_id).serial_numbers(atoms_2))
     # the search starts here !
     atoms = Selection.unfold_entities(model, 'A')
     nsearch = NeighborSearch(atoms)
     interacting_atoms_1 = []
     interacting_atoms_2 = []
     for atom in atoms:
         if atom.get_serial_number() not in numbers_1:
             continue
         # Plain distance criterion (5 angstroms). The original authors
         # noted that a surface/accessibility analysis was planned but
         # never implemented.
         neighbors = nsearch.search(atom.get_coord(), 5)
         for neighbor in neighbors:
             if neighbor.get_serial_number() in numbers_2:
                 interacting_atoms_2.append(neighbor)
                 if atom not in interacting_atoms_1:
                     interacting_atoms_1.append(atom)
     if len(interacting_atoms_2) == 0:
         return False
     # returns a dict with all residues and atoms;
     # the residues are the parents of the listed atoms
     interacting_residues_1 = structure(pdb_id).atoms2residues(
         interacting_atoms_1)
     interacting_residues_2 = structure(pdb_id).atoms2residues(
         interacting_atoms_2)
     infos = {
         '1': {'atoms': interacting_atoms_1,
               'residues': interacting_residues_1},
         '2': {'atoms': interacting_atoms_2,
               'residues': interacting_residues_2},
     }
     return infos
Ejemplo n.º 26
0
 def getReferenceAtomsWithinSphere(self, residue, radius, caOnly=False):
     """Return the atoms of ``self.ref_chain`` around the CA atom of ``residue``.

     residue: object whose class is named 'Residue'; its 'CA' atom is the
         centre of the search.
     radius: search radius passed to the neighbour search.
     caOnly: when true, only atoms named 'CA' are returned.

     Raises TypeError when the class of ``residue`` is not named 'Residue'.
     """
     if residue.__class__.__name__ != 'Residue':
         raise TypeError(
             "The function process(residue, radius, caOnly) expects the first argument of class Bio.PDB.Residue"
         )
     searcher = NeighborSearch(Selection.unfold_entities(self.ref_chain, 'A'))
     nearby_atoms = searcher.search(residue['CA'].get_coord(), radius, level='A')
     if not caOnly:
         return nearby_atoms
     return [atom for atom in nearby_atoms if atom.get_name() == 'CA']
Ejemplo n.º 27
0
def get_neighbor_atoms(chain: ChainDesc, ligand: PhysicalResidue, produce_physical_atoms: bool = True) -> list:  # a list of physical atoms
    """Collect the chain atoms lying within 10 angstroms of any ligand atom.

    Steps:
      - gather the BioPython atoms of the chain residues, leaving out residues
        that share the ligand's short name, residues that are neither amino
        acids nor cofactors, and residues whose description reports
        ``check_deeper()``;
      - run a neighbour search around every ligand atom;
      - merge the hits, without duplicates, in order of first appearance.

    Returns the BioPython atoms themselves when ``produce_physical_atoms`` is
    false, otherwise one ``PhysicalAtom`` per neighbour.
    """
    # collect chain atoms
    chain_atoms = list()

    # get bio python residues of the chain
    bio_residues = list(chain.chain.get_residues())

    for residue in chain.get_residues():  # physical residue
        # do not count atoms from ligand itself
        if residue.get_residue_desc().get_short_name() == ligand.get_residue_desc().get_short_name():
            continue

        # do not count atoms from ligands
        # TODO: refactor
        if residue.get_residue_desc().get_short_name() not in database.get_amino_acids() + database.get_cofactors():
            continue
        elif residue.get_residue_desc().check_deeper():
            continue

        # NOTE(review): assumes residue.get_index() is the 1-based position
        # of the residue in bio_residues - confirm against ChainDesc.
        for atom in bio_residues[residue.get_index() - 1].get_atoms():  # BioPython atom!!!!
            chain_atoms.append(atom)

    neighbour_atoms = list()
    # The search structure depends only on chain_atoms, so it is built once
    # (on the first ligand atom) rather than once per ligand atom.
    search = None
    for atom in ligand.get_atoms():  # physical atom
        if search is None:
            search = NeighborSearch(chain_atoms)
        current_neighbours = search.search(atom.get_coords(), 10.0)
        for neighbour in current_neighbours:  # BioPython atom!!!!
            if neighbour not in neighbour_atoms:
                neighbour_atoms.append(neighbour)
    if not produce_physical_atoms:
        return neighbour_atoms

    neighbour_physical_atoms = list()
    for neighbour in neighbour_atoms:
        # Residue number 1 is treated as the N terminus; a residue carrying
        # an 'OXT' atom is treated as the C terminus.
        terminus = None
        if neighbour.get_parent().get_id()[1] == 1:
            terminus = 'N'
        elif 'OXT' in [a.get_id() for a in neighbour.get_parent().get_atoms()]:
            terminus = 'C'
        atom_desc = database.get_residue(neighbour.get_parent(), terminus).get_atom(neighbour.get_id())
        physical_atom = PhysicalAtom(bio_atom=neighbour, atom_desc=atom_desc, coords=neighbour.get_coord())
        neighbour_physical_atoms.append(physical_atom)

    return neighbour_physical_atoms
Ejemplo n.º 28
0
def calculateNeighbors(filename, radius):
	"""Compute a per-residue burial value for the structure stored in ``filename``.

	For each residue, the number of atoms found within ``radius`` of each of
	its atoms is summed and divided by the residue's atom count.

	Returns a dict keyed by the residue number as a string. The chain is not
	part of the key, so a later residue with the same number replaces an
	earlier one.
	"""
	structure = parser.get_structure(filename.split(".pdb")[0], filename)
	all_atoms = Selection.unfold_entities(structure, 'A') # A for atoms
	all_residues = Selection.unfold_entities(structure, 'R') # R for residues
	searcher = NeighborSearch(all_atoms)

	burial_by_resid = {}
	for residue in all_residues:
		resid = str(residue.get_id()[1])
		n_contacts = 0
		for atom in residue.get_list():
			n_contacts += len(searcher.search(atom.get_coord(), radius, level = "A"))
		burial_by_resid[resid] = n_contacts/len(residue.get_list())

	return burial_by_resid
 def extract_feature(self):
     """Attach the D2 plain shape distribution to every residue of the database.

     For the ligand and the receptor of each complex's unbound formation, a
     ``(len(protein.residues), number_of_bins)`` array is computed from the
     residues found within ``self.radius`` of each residue centre and saved
     with ``np.save`` under ``self._get_dir_name() + protein.name``. If the
     matching ``.npy`` file already exists it is loaded instead. Row ``i`` is
     stored on ``protein.residues[i]`` as
     ``Features.D2_PLAIN_SHAPE_DISTRIBUTION``.
     """
     seed(self.seed)
     counter = 0
     overall_time = datetime.now()
     print_info_nn(
         " >>> Adding D2 shape distribution for database {0} ... ".format(
             self._database.name))
     if not os.path.exists(self._get_dir_name()):
         os.makedirs(self._get_dir_name())
     for complex_name in self._database.complexes.keys():
         protein_complex = self._database.complexes[complex_name]
         proteins = [
             protein_complex.unbound_formation.ligand,
             protein_complex.unbound_formation.receptor
         ]
         for protein in proteins:
             shape_dist_file = self._get_dir_name() + protein.name
             if not os.path.exists(shape_dist_file + ".npy"):
                 # Progress output: a line break on every 16th computed protein.
                 counter += 1
                 if counter > 15:
                     counter = 0
                     print_info("{0}".format(protein.name))
                 else:
                     print_info_nn("{0}, ".format(protein.name))
                 searcher = NeighborSearch(protein.atoms)
                 distributions = np.zeros(
                     (len(protein.residues), self.number_of_bins))
                 for row, residue in enumerate(protein.residues):
                     neighbours = searcher.search(
                         residue.center, self.radius, "R")
                     distributions[row, :] = self._compute_distribution(
                         neighbours)
                 np.save(shape_dist_file, distributions)
             # Always read the values back from the cache file.
             distributions = np.load(shape_dist_file + ".npy")
             for row, residue in enumerate(protein.residues):
                 residue.add_feature(
                     Features.D2_PLAIN_SHAPE_DISTRIBUTION,
                     distributions[row, :])
     elapsed = datetime.now() - overall_time
     print_info("took {0} seconds.".format(elapsed.seconds))
Ejemplo n.º 30
0
def binding_site_residues(structure_path, ligand_name, distance_cutoff):
    """
    Return the protein residues (Biopython objects) around a ligand's centroid.

    The structure is parsed from ``structure_path`` (its ``stem`` is used as
    the structure id). Only chain 'A' of the first model is considered, for
    both the protein and the ligand named ``ligand_name``. Residues having an
    atom within ``distance_cutoff`` of the ligand centroid are returned.
    """
    models = PDBParser().get_structure(structure_path.stem, structure_path)

    # Protein and ligand both come from chain A of the first model.
    protein = get_protein(models[0]['A'])
    ligand = get_ligand(models[0]['A'], ligand_name)

    searcher = NeighborSearch(Selection.unfold_entities(protein, 'A'))
    return searcher.search(get_centroid(ligand), distance_cutoff, 'R')
Ejemplo n.º 31
0
def CalculateDegreesOfBurial(ProteinModel, DistanceCutoff):
	"""Count the atoms around the backbone nitrogen of every standard amino acid.

	Returns a dict keyed by ``(chain, residue)`` object pairs. Each value is
	the number of atoms of ``ProteinModel`` within ``DistanceCutoff`` of the
	residue's "N" atom, minus one.
	"""
	atom_search = NeighborSearch(Selection.unfold_entities(ProteinModel, "A"))

	burial = {}
	for chain in ProteinModel:
		for residue in chain:
			if not is_aa(residue.get_resname(), standard = True):
				continue
			nitrogen = residue["N"]
			hits = atom_search.search(nitrogen.coord, DistanceCutoff)
			# One is subtracted because the search also counts the atom itself.
			burial[chain, residue] = len(hits) - 1

	return burial
Ejemplo n.º 32
0
    def search_H_bond_bis(self):
        """
        Search the model for hydrogen bonds with a search radius of 6.

        Fills ``self.H_bonds_bis`` with the (acceptor, donor) atom pairs
        accepted by ``self.is_H_bond``, then stores the result of
        ``self.search_network`` on them in ``self.H_bonds_networks_bis``.
        Returns None.
        """
        acceptor_atoms = self.model.get_acceptors()
        donor_search = NeighborSearch(self.model.get_donors())
        self.H_bonds_bis = []
        # For each acceptor, keep every nearby donor that passes the test.
        for acceptor in acceptor_atoms:
            nearby_donors = donor_search.search(center=acceptor.get_coord(),
                                                radius=6,
                                                level="A")
            self.H_bonds_bis.extend(
                (acceptor, donor) for donor in nearby_donors
                if self.is_H_bond(acceptor, donor))

        self.H_bonds_networks_bis = self.search_network(self.H_bonds_bis,
                                                        _type='H_bonds')

        return
Ejemplo n.º 33
0
def fast_is_residue_interacting(residue, distance):
    '''
     Checks if the residue given is interacting with another residue from a
     different chain of the same ProteinStructure object
     :param residue: a residue object (None is accepted and gives False)
     :param distance (int): the max distance you allow to consider an interaction
     :return: boolean; False as well when no other chain contributes any atom
     '''
    if residue is None:
        return False

    pdb = residue.parent.parent
    other_atoms = []
    for chain in pdb:
        if chain is not residue.parent:
            other_atoms.extend(chain.get_atoms_list())

    # Single-chain structure (or empty partner chains): nothing can interact,
    # and the neighbour search is not meant to be built from an empty list.
    if not other_atoms:
        return False

    ns = NeighborSearch(other_atoms)
    # Stops at the first atom that has a neighbour in another chain.
    return any(
        len(ns.search(center=atom.get_coord(), radius=distance)) > 0
        for atom in residue)
Ejemplo n.º 34
0
 def _remove_distant_hatatms(self, new_model, hetatm_chain):
     """Detach hetatms that are more than ``self.r_cutoff`` away from the main chain(s).

     ``hetatm_chain`` is first renamed to the last uppercase ASCII letter that
     is not in ``self.chain_ids``. Every residue of the chain that has no atom
     within ``self.r_cutoff`` of an atom of ``new_model`` is then detached.
     """
     ns = NeighborSearch(list(new_model.get_atoms()))
     free_chain_ids = [
         c for c in reversed(string.ascii_uppercase) if c not in self.chain_ids]
     hetatm_chain.id = free_chain_ids[0]
     res_idx = 0
     # The index only advances past residues that are kept; detaching a
     # residue shifts the next one into the current position.
     while res_idx < len(hetatm_chain):
         res_1 = hetatm_chain.child_list[res_idx]
         hits_per_atom = [
             ns.search(atom_1.get_coord(), self.r_cutoff, 'R')
             for atom_1 in res_1]
         if any(hits_per_atom):
             res_idx += 1
         else:
             hetatm_chain.detach_child(res_1.id)
Ejemplo n.º 35
0
    def interacting_residues(self, other_chain, dist=3.5):
        """
        Find the residues of this chain that have an atom close to ``other_chain``.

        :param other_chain: a chain object to be compared with
        :param dist: distance of interaction in Angstroms (default = 3.5)
        :return: list with the ``num`` of every residue of self that has at
                 least one atom within ``dist`` of an atom of other_chain;
                 None when no residue interacts
        """
        searcher = NeighborSearch(other_chain.get_atoms_list())
        hits = []
        for res in self:
            # One close atom is enough to record the residue.
            if any(
                    len(searcher.search(center=atom.get_coord(),
                                        radius=dist)) > 0
                    for atom in res):
                hits.append(res.num)
        return hits if len(hits) > 0 else None
Ejemplo n.º 36
0
    def get_contacts(self, complex, contact_dist=5.0):
        """
        Collect the receptor and ligand residues that are in contact.

        ``complex`` is indexed as ``complex["receptor"]["structure"]`` and
        ``complex["ligand"]["structure"]``. A receptor residue is in contact
        with a ligand residue when one of its atoms lies within
        ``contact_dist`` of a ligand atom. Only residues whose name is in
        ``standard_aa_names`` and whose insertion code is blank take part;
        residues with an insertion code are reported with a 'Skipping:' print.

        Returns two ``LinkedSelection`` objects (receptor first, ligand
        second) built from (chain id, residue number, one-letter name) tuples.
        """
        ns = NeighborSearch(
            Selection.unfold_entities(complex["receptor"]["structure"], 'A'))
        ligand_residues = Selection.unfold_entities(
            complex["ligand"]["structure"], 'R')

        contacts_lig = set()
        contacts_rec = set()
        for ligand_res in ligand_residues:
            if ligand_res.get_resname() not in standard_aa_names:
                continue
            lig_resname = dindex_to_1[d3_to_index[ligand_res.get_resname()]]
            lig_resnum = ligand_res.get_id()[1]
            if ligand_res.get_id()[2] != ' ':
                print('Skipping:', ligand_res.get_id())
                continue
            lig_key = (ligand_res.get_parent().get_id(), lig_resnum,
                       lig_resname)

            # Receptor residues near any atom of this ligand residue
            # (a residue can be listed more than once).
            res_contacts = []
            for lig_atom in ligand_res:
                close_atoms = ns.search(lig_atom.get_coord(), contact_dist)
                res_contacts.extend(
                    Selection.unfold_entities(close_atoms, 'R'))

            for receptor_res in res_contacts:
                if receptor_res.get_resname() not in standard_aa_names:
                    continue
                rec_resname = dindex_to_1[d3_to_index[
                    receptor_res.get_resname()]]
                rec_resnum = receptor_res.get_id()[1]
                if receptor_res.get_id()[2] != ' ':
                    print('Skipping:', receptor_res.get_id())
                    continue
                rec_key = (receptor_res.get_parent().get_id(), rec_resnum,
                           rec_resname)

                # The ligand residue is recorded only together with a valid
                # receptor partner.
                contacts_lig.add(lig_key)
                contacts_rec.add(rec_key)

        receptor_selection = LinkedSelection(list(contacts_rec))
        ligand_selection = LinkedSelection(list(contacts_lig))
        return receptor_selection, ligand_selection
    def extract_feature(self):
        """
        Compute, or load from cache, the D1 surface shape distribution of each residue.

        Handles the ligand and the receptor of the unbound formation of every
        complex in ``self._database``. The per-protein array has shape
        ``(len(protein.residues), number_of_bins + 1)`` and is cached as
        ``self._get_dir_name() + protein.name + ".npy"``; an existing cache
        file is loaded rather than recomputed. Each row is added to its
        residue as ``Features.D1_SURFACE_SHAPE_DISTRIBUTION``.
        """
        seed(self.seed)
        print_info_nn(" >>> Adding D1 surface shape distribution for database {0} ... ".format(self._database.name))
        overall_time = datetime.now()
        counter = 0
        if not os.path.exists(self._get_dir_name()):
            os.makedirs(self._get_dir_name())
        for complex_name in self._database.complexes.keys():
            protein_complex = self._database.complexes[complex_name]
            proteins = [protein_complex.unbound_formation.ligand, protein_complex.unbound_formation.receptor]
            for protein in proteins:
                shape_dist_file = self._get_dir_name() + protein.name
                if not os.path.exists(shape_dist_file + ".npy"):
                    # Progress output, with a line break on every 16th computed protein.
                    counter += 1
                    if counter <= 15:
                        print_info_nn("{0}, ".format(protein.name))
                    else:
                        counter = 0
                        print_info("{0}".format(protein.name))
                    atoms = protein.atoms
                    neighbour_search = NeighborSearch(atoms)
                    distributions = np.zeros((len(protein.residues), self.number_of_bins + 1))
                    for i in range(len(protein.residues)):
                        residue = protein.residues[i]
                        # The residue itself is always the first entry.
                        nearby_residues = [protein.biopython_residues[i]]
                        temp_nearby_residues = neighbour_search.search(residue.center, self.radius, "R")
                        for nearby_residue in temp_nearby_residues:
                            if nearby_residue not in protein.biopython_residues:
                                continue
                            residues_index = protein.biopython_residues.index(nearby_residue)
                            # Keep `residue` bound to residue i: reusing that name for the
                            # neighbour made the distribution use the wrong centre.
                            neighbour_residue = protein.residues[residues_index]

                            if neighbour_residue.get_feature(Features.RELATIVE_ACCESSIBLE_SURFACE_AREA) >= self.rASA_threshold:
                                nearby_residues.append(nearby_residue)
                        distributions[i, :] = self._compute_distribution(nearby_residues, residue.center)
                    np.save(shape_dist_file, distributions)
                distributions = np.load(shape_dist_file + ".npy")
                for i in range(len(protein.residues)):
                    protein.residues[i].add_feature(Features.D1_SURFACE_SHAPE_DISTRIBUTION, distributions[i, :])
        print_info("took {0} seconds.".format((datetime.now() - overall_time).seconds))
Ejemplo n.º 38
0
# Script fragment (Python 2 print statements): for every chain listed in
# args.chain2, report the residues whose CA atom lies within 8.5 of a CA atom
# belonging to the chains listed in args.chain1, and print a matching PyMOL
# "show spheres" command.
structure = PDBParser().get_structure('X', args.pdb)

center_atoms = []
pymol_command = ""

# CA atoms of the whole structure; only referenced by the commented-out line below.
all_atom_list = [atom for atom in structure.get_atoms() if atom.name == 'CA' ]

# Gather the CA atoms of the chain1 chains (first model); they are the search centres.
for k in args.chain1 :
    chain_atoms = [atom for atom in structure[0][k].get_atoms() if atom.name == 'CA' ]

    center_atoms += chain_atoms

#atom_list = [x for x in all_atom_list if x not in center_atoms]

for j in args.chain2 :
    # The search index holds only the CA atoms of the current chain2 chain.
    atom_list = [atom for atom in structure[0][j].get_atoms() if atom.name == 'CA' ]
    ns = NeighborSearch(atom_list)

    # Set of residues of chain j found around any of the centre atoms.
    nearby_residues = {res for center_atom in center_atoms
                        for res in ns.search(center_atom.coord, 8.5, 'R')}

    print "\nNeighbor residues in chain ", j, ": \n"
    print sorted(res.id[1] for res in nearby_residues)

    pymol_command = "show spheres, chain " + j + " and resi "

    # Residue numbers are joined with "+"; the trailing "+" is cut off when printing.
    for m in sorted(res.id[1] for res in nearby_residues):
        pymol_command = pymol_command + str(m) + "+"

    print pymol_command[:-1] + " and name CA \n" 
Ejemplo n.º 39
0
    if residue.resname == args.ligand:
        ligand = residue
        break

if not ligand:
    print('[!!] Ligand residue \'{0}\' not found in structure'.format(args.ligand), file=sys.stderr)
    sys.exit(1)

# Calculate center of mass of the ligand (unweighted mean of the atom coordinates).
# A list is built explicitly: on Python 3 ``map`` returns an iterator, which
# ``np.asarray`` cannot turn into a float32 array.
ligand_com = [sum(x)/len(x) for x in zip(*[at.coord for at in ligand])]
ligand_com = np.asarray(ligand_com, dtype=np.float32)

# Calculate neighbors considering only aminoacid/nucleotide atoms (excl. waters, other ligands, etc)
sel_atoms = [at for at in structure.get_atoms() if at.parent.id[0] == ' ']
ns = NeighborSearch(sel_atoms)
neighbors = ns.search(ligand_com, 10.0, level='R') # 10A radius, return residues

# Calculate residue closer to each ligand atom and the respective distance
ligand_atoms = ligand.child_list
min_dist_list, _seen = [], set()

for l_at in ligand_atoms:
    distances = []
    for residue in neighbors:
        for r_at in residue:
            distances.append((r_at, l_at, r_at - l_at))

    distances.sort(key=lambda x: x[-1])
    min_dist = distances[0]
    # One restraint per residue to keep the number of restraints small
    if min_dist[0].parent not in _seen:
Ejemplo n.º 40
0
def get_interacting_residues(model, r_cutoff=5, skip_hetatm_chains=True):
    """Return residue-residue interactions between all chains in `model`.

    Parameters
    ----------
    model : biopython.Model
        Model to analyse.
    r_cutoff : number
        Radius of the neighbour search run around every atom of a residue.
    skip_hetatm_chains : bool
        When true, chains for which ``chain_is_hetatm`` is true are ignored.

    Returns
    -------
    dict
        A dictionary of interactions between chains i (0..n-1) and j (i+1..n).
        Keys are (chain_idx, chain_id, residue_idx, residue_resnum, residue_amino_acid) tuples
        (e.g. (0, 'A', 0, '0', 'M')).
        Values are sets of tuples having the same format as the keys.
        Only residues whose id is returned by ``get_aa_residues`` for their
        chain are reported; ``residue_idx`` is the position of the id in that
        sequence.

    Examples
    --------
    You can reverse the order of keys and values like this::

        complement = dict()
        for key, values in get_interacting_residues(model).items():
            for value in values:
                complement.setdefault(value, set()).add(key)


    You can get a list of all interacting chains using this command::

        {(key[0], value[0])
         for (key, values) in get_interacting_residues(model).items()
         for value in values}

    """
    from Bio.PDB import NeighborSearch

    def _first_positions(residue_ids):
        # Maps each residue id to the index of its first occurrence, i.e. the
        # value ``residue_ids.index(residue_id)`` would give.
        positions = {}
        for position, residue_id in enumerate(residue_ids):
            positions.setdefault(residue_id, position)
        return positions

    interactions_between_chains = dict()

    # Chain 1
    for chain_1_idx, chain_1 in enumerate(model):
        if skip_hetatm_chains and chain_is_hetatm(chain_1):
            message = (
                "Skipping chain_1 with idx {} because it contains only hetatms."
                .format(chain_1_idx)
            )
            logger.debug(message)
            continue
        chain_1_positions = _first_positions(get_aa_residues(chain_1))

        # Chain 2: only chains that come after chain 1, so each pair is visited once
        for j, chain_2 in enumerate(model.child_list[chain_1_idx + 1:]):
            chain_2_idx = chain_1_idx + 1 + j
            if skip_hetatm_chains and chain_is_hetatm(chain_2):
                message = (
                    "Skipping chain_2 with idx {} because it contains only hetatms."
                    .format(chain_2_idx)
                )
                logger.debug(message)
                continue
            chain_2_positions = _first_positions(get_aa_residues(chain_2))
            ns = NeighborSearch(list(chain_2.get_atoms()))

            # Residue 1
            for residue_1 in chain_1:
                residue_1_idx = chain_1_positions.get(residue_1.id)
                if residue_1_idx is None:
                    continue
                # Residue number followed by the insertion code, if any.
                residue_1_resnum = str(residue_1.id[1]) + residue_1.id[2].strip()
                residue_1_aa = convert_aa(residue_1.resname, quiet=True)
                residue_1_key = (
                    chain_1_idx, chain_1.id, residue_1_idx, residue_1_resnum, residue_1_aa
                )
                interacting_residues = set()
                for atom_1 in residue_1:
                    interacting_residues.update(ns.search(atom_1.get_coord(), r_cutoff, 'R'))

                # Residue 2
                interacting_residue_ids = []
                for residue_2 in interacting_residues:
                    residue_2_idx = chain_2_positions.get(residue_2.id)
                    if residue_2_idx is None:
                        continue
                    residue_2_resnum = str(residue_2.id[1]) + residue_2.id[2].strip()
                    residue_2_aa = convert_aa(residue_2.get_resname(), quiet=True)
                    residue_2_key = (
                        chain_2_idx, chain_2.id, residue_2_idx, residue_2_resnum, residue_2_aa
                    )
                    interacting_residue_ids.append(residue_2_key)
                if interacting_residue_ids:
                    interactions_between_chains\
                        .setdefault(residue_1_key, set())\
                        .update(interacting_residue_ids)

    return interactions_between_chains