def extract_feature(self):
    """Attach the D2 category shape distribution to every residue of the database.

    For the ligand and the receptor of each complex's unbound formation the
    per-residue distributions are loaded from ``<dir name><protein name>.npy``.
    When that file does not exist yet it is computed first: for every residue,
    the residues found within ``self.radius`` of ``residue.center`` are handed
    to ``self._compute_distribution`` and the resulting table is saved.
    Progress (protein names) and the elapsed time are reported through
    ``print_info`` / ``print_info_nn``.
    """
    seed(self.seed)
    printed = 0
    print_info_nn(" >>> Adding D2 category based shape distribution for database {0} ... ".format(self._database.name))
    started = datetime.now()
    if not os.path.exists(self._get_dir_name()):
        os.makedirs(self._get_dir_name())
    for complex_name in self._database.complexes.keys():
        formation = self._database.complexes[complex_name].unbound_formation
        for protein in (formation.ligand, formation.receptor):
            base_path = self._get_dir_name() + protein.name
            cache_path = base_path + ".npy"
            if not os.path.exists(cache_path):
                # Progress output: names are printed on one line, with a line
                # break after every 16th newly computed protein.
                printed += 1
                if printed > 15:
                    printed = 0
                    print_info("{0}".format(protein.name))
                else:
                    print_info_nn("{0}, ".format(protein.name))
                searcher = NeighborSearch(protein.atoms)
                table = np.zeros((len(protein.residues), self.number_of_bins))
                for row, residue in enumerate(protein.residues):
                    close_by = searcher.search(residue.center, self.radius, "R")
                    table[row, :] = self._compute_distribution(close_by)
                # np.save appends the ".npy" suffix itself.
                np.save(base_path, table)
            # Always read back from disk so cached and fresh runs take the same path.
            table = np.load(cache_path)
            for row, residue in enumerate(protein.residues):
                residue.add_feature(Features.D2_CATEGORY_SHAPE_DISTRIBUTION, table[row, :])
    print_info("took {0} seconds.".format((datetime.now() - started).seconds))
def pdb_neighbors(pdb_f, pdb_id):
    """List residues with N/O-named atoms close to phospho-residue oxygens.

    Args:
        pdb_f: PDB file handed to ``PDBParser().get_structure``.
        pdb_id: Structure id; also copied into every returned tuple.

    Returns:
        list: One ``(pdb_id, center_label, [neighbor_label, ...])`` tuple for
        each PTR/SEP/TPO residue that has at least one neighbor. Labels have
        the form ``RESNAME_RESNUM_CHAIN``. A neighbor is any other residue
        owning an atom whose name contains ``'N'`` or ``'O'`` and that lies
        within ``BOND_CUTOFF`` of one of the selected oxygens.
    """
    # Atom names used as search centers, per phospho-residue type.
    anchor_names = {
        'PTR': ['O1P', 'O2P', 'O3P', 'OH'],
        'SEP': ['O1P', 'O2P', 'O3P', 'OG'],
        'TPO': ['O1P', 'O2P', 'O3P', 'OG1'],
    }

    def _label(residue):
        return residue.get_resname()+'_'+str(residue.get_id()[1])+'_'+residue.get_parent().get_id()

    structure = PDBParser().get_structure(pdb_id, pdb_f)
    ns = NeighborSearch(Selection.unfold_entities(structure, 'A'))
    phospho_residues = [res for res in structure.get_residues() if res.get_resname() in anchor_names]

    neighbors = []
    for center in phospho_residues:
        wanted = anchor_names[center.get_resname()]
        anchors = [atom for atom in center.child_list if atom.get_name() in wanted]

        per_anchor = [ns.search(anchor.get_coord(), BOND_CUTOFF) for anchor in anchors]
        unique_atoms = list(set([hit for hits in per_anchor for hit in hits]))
        polar_atoms = [hit for hit in unique_atoms if 'N' in hit.get_name() or 'O' in hit.get_name()]

        nearby_residues = list(set(Selection.unfold_entities(polar_atoms, 'R')))
        nearby_residues = [other for other in nearby_residues if other != center]

        if len(nearby_residues) > 0:
            neighbors.append((pdb_id, _label(center), [_label(other) for other in nearby_residues]))

    return neighbors
Example #3
0
def within(resnum, angstroms, chain_id, model, use_ca=False, custom_coord=None):
    """Return the residues that have at least one atom within a distance of a point.

    See: https://www.biostars.org/p/1816/ https://www.biostars.org/p/269579/

    The point is either ``custom_coord`` or, when that is not given, an atom of
    the residue ``model[chain_id][resnum]``.

    Args:
        resnum (int): Residue key used to index the chain. Ignored when
            ``custom_coord`` is given.
        angstroms (float): Search radius passed to ``NeighborSearch.search``.
        chain_id (str): Chain key used to index the model. Ignored when
            ``custom_coord`` is given.
        model (Model): Entity whose atoms are searched.
        use_ca (bool): If the alpha-carbon atom should be used for the search, otherwise use the last atom of the residue
        custom_coord (list): Custom XYZ coordinate to get within. ``None`` or an
            empty sequence means "use the residue instead".

    Returns:
        list: List of Bio.PDB.Residue.Residue objects

    """
    # TODO: should have separate method for within a normal residue (can use "resnum" with a int) or a custom coord,
    # where you don't need to specify resnum
    atom_list = Selection.unfold_entities(model, 'A')
    ns = NeighborSearch(atom_list)

    # Explicit None/length test instead of plain truthiness: a NumPy array
    # passed as custom_coord has no unambiguous truth value and would raise.
    if custom_coord is not None and len(custom_coord) > 0:
        target_atom_coord = np.array(custom_coord, 'f')
    else:
        target_residue = model[chain_id][resnum]
        if use_ca:
            target_atom = target_residue['CA']
        else:
            target_atom = target_residue.child_list[-1]
        target_atom_coord = np.array(target_atom.get_coord(), 'f')
    neighbors = ns.search(target_atom_coord, angstroms)
    residue_list = Selection.unfold_entities(neighbors, 'R')

    return residue_list
Example #4
0
def get_residues_distance_distribution(data, r):
    """
    This function computes the distribution of number of residues in certain radius around residues.

    Every file matching ``data + "*_b.pdb"`` is read with ``read_pdb_file``.
    For each residue the centroid of its atoms is computed and the number of
    residues returned by a radius-100 neighbor search around it is recorded.
    The histogram of those counts is plotted and the file list is printed.

    :param data: path prefix that is concatenated with ``"*_b.pdb"`` for globbing.
    :param r: currently unused.
        NOTE(review): the search radius is hard-coded to 100 below; this
        argument was probably meant to supply it -- confirm before wiring it in.
    """
    files = glob.glob(data + "*_b.pdb")
    files.sort()
    counts = []
    for file_counter, bound_pbd in enumerate(files, 1):
        # Single-argument print(...) behaves the same as a statement (Python 2)
        # and as a function call (Python 3).
        print("Protein " + str(file_counter) + "/" + str(len(files)))
        # The third item is iterated as residues; it gets its own name so that
        # the ``r`` argument is no longer silently overwritten.
        _, atoms, residues = read_pdb_file(bound_pbd)
        ns = NeighborSearch(atoms)
        for res in residues:
            # Centroid of the residue's atoms, accumulated in the coordinates' own dtype.
            total = 0 * res.child_list[0].get_coord()
            for atom in res.child_list:
                total += atom.get_coord()
            center = total / len(res.child_list)
            counts.append(len(ns.search(center, 100, "R")))
    plot(np.bincount(counts))
    show()
    print(files)
Example #5
0
    def search_connectivity(self, atom_center, rank, NS=None, previous=None):
        """Collect the atoms connected to ``atom_center`` up to ``rank`` steps away.

        Atoms within ``parametres.cutoff_bonds`` of ``atom_center`` are looked
        up, leaving out ``atom_center`` itself and ``previous`` (the atom the
        search came from).

        For ``rank == 1`` that list is returned. Otherwise a dict is returned
        that maps each of those atoms to the result of the same search started
        from it with ``rank - 1``, or to ``None`` when that result is empty.
        ``rank == 1`` is the only stopping condition, so ``rank`` is expected
        to be at least 1.

        ``NS`` is the neighbor search object to reuse; one is built from
        ``self.model.atoms`` when it is not supplied.
        """
        searcher = NeighborSearch(self.model.atoms) if NS is None else NS

        excluded = [atom_center, previous]
        bonded = []
        for candidate in searcher.search(center=atom_center.get_coord(),
                                         radius=parametres.cutoff_bonds,
                                         level="A"):
            if candidate not in excluded:
                bonded.append(candidate)

        if rank == 1:
            return bonded

        tree = {}
        for neighbour in bonded:
            # Walk one step further out, never stepping straight back.
            branch = self.search_connectivity(atom_center=neighbour,
                                              rank=rank - 1,
                                              NS=searcher,
                                              previous=atom_center)
            tree[neighbour] = branch if len(branch) != 0 else None
        return tree
Example #6
0
	def __init__(self, structure, chain_id, threshold):
		"""Count atom contacts around every standard residue of one chain.

		For each residue of chain ``chain_id`` in ``structure[0]`` whose
		hetero flag is blank, the atoms within ``threshold`` of
		``get_center(residue)`` are classified twice: same chain (intra) or
		other chain (inter), and closer than ``threshold/2`` (short) or not
		(long). The tuple ``(inter, intra, lon, shor)`` is stored in
		``self.contacts`` under the residue's full id.

		NOTE(review): ``self.contacts`` is not created here, so it must
		already exist (e.g. as a class attribute) -- confirm.
		"""
		searcher = NeighborSearch(list(structure.get_atoms()))

		for chain in structure[0]:
			if chain.id != chain_id:
				continue
			for residue in chain:
				if residue.id[0] != ' ':
					continue

				inter = intra = lon = shor = 0
				center = get_center(residue)
				own_chain = residue.get_full_id()[2]
				for contact in searcher.search(center, threshold):
					if contact.get_full_id()[2] == own_chain:
						intra += 1
					else:
						inter += 1
					if np.linalg.norm(contact.get_coord()-center) < threshold/2:
						shor += 1
					else:
						lon += 1
				self.contacts[residue.get_full_id()] = (inter, intra, lon, shor)
Example #7
0
def calculate_ic(structure, d_cutoff=5.0, selection=None):
    """
    Calculates intermolecular contacts in a parsed structure object.

    All residue pairs with atoms within ``d_cutoff`` of each other are found
    first. Without ``selection`` a pair is kept when its residues belong to
    different chains. With ``selection`` (a mapping keyed by chain id) a pair
    is kept when both chain ids are keys of the mapping and map to different
    values.

    Raises ValueError when no pair is kept.
    """
    searcher = NeighborSearch(list(structure.get_atoms()))
    residue_pairs = searcher.search_all(radius=d_cutoff, level="R")

    if selection:
        def _is_contact(first, second):
            chain_a = first.parent.id
            chain_b = second.parent.id
            if chain_a not in selection or chain_b not in selection:
                return False
            return selection[chain_a] != selection[chain_b]
    else:
        def _is_contact(first, second):
            return first.parent.id != second.parent.id

    ic_list = [pair for pair in residue_pairs if _is_contact(pair[0], pair[1])]

    if not ic_list:
        raise ValueError("No contacts found for selection")

    return ic_list
Example #8
0
    def search_subs(
        self,
        hx_atom,
        cx_atom,
        cy_atom,
        hy_atom,
        j_atoms,
        chosen_j
    ):
        """Register the atoms found around ``cy_atom`` as substituents of ``chosen_j``.

        Atoms within 1.7 of ``self.atom_dict[cy_atom][2]`` whose serial number
        is not in ``j_atoms`` are written to
        ``self.j_dict[chosen_j]["substituents"]``, keyed by serial number.
        Each entry holds item 0 of the ``atom_dict`` records of the
        substituent ("SY"), ``hx_atom``, ``cx_atom`` and ``cy_atom``, plus
        item 1 of the substituent's record ("element").

        ``hy_atom`` is accepted but not used. Nothing is returned.
        """
        center = self.atom_dict[cy_atom][2]
        searcher = NeighborSearch(self.atom_list)
        substituents = [
            atom for atom in searcher.search(center, 1.7)
            if atom.serial_number not in j_atoms
        ]

        for atom in substituents:
            serial = atom.serial_number
            entry = {
                "SY": self.atom_dict[serial][0],
                "HX": self.atom_dict[hx_atom][0],
                "CX": self.atom_dict[cx_atom][0],
                "CY": self.atom_dict[cy_atom][0],
                "element": self.atom_dict[serial][1]
            }
            self.j_dict[chosen_j]["substituents"][serial] = entry
def _get_contacts(pdb_path, chain_rec, chain_lig, contact_dist):
    """Find receptor/ligand residue contacts in the first model of a PDB file.

    Args:
        pdb_path: File parsed with the module-level ``pdb_parser``.
        chain_rec: Iterable of chain ids forming the receptor.
        chain_lig: Iterable of chain ids forming the ligand.
        contact_dist: Radius used when searching receptor atoms around each
            ligand atom.

    Returns:
        tuple: ``(contacts, contacts_rec, contacts_lig)``, three sets.
        ``contacts`` holds ``(rec_name, rec_num, rec_chain, lig_name, lig_num,
        lig_chain)`` tuples; the other two hold the receptor and ligand
        ``(name, num, chain)`` triples that take part in a contact. Names are
        obtained through ``dindex_to_1[d3_to_index[resname]]``.
    """
    model = pdb_parser.get_structure('X', pdb_path)[0]

    receptor_chains = [model[chain_id] for chain_id in chain_rec]
    ligand_chains = [model[chain_id] for chain_id in chain_lig]

    searcher = NeighborSearch(Selection.unfold_entities(receptor_chains, 'A'))

    contacts, contacts_lig, contacts_rec = set(), set(), set()
    for ligand_res in Selection.unfold_entities(ligand_chains, 'R'):
        lig_key = (
            dindex_to_1[d3_to_index[ligand_res.get_resname()]],
            ligand_res.get_id()[1],
            ligand_res.get_parent().get_id(),
        )

        # Receptor residues touched by any atom of this ligand residue
        # (duplicates are fine, the result sets absorb them).
        touched = []
        for lig_atom in ligand_res:
            hits = searcher.search(lig_atom.get_coord(), contact_dist)
            touched.extend(Selection.unfold_entities(hits, 'R'))

        for receptor_res in touched:
            rec_key = (
                dindex_to_1[d3_to_index[receptor_res.get_resname()]],
                receptor_res.get_id()[1],
                receptor_res.get_parent().get_id(),
            )
            contacts.add(rec_key + lig_key)
            contacts_lig.add(lig_key)
            contacts_rec.add(rec_key)

    return contacts, contacts_rec, contacts_lig
Example #10
0
def pdb_neighbors(pdb_f, pdb_id):
    """Map each phospho-residue to the residues near its phosphate/hydroxyl oxygens.

    Search centers are the atoms named O1P, O2P, O3P, OH, OG or OG1 that belong
    to PTR, SEP or TPO residues. Every residue with an atom within
    ``BOND_CUTOFF`` of such a center is a neighbor of the center's residue.

    Args:
        pdb_f: PDB file handed to ``PDBParser().get_structure``.
        pdb_id: Structure id handed to the parser.

    Returns:
        dict: ``{center_label: [neighbor_label, ...]}`` with labels of the
        form ``RESNAME_RESNUM_CHAIN``. For backward compatibility an empty
        string is returned when the structure has no search centers.
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)
    ns = NeighborSearch(Selection.unfold_entities(structure, 'A'))
    phospho_atoms = [a for a in structure.get_atoms()
                     if a.get_parent().get_resname() in ['PTR', 'SEP', 'TPO']
                     and a.get_name() in ['O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1']]
    # if there are no phospho atoms, return
    if not phospho_atoms:
        return ''

    def _label(residue):
        # residue name, residue number and chain id
        return '_'.join([residue.get_resname(),
                         str(residue.get_id()[1]),
                         str(residue.get_parent().get_id())])

    neighbors = {}
    for atom in phospho_atoms:
        residue = atom.get_parent()
        # Merge the hits of all oxygens of a residue; assigning a fresh list
        # per atom would keep only the neighbors of the last oxygen visited.
        found = neighbors.setdefault(residue, [])
        nearby_atoms = ns.search(atom.get_coord(), BOND_CUTOFF)
        for other in Selection.unfold_entities(nearby_atoms, 'R'):
            if other != residue and other not in found:
                found.append(other)

    # .items() works on Python 2 and 3 alike (.iteritems() is Python 2 only).
    return {_label(center): [_label(other) for other in others]
            for center, others in neighbors.items()}
Example #11
0
    def collectSaltBridge(self, position, chain):
        """Tell whether the residue at ``chain``/``position`` forms a salt bridge.

        Only ARG, LYS, ASP, GLU and HIS residues are examined. For each of the
        residue's atoms named NH1/NH2/NZ/NE2 an atom named OE1/OE2/OD1/OD2 is
        looked for within 4.5 (and the other way round for the oxygen names).

        Returns 1 as soon as one such pair is found, otherwise 0. Unlike the
        earlier implementation, all candidate atoms of the residue are checked
        (not just the first one), and 0 is returned when the residue has no
        candidate atom at all instead of failing on an unset variable.
        """
        cationic = ['NH1', 'NH2', 'NZ', 'NE2']
        anionic = ['OE1', 'OE2', 'OD1', 'OD2']

        model = self.structure[0]
        myres = model[chain][position]
        if myres.get_resname() not in ['ARG', 'LYS', 'ASP', 'GLU', 'HIS']:
            return 0

        # Built only when actually needed.
        ns = NeighborSearch(Selection.unfold_entities(model, 'A'))  # A for atoms
        for atom in myres:
            name = atom.get_id()
            if name in cationic:
                partners = anionic
            elif name in anionic:
                partners = cationic
            else:
                continue
            # cutoff of 4 criteria fixed by Barlow, J M Thornton (PMID6887253)
            # +0.5A to account for the unoptimised side chain
            close_names = {close_atom.id for close_atom in ns.search(atom.coord, 4.5)}
            if any(partner in close_names for partner in partners):
                return 1
        return 0
Example #12
0
 def collectHbondsNumber(self, foldxIndivFile):
     """Count hydrogen-bond partners of the mutated residue.

     The chain and residue number come from items 3 and 2 of
     ``whatMutation(foldxIndivFile)``. For every atom of that residue whose
     name is in ``donors``, nearby atoms named in
     ``acceptors + main_chain_acceptors`` are counted; for every atom whose
     name is in ``acceptors``, nearby atoms named in
     ``donors + main_chain_donors`` are counted. Atoms whose residue number
     equals the mutated one are ignored.

     Returns the sum of both counts.
     """
     mutation = whatMutation(foldxIndivFile)
     prot_chain = mutation[3]
     prot_mut_id = int(mutation[2])
     model = self.structure[0]
     myres = model[prot_chain][prot_mut_id]
     ns = NeighborSearch(Selection.unfold_entities(model, 'A'))  # A for atoms

     def _partner_resnums(atom, partner_names):
         # cutoff of 3.2 angstroms D-A, strong mostly covalent according to
         # Jeffrey, George A.; An introduction to hydrogen bonding, Oxford University Press, 1997.
         hits = []
         for close_atom in ns.search(atom.coord, 3.2):
             full_atom_id = close_atom.get_full_id()
             other_resnum = full_atom_id[3][1]
             if other_resnum != prot_mut_id and full_atom_id[4][0] in partner_names:
                 hits.append(other_resnum)
         return hits

     h_bond_as_donor = []
     h_bond_as_acceptor = []
     for atom in myres:
         if atom.name in donors:
             h_bond_as_donor.extend(
                 _partner_resnums(atom, acceptors + main_chain_acceptors))
         # an atom may be listed both as donor and as acceptor
         if atom.name in acceptors:
             h_bond_as_acceptor.extend(
                 _partner_resnums(atom, donors + main_chain_donors))
     return len(h_bond_as_donor) + len(h_bond_as_acceptor)
Example #13
0
 def __calculate_separate_simbox_statistics(self, component, radius): #, cells_taken_by_component):
     """
     Calculates the number of grid cells describing each complex component.
     The calculation is done when all components are located in the center of the simulation box.

     A cubic Grid is built around ``component.pyrystruct.struct``. For every atom of that
     structure the grid cells within a search radius of the atom are passed to
     ``__calculate_simbox_statistics`` and its return value is stored in
     ``self.cells_taken_by_component`` (which is reset to an empty dict first).

     Returns ``len(self.cells_taken_by_component)``.
     """
     self.cells_taken_by_component = {}
     grid = Grid("cubic", radius, 10*radius, -1.)
     grid.calculate_boundaries(component.pyrystruct.struct)
     grid.generate_cubic_grid([])
     
     #scitree = scipyconverter(grid.grid_cells)
     # the grid cells themselves are the searchable points
     ns = NeighborSearch(grid.grid_cells)
                 
     for atom in component.pyrystruct.struct.get_atoms():
         center = array([atom.coord[0], atom.coord[1], atom.coord[2]])
         # search radius is radius*sqrt(3), enlarged by atom.vdw when atom.vdw exceeds radius
         # (presumably radius*sqrt(3) is the diagonal of a cubic cell of edge `radius` -- TODO confirm)
         if atom.vdw <= radius: rad = radius* sqrt(3)
         else: rad = (radius*sqrt(3)) + atom.vdw
         neighbours = ns.search(center, rad, 'A') #atom.vdw + radius
         #neighbours = scitree.run_neisearch(center, rad, eps=0)
         # the attribute is replaced on every iteration by the helper's return value
         self.cells_taken_by_component = self.__calculate_simbox_statistics(neighbours) #, cells_taken_by_component)
         # diagnostic output (Python 2 print statement) for atoms with no grid cell in range
         if len(neighbours) == 0: print "OMGOMG"*10, component.pyrystruct.chain, atom.serial_number
      
     del grid
     del ns
     # rebinding the local name does not affect the returned value
     neighbours = []
     return len(self.cells_taken_by_component)
Example #14
0
def CheckClashes(structure, chain):
    """
    Report whether a chain clashes with a structure.

    For every atom of ``chain`` the residues of ``structure[0]`` that have an
    atom within a radius of 2 are collected, leaving out the atom's own
    residue. Each (chain residue, structure residue) combination counts once.

    Arguments:
        -structure: PDB structure; only its model 0 (all chains) is searched.
        -chain: PDB chain structure to check if it has clashes with the main structure.

    Returns True as soon as more than 25 distinct residue pairs clash, False otherwise.
    """
    searcher = NeighborSearch(unfold_entities(structure[0], 'A'))

    clashing_pairs = set()
    for atom in chain.get_atoms():
        own_residue = atom.get_parent()
        nearby = searcher.search(atom.get_coord(), radius=2, level="R")

        # an atom is always "close" to the residue it belongs to
        if own_residue in nearby:
            nearby.remove(own_residue)

        for other in nearby:
            clashing_pairs.add((own_residue, other))
            if len(clashing_pairs) > 25:
                return True

    return False
Example #15
0
def get_residues_distance_distribution(data, r):
    """
    This function computes the distribution of number of residues in certain radius around residues.

    Every file matching ``data + "*_b.pdb"`` is read with ``read_pdb_file``.
    For each residue the centroid of its atoms is computed and the number of
    residues returned by a radius-100 neighbor search around it is recorded.
    The histogram of those counts is plotted and the file list is printed.

    :param data: path prefix that is concatenated with ``"*_b.pdb"`` for globbing.
    :param r: currently unused.
        NOTE(review): the search radius is hard-coded to 100 below; this
        argument was probably meant to supply it -- confirm before wiring it in.
    """
    files = glob.glob(data + "*_b.pdb")
    files.sort()
    counts = []
    for file_counter, bound_pbd in enumerate(files, 1):
        # Single-argument print(...) behaves the same as a statement (Python 2)
        # and as a function call (Python 3).
        print("Protein " + str(file_counter) + "/" + str(len(files)))
        # The third item is iterated as residues; it gets its own name so that
        # the ``r`` argument is no longer silently overwritten.
        _, atoms, residues = read_pdb_file(bound_pbd)
        ns = NeighborSearch(atoms)
        for res in residues:
            # Centroid of the residue's atoms, accumulated in the coordinates' own dtype.
            total = 0 * res.child_list[0].get_coord()
            for atom in res.child_list:
                total += atom.get_coord()
            center = total / len(res.child_list)
            counts.append(len(ns.search(center, 100, "R")))
    plot(np.bincount(counts))
    show()
    print(files)
Example #16
0
    def contact_pairs(self):
        """Find all residue pairs that have atoms within 4.5 of each other.

        The pairs found among ``self.atom_list`` are stored on the instance as
        ``self.contact_pairs``; nothing is returned. Note that this assignment
        replaces the method of the same name on this instance, so the method
        can only be called once per object.
        """
        searcher = NeighborSearch(self.atom_list)
        # level='R' makes search_all return residue pairs
        self.contact_pairs = searcher.search_all(4.5, level='R')
Example #17
0
def get_neighbor_chains(structure, options):
    """
    Takes a structure and returns a dictionary with chains as keys and a set of chains as values, holding the
    chains that have more than 8 CA/P atom contacts (search radius 8) with CA/P atoms of the key chain.

    Chains are processed in order and a chain that is already a key of the result is not counted again, so
    every neighboring pair is reported once, under the chain that comes first.

    :param structure: structure we want to check for clashes.
    :param options: object whose ``verbose`` attribute enables printing of the per-chain contact counts.
    :return: dictionary with the clashes between chains.
    """
    backbone_ids = ('CA', 'P')

    neighbor_chains = {}
    ns = NeighborSearch(list(structure.get_atoms()))
    for chain in structure.get_chains():
        neighbor_chains[chain] = set()

        neighbor_dict = {}
        for atom in chain.get_atoms():
            if atom.get_id() not in backbone_ids:  # only alpha carbons or P of the chain
                continue
            for atom2 in ns.search(atom.get_coord(), 8, level='A'):
                if atom2.get_id() not in backbone_ids:
                    continue
                chain2 = atom2.get_parent().get_parent()  # chain the neighboring atom belongs to
                # Skip the chain itself and chains that were already assessed.
                if chain2 != chain and chain2 not in neighbor_chains:
                    neighbor_dict[chain2] = neighbor_dict.get(chain2, 0) + 1

        if options.verbose:
            print('\n%s' % chain)
        # The result must be filled regardless of verbosity; only the printing
        # is optional.
        for close_chain, contacts in neighbor_dict.items():
            if options.verbose:
                print('%s: %s' % (close_chain, contacts))
            if contacts > 8:
                neighbor_chains[chain].add(close_chain)
    return neighbor_chains
Example #18
0
def check_clash(str_name, v=False):
    """check_clash, fract of clashes!

    Reads the ATOM records of chains "A" and "B" from the PDB-formatted file
    ``str_name``. Every atom of the chain with fewer atoms is tested against
    the other chain: a neighbor within 2.0 counts as a problem (and a
    contact), otherwise a neighbor within 4.0 counts as a contact only.

    Returns ``problem / contacts`` as a float. When there are no contacts at
    all (including a file without chain A/B atoms) the problem count itself,
    i.e. 0, is returned instead of dividing by zero.

    Example output::

        Problem, contacts, str_name: 311 505 na-prot_13536.pdb
        Sterical clashes  0.615841584158

    :param str_name: path of the file to read.
    :param v: when true, print the file name, the counts and the zero-contact notice.
    """
    if v: print('fn:', str_name)
    atoms_A = []
    atoms_B = []
    # ``with`` guarantees the file is closed, also when a record fails to parse.
    with open(str_name) as structure:
        for line in structure:
            if line[:4] == "ATOM":
                at_nam = line[12:16].strip()
                coor = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
                at = Atom.Atom(at_nam, coor, 0.0, 1.0, ' ', at_nam, 1, at_nam[0])
                if line[21] == "A":
                    atoms_A.append(at)
                elif line[21] == "B":
                    atoms_B.append(at)
    # Build the search tree on the larger chain and probe with the smaller one.
    if len(atoms_A) > len(atoms_B):
        less, more = atoms_B, atoms_A
    else:
        less, more = atoms_A, atoms_B
    problem = 0
    contacts = 0
    # ``less`` non-empty implies ``more`` non-empty; this avoids constructing a
    # search tree over an empty atom list.
    if less:
        ns = NeighborSearch(more)
        for at in less:
            center = array(at.get_coord())
            if ns.search(center, 2.0, 'A'):
                problem += 1
                contacts += 1
            elif ns.search(center, 4.0, 'A'):
                contacts += 1
    if v:
        print('problem:', float(problem))
        print('contacts:', float(contacts))
    if contacts == 0:
        # no contacts: report the (zero) problem count, or skip this structure
        if v: print('ZeroDivison -- skip:', problem, contacts, str_name)
        return problem

    return float(problem) / float(contacts)
Example #19
0
def check_clash(str_name, v=True):
    """check_clash, fract of clashes!

    Reads the ATOM records of chains "A" and "B" from the PDB-formatted file
    ``str_name``. Every atom of the chain with fewer atoms is tested against
    the other chain: a neighbor within 2.0 counts as a problem (and a
    contact), otherwise a neighbor within 4.0 counts as a contact only.

    Returns ``problem / contacts`` as a float. When there are no contacts at
    all (including a file without chain A/B atoms) the problem count itself,
    i.e. 0, is returned instead of dividing by zero.

    Example output::

        Problem, contacts, str_name: 311 505 na-prot_13536.pdb
        Sterical clashes  0.615841584158

    :param str_name: path of the file to read; it is always printed.
    :param v: when true, also print the problem and contact counts.
    """
    print(str_name)
    atoms_A = []
    atoms_B = []
    # ``with`` guarantees the file is closed, also when a record fails to parse.
    with open(str_name) as structure:
        for line in structure:
            if line[:4] == "ATOM":
                at_nam = line[12:16].strip()
                coor = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
                at = Atom.Atom(at_nam, coor, 0.0, 1.0, ' ', at_nam, 1, at_nam[0])
                if line[21] == "A":
                    atoms_A.append(at)
                elif line[21] == "B":
                    atoms_B.append(at)
    # Build the search tree on the larger chain and probe with the smaller one.
    if len(atoms_A) > len(atoms_B):
        less, more = atoms_B, atoms_A
    else:
        less, more = atoms_A, atoms_B
    problem = 0
    contacts = 0
    # ``less`` non-empty implies ``more`` non-empty; this avoids constructing a
    # search tree over an empty atom list.
    if less:
        ns = NeighborSearch(more)
        for at in less:
            center = array(at.get_coord())
            if ns.search(center, 2.0, 'A'):
                problem += 1
                contacts += 1
            elif ns.search(center, 4.0, 'A'):
                contacts += 1
    if v:
        print('problem:', float(problem))
        print('contacts:', float(contacts))
    if contacts == 0:
        # no contacts: report the (zero) problem count, or skip this structure
        print('ZeroDivison -- skip:', problem, contacts, str_name)
        return problem

    return float(problem) / float(contacts)
Example #20
0
def compute_interaction_center(pdb_file, mutation_site):
    """Computes the geometric center of all heavy atoms interacting with the mutated residue.

    The mutated residue is the first residue of the structure whose sequence
    number equals ``mutation_site``. Its search atoms are the second heavy
    atom when it has exactly four heavy atoms (aka GLY, use CA), otherwise
    every atom after the first four. All non-hydrogen atoms within a radius
    of 5 of any search atom are the interacting atoms.

    If no residue matches, a message is printed and the process exits with
    status 1.

    Parameters
    ----------
    pdb_file : str
        Path to the PDB file containing the structure of the protein.

    mutation_site : int
        An integer designating the residue sequence ID.

    Returns
    -------
    NumPy ndarray
        The Cartesian coordinates of the geometric center
    """
    structure = PDBParser(PERMISSIVE=1).get_structure(id='tmp', file=pdb_file)

    # every non-hydrogen atom of the protein is a potential interaction partner
    non_hydrogen = [atom for atom in structure.get_atoms() if atom.element != 'H']

    # locate the mutated residue
    target = next(
        (res for res in structure.get_residues()
         if res.get_id()[1] == int(mutation_site)),
        None,
    )
    if target is None:
        print('Invalid mutation site: {}'.format(mutation_site))
        sys.exit(1)

    # choose the atoms to search around
    residue_heavy = [atom for atom in target.get_list() if atom.element != 'H']
    if len(residue_heavy) == 4:
        probes = [residue_heavy[1]]
    else:
        probes = target.get_list()[4:]

    # gather neighboring atoms, then drop duplicates
    searcher = NeighborSearch(atom_list=non_hydrogen, bucket_size=10)
    hits = []
    for probe in probes:
        hits += searcher.search(center=probe.coord, radius=5, level='A')
    unique_hits = set(hits)

    # average the coordinates of the interacting atoms
    geometric_center = np.zeros((3,))
    for atom in unique_hits:
        geometric_center += atom.coord / len(unique_hits)

    return geometric_center
def get_interactions_between_chains(model, chain_id_1, chain_id_2, r_cutoff=6):
    """Calculate interactions between the residues of the two chains.

    An interaction is defined as a pair of residues where at least one pair of
    atoms is closer than r_cutoff. Only residues whose name is in
    ``AMINO_ACIDS`` and whose hetero flag is blank are considered, on both
    chains.

    If Biopython's ``NeighborSearch`` cannot be imported, the work is handed
    to ``get_interactions_between_chains_slow``.

    .. deprecated:: 1.0
        Use python:fn:`get_interacting_residues` instead.
        It gives you both the residue index and the resnum.

    Returns
    -------
    OrderedDict
        Keys are (residue_number, residue_amino_acid) tuples
        (e.g. ('0', 'M'), ('1', 'Q'), ...).
        Values are lists of (residue_number, residue_amino_acid) tuples
        (e.g. [('0', 'M'), ('1', 'Q'), ...]), sorted by the digits of the
        residue number. Residues of chain 1 without partners are left out.

    Raises
    ------
    Exception
        If either chain id is not found in the model.
    """
    try:
        from Bio.PDB import NeighborSearch
    except ImportError as e:
        logger.warning('Importing Biopython NeighborSearch returned an error: {}'.format(e))
        logger.warning('Using the the slow version of the neighbour-finding algorithm...')
        return get_interactions_between_chains_slow(model, chain_id_1, chain_id_2, r_cutoff)

    def _is_standard(residue):
        return residue.resname in AMINO_ACIDS and residue.id[0] == ' '

    def _resnum(residue):
        # sequence number followed by the insertion code, if any
        return str(residue.id[1]) + residue.id[2].strip()

    def _numeric_part(entry):
        return int(''.join([c for c in entry[0] if c.isdigit()]))

    # Pick the two chains of interest out of the model
    chain_1 = chain_2 = None
    for child in model.get_list():
        if child.id == chain_id_1:
            chain_1 = child
        if child.id == chain_id_2:
            chain_2 = child
    if chain_1 is None or chain_2 is None:
        raise Exception('Chains %s and %s were not found in the model' % (chain_id_1, chain_id_2))

    ns = NeighborSearch(list(chain_2.get_atoms()))
    interactions_between_chains = OrderedDict()
    for residue_1 in chain_1:
        if not _is_standard(residue_1):
            continue
        resnum_1 = _resnum(residue_1)
        resaa_1 = convert_aa(residue_1.get_resname(), quiet=True)

        partners = set()
        for atom_1 in residue_1:
            partners.update(ns.search(atom_1.get_coord(), r_cutoff, 'R'))

        interacting_resids = []
        for residue_2 in partners:
            resnum_2 = _resnum(residue_2)
            resaa_2 = convert_aa(residue_2.get_resname(), quiet=True)
            if _is_standard(residue_2):
                interacting_resids.append((resnum_2, resaa_2,))

        if interacting_resids:
            interacting_resids.sort(key=_numeric_part)
            interactions_between_chains[(resnum_1, resaa_1)] = interacting_resids
    return interactions_between_chains
Example #22
0
    def make_neighbors(self, fl_struct):
        """Build a NeighborsNet of short-range, long-range and inter-chain neighbors.

        For every residue of every chain of ``fl_struct`` that is not flagged
        ``rna_dna_chain``, the residues of ``self.bio_struct`` lying within
        ``self.config["neighbors_range"]`` of any of its atom coordinates are
        classified as:

        * short: same chain, position difference below
          ``self.config["long_short_threshold"]`` and no gaps in between
          (``fl_chain.have_gaps``);
        * long: same chain otherwise;
        * inter: different chain.

        Returns the populated NeighborsNet.
        """
        # create an empty NeighborsNet
        nn = NeighborsNet()
        # use NeighborSearch from Bio.PDB to compute distances
        ns = NeighborSearch(list(self.bio_struct.get_atoms()))
        # for each chain in structure that is not a dna-rna one
        for fl_chain in fl_struct.get_chains():
            if not fl_chain.rna_dna_chain:
                # for each residue in this chain
                for fl_res in fl_chain.residues:
                    # add a default entry
                    nn.add_default(fl_res)
                    # keep track of already inserted neighbors (the search is made for each atom in the residue,
                    # so the same neighboring residue can be returned several times)
                    already_have = []
                    # for each atom (coordinates) in the residue
                    for atom_coord in fl_res.atoms_coord:
                        # for each residue in range
                        for res in ns.search(atom_coord,
                                             self.config["neighbors_range"],
                                             level='R'):
                            # check if it is a good residue, in the same model (sometimes NS computes all models),
                            # and not handled yet for this fl_res
                            if is_good_res(
                                    res) and fl_res.model_id == res_model_id(
                                        res) and not res.get_full_id(
                                        ) in already_have:
                                # try to get the position of the matching residue in fl_struct;
                                # .get() yields None when the residue has no counterpart
                                pos_2 = fl_struct.chains[res_chain_id(
                                    res)].string_index_map.get(
                                        res_string_index(res))
                                # print(fl_res.get_full_identifier(), res.get_full_id(), fl_struct.chains[res_chain_id(res)].string_index_map.get(res_string_index(res)))
                                if not pos_2 == None:
                                    fl_res_2 = fl_struct.chains[res_chain_id(
                                        res)].residues[pos_2]
                                    # if the chain is the same
                                    if fl_res.chain_id == fl_res_2.chain_id:
                                        already_have.append(res.get_full_id())
                                        # a residue is not its own neighbor
                                        if fl_res.pos_in_chain == fl_res_2.pos_in_chain:
                                            continue
                                        # if distance (as residue number) is less than threshold, then it is a short range neighbor
                                        if abs(
                                                fl_res.pos_in_chain -
                                                fl_res_2.pos_in_chain
                                        ) < self.config[
                                                "long_short_threshold"] and not fl_chain.have_gaps(
                                                    fl_res, fl_res_2):
                                            nn.add_short(fl_res, fl_res_2)
                                        # else it is a long range neighbor
                                        else:
                                            nn.add_long(fl_res, fl_res_2)
                                    # if it is not in the same chain it is an inter chain neighbor
                                    else:
                                        nn.add_inter(fl_res, fl_res_2)
                                        already_have.append(res.get_full_id())

        return nn
    def extract_feature(self):
        """Attach the D1 surface shape distribution to every residue of the database.

        For the ligand and the receptor of each complex's unbound formation the
        per-residue distributions are loaded from
        ``<dir name><protein name>.npy``. When that file does not exist yet it
        is computed first: for residue ``i`` the distribution is
        ``self._compute_distribution(nearby, center)`` where ``center`` is the
        center of residue ``i`` and ``nearby`` holds residue ``i`` itself plus
        every residue within ``self.radius`` whose relative accessible surface
        area is at least ``self.rASA_threshold``.

        Note: files cached by earlier runs are loaded unchanged.
        """
        seed(self.seed)
        print_info_nn(
            " >>> Adding D1 surface shape distribution for database {0} ... ".
            format(self._database.name))
        overall_time = datetime.now()
        counter = 0
        if not os.path.exists(self._get_dir_name()):
            os.makedirs(self._get_dir_name())
        for complex_name in self._database.complexes.keys():
            protein_complex = self._database.complexes[complex_name]
            proteins = [
                protein_complex.unbound_formation.ligand,
                protein_complex.unbound_formation.receptor
            ]
            for protein in proteins:
                shape_dist_file = self._get_dir_name() + protein.name
                if not os.path.exists(shape_dist_file + ".npy"):
                    # progress output, with a line break after every 16th protein
                    counter += 1
                    if counter <= 15:
                        print_info_nn("{0}, ".format(protein.name))
                    else:
                        counter = 0
                        print_info("{0}".format(protein.name))
                    neighbour_search = NeighborSearch(protein.atoms)
                    distributions = np.zeros(
                        (len(protein.residues), self.number_of_bins + 1))
                    for i, residue in enumerate(protein.residues):
                        nearby_residues = [protein.biopython_residues[i]]
                        candidates = neighbour_search.search(
                            residue.center, self.radius, "R")
                        for candidate in candidates:
                            try:
                                candidate_index = protein.biopython_residues.index(
                                    candidate)
                            except ValueError:
                                # not one of this protein's residues
                                continue
                            # Separate name on purpose: rebinding ``residue``
                            # here would make the center used below belong to
                            # a neighbor instead of residue ``i``.
                            neighbour = protein.residues[candidate_index]
                            if neighbour.get_feature(
                                    Features.RELATIVE_ACCESSIBLE_SURFACE_AREA
                            ) >= self.rASA_threshold:
                                nearby_residues.append(candidate)
                        distributions[i, :] = self._compute_distribution(
                            nearby_residues, residue.center)
                    np.save(shape_dist_file, distributions)
                distributions = np.load(shape_dist_file + ".npy")
                for i, residue in enumerate(protein.residues):
                    residue.add_feature(
                        Features.D1_SURFACE_SHAPE_DISTRIBUTION,
                        distributions[i, :])
        print_info("took {0} seconds.".format(
            (datetime.now() - overall_time).seconds))
Example #24
0
def secondary_struc_cmap(chain,
                         sequence,
                         structure,
                         cutoff_distance=4.5,
                         cutoff_numcontacts=10,
                         exclude_neighbour=3,
                         ss_elements=('H', 'E', 'B', 'b', 'G')):
    """Build a contact map between the secondary-structure segments of a chain.

    Consecutive positions of ``structure`` that carry the same code from
    ``ss_elements`` form one segment (numbered from 1; 0 means "no segment").
    Two segments are reported as being in contact when at least
    ``cutoff_numcontacts`` atom pairs closer than ``cutoff_distance`` connect
    them, counting only pairs whose residues are more than
    ``exclude_neighbour`` positions apart.

    Args:
        chain: entity whose atoms and residues are unfolded with
            ``Selection.unfold_entities``.
        sequence: only used for its length in the mismatch error message.
        structure: per-residue secondary-structure codes; must have one entry
            per residue of ``chain``.
        cutoff_distance: atom-atom distance cutoff passed to ``search_all``.
        cutoff_numcontacts: minimum number of atom pairs for a segment pair.
        exclude_neighbour: minimum separation (in residue positions).
        ss_elements: codes that count as secondary-structure elements.

    Returns:
        ``(index, segment)``: an array of 0-based ``(segment_a, segment_b)``
        pairs and the per-residue segment-number array.

    Raises:
        AssertionError: if ``structure`` and the chain differ in length.
    """
    atom_list = Selection.unfold_entities(chain, 'A')
    res_list = Selection.unfold_entities(chain, 'R')

    numbering = np.array([res.get_id()[1] for res in res_list])

    assert len(structure) == len(
        numbering
    ), f'PDB file and Secondary structure map do not match!\n {chain.get_parent().get_parent().id} - PDB: {len(res_list)} Residues VS. STRIDE: {len(sequence)} Residues. '

    ns = NeighborSearch(atom_list)
    all_neighbours = ns.search_all(cutoff_distance, 'A')

    struc_length = len(structure)
    segment = np.zeros([struc_length], dtype='int')
    nseg = 1
    for i in range(struc_length):
        if structure[i] in ss_elements:
            segment[i] = nseg
            # A segment ends at the last position or where the code changes.
            # The last-position test must come first so that
            # structure[i + 1] is never read past the end.
            if i == struc_length - 1 or structure[i + 1] != structure[i]:
                nseg += 1

    # Residue number -> first position carrying that number, so each atom
    # pair is resolved with a dict lookup instead of a scan of ``numbering``.
    first_position = {}
    for position, resnum in enumerate(numbering.tolist()):
        first_position.setdefault(resnum, position)

    index_list = []
    for atompair in all_neighbours:
        res1 = first_position[atompair[0].get_parent().id[1]]
        res2 = first_position[atompair[1].get_parent().id[1]]

        if abs(res1 - res2) > exclude_neighbour:
            if segment[res1] != 0 and segment[res2] != 0 and segment[
                    res1] != segment[res2]:
                index_list.append((segment[res1] - 1, segment[res2] - 1))

    index_list.sort()
    count = Counter(index_list)
    index = [pair for pair, hits in count.items()
             if hits >= cutoff_numcontacts]

    return np.array(index), segment
Example #25
0
def check_clash(model, chain_to_add, clash_distance=2.5):
    """
    Check whether a newly added chain clashes with the existing structure.

    model: previous structure.
    chain_to_add: new chain (Chain object from PDB.Chain).
    clash_distance: distance threshold (in Angstroms) below which two atoms
        are considered to clash.

    Returns a pair ``(clashing_chains, self_clash)``. ``clashing_chains`` is
    the set of ids of the chains hit by the new chain, or None when 20 or
    fewer atom clashes were found. ``self_clash`` is True only when exactly
    one chain is hit, the second character of its id equals that of the new
    chain's id, and ``superimpose`` reports an RMSD of at most 3.0.
    """
    searcher = NeighborSearch(list(model.get_atoms()))

    clashing_ids = set()
    clash_count = 0

    # Every model atom within clash_distance of an atom of the new chain
    # counts as one clash; the id of its chain is remembered.
    for new_atom in chain_to_add.get_atoms():
        for hit in searcher.search(new_atom.get_coord(), clash_distance):
            clashing_ids.add(hit.get_parent().get_parent().id)
            clash_count += 1

    # Too few clashes to be significant.
    if not clash_count > 20:
        return None, False

    # A single clashing chain with a matching id may be the chain itself.
    if len(clashing_ids) == 1 and chain_to_add.id[1] == list(
            clashing_ids)[0][1]:
        existing_chain = model[0][list(clashing_ids)[0]]
        RMSD = superimpose(existing_chain, chain_to_add)
        if RMSD <= 3.0:
            return clashing_ids, True
        return clashing_ids, False

    # Clashing with one or more other chains.
    return clashing_ids, False
    def check_if_collide(self, component, point, radius):
        """
        Count the atoms of a component that collide with a given point.

        An atom collides when it lies within ``radius + 1.5`` of the point;
        the extra 1.5 A is the assumed pseudoresidue radius used as the
        collision detection area.

        Returns the number of such atoms (0 means no collision).
        """
        searcher = NeighborSearch(list(component.get_atoms()))
        centre = array([point.x, point.y, point.z])
        colliding_atoms = searcher.search(centre, radius + 1.5, "A")
        return len(colliding_atoms)
Example #27
0
 def interaction(self, pdb_id, filename, domain_1, domain_2):
     """Returns a dict with informations (atoms, residues...) if two domains
     interact with each other, and returns False if not."""
     print "Searching for interactions in "+pdb_id+"..."
     # creates a strucuture object/class to extract atoms of the two domains
     model = structure(pdb_id).get_model(pdb_id, filename)
     residues_1 = structure(pdb_id).get_residues(model, domain_1)
     residues_2 = structure(pdb_id).get_residues(model, domain_2)
     atoms_1 = Selection.unfold_entities(residues_1, 'A')
     atoms_2 = Selection.unfold_entities(residues_2, 'A')
     # gets the serial numbers of the atoms
     numbers_1 = structure(pdb_id).serial_numbers(atoms_1)
     numbers_2 = structure(pdb_id).serial_numbers(atoms_2)
     # the search starts here !
     atoms = Selection.unfold_entities(model, 'A')
     nsearch = NeighborSearch(atoms)
     interacting_atoms_1 = []
     interacting_atoms_2 = []
     for atom in atoms:
         if atom.get_serial_number() in numbers_1:
             point = atom.get_coord()
             # This is how we detect an interaction, we put 5 angstroms
             # here.
             # This is the simplest method we can use, and we're not sure
             # that it is correct.
             # Originally we have planned to go further by doing a surface
             # and accesssion analysis, but we had no time.
             # We hope we can talk about that during the talk.
             neighbors = nsearch.search(point, 5)
             for neighbor in neighbors:
                 if neighbor.get_serial_number() in numbers_2:
                     interacting_atoms_2.append(neighbor)
                     if atom not in interacting_atoms_1:
                         interacting_atoms_1.append(atom)
     # returns a dict with all residues and atoms
     if len(interacting_atoms_2) > 0:
         infos = {}
         infos['1'] = {}
         infos['2'] = {}
         # just get the parent residues for the list of atoms
         interacting_residues_1 = structure(pdb_id).atoms2residues(
                 interacting_atoms_1)
         interacting_residues_2 = structure(pdb_id).atoms2residues(
                 interacting_atoms_2)
         infos['1']['atoms'] = interacting_atoms_1
         infos['2']['atoms'] = interacting_atoms_2
         infos['1']['residues'] = interacting_residues_1
         infos['2']['residues'] = interacting_residues_2
         return infos
     else: return False
Example #28
0
 def getReferenceAtomsWithinSphere(self, residue, radius, caOnly=False):
     """Return the atoms of ``self.ref_chain`` near the CA atom of a residue.

     residue: object whose class is named 'Residue'; its 'CA' atom is the
         centre of the search sphere.
     radius: radius of the search sphere.
     caOnly: when true, only atoms named 'CA' are returned.

     Raises TypeError if the class of ``residue`` is not named 'Residue'.
     """
     if residue.__class__.__name__ != 'Residue':
         raise TypeError(
             "The function process(residue, radius, caOnly) expects the first argument of class Bio.PDB.Residue"
         )
     reference_atoms = Selection.unfold_entities(self.ref_chain, 'A')
     searcher = NeighborSearch(reference_atoms)
     in_sphere = searcher.search(residue['CA'].get_coord(), radius, level='A')
     if not caOnly:
         return in_sphere
     return [atom for atom in in_sphere if atom.get_name() == 'CA']
Example #29
0
def get_neighbor_atoms(chain: ChainDesc, ligand: PhysicalResidue, produce_physical_atoms: bool = True) -> list:  # a list of physical atoms
    """Collect the chain atoms that lie within 10 angstroms of a ligand.

    Only residues whose short name is listed by ``database.get_amino_acids()``
    or ``database.get_cofactors()`` are searched; residues sharing the
    ligand's short name and residues for which ``check_deeper()`` is true are
    left out.

    Args:
        chain: chain description; ``chain.chain`` is the BioPython chain and
            ``chain.get_residues()`` yields the physical residues.
        ligand: ligand residue whose atoms are the search centres.
        produce_physical_atoms: when False, the BioPython atoms are returned
            as they are; otherwise each is wrapped in a ``PhysicalAtom``.

    Returns:
        A list of neighbouring atoms without duplicates, in discovery order.
        The list is empty when the chain offers no atoms to search.
    """
    # BioPython residues of the chain, addressed below through the 1-based
    # index of the matching physical residue.
    bio_residues = list(chain.chain.get_residues())

    chain_atoms = list()
    for residue in chain.get_residues():  # physical residue
        short_name = residue.get_residue_desc().get_short_name()

        # do not count atoms from ligand itself
        if short_name == ligand.get_residue_desc().get_short_name():
            continue

        # do not count atoms from ligands
        # TODO: refactor
        if short_name not in database.get_amino_acids() + database.get_cofactors():
            continue
        elif residue.get_residue_desc().check_deeper():
            continue

        # BioPython atoms
        chain_atoms.extend(bio_residues[residue.get_index() - 1].get_atoms())

    neighbour_atoms = list()
    if chain_atoms:
        # chain_atoms does not change any more, so the search structure is
        # built once instead of once per ligand atom.
        search = NeighborSearch(chain_atoms)
        for atom in ligand.get_atoms():  # physical atom
            for neighbour in search.search(atom.get_coords(), 10.0):  # BioPython atom
                if neighbour not in neighbour_atoms:
                    neighbour_atoms.append(neighbour)
    if not produce_physical_atoms:
        return neighbour_atoms

    neighbour_physical_atoms = list()
    for neighbour in neighbour_atoms:
        parent_residue = neighbour.get_parent()
        # Residue number 1 is treated as the N-terminus; a residue carrying
        # an OXT atom is treated as the C-terminus.
        terminus = None
        if parent_residue.get_id()[1] == 1:
            terminus = 'N'
        elif 'OXT' in [a.get_id() for a in parent_residue.get_atoms()]:
            terminus = 'C'
        atom_desc = database.get_residue(parent_residue, terminus).get_atom(neighbour.get_id())
        physical_atom = PhysicalAtom(bio_atom=neighbour, atom_desc=atom_desc, coords=neighbour.get_coord())
        neighbour_physical_atoms.append(physical_atom)

    return neighbour_physical_atoms
Example #30
0
def get_contacts(model, cutOff=5., minSeqDist=5):
    """Return every atom pair of ``model`` closer than ``cutOff``.

    Each contact is a tuple ``(full_atom_ids, distance, weight)`` where
    ``full_atom_ids`` is the two-element list of the atoms' full ids,
    ``distance`` is the result of subtracting the first atom from the second
    and ``weight`` is always 1.0. ``minSeqDist`` is accepted but not used.
    """
    searcher = NeighborSearch(list(model.get_atoms()))
    return [
        ([first.get_full_id(), second.get_full_id()], second - first, 1.)
        for first, second in searcher.search_all(cutOff)
    ]
Example #31
0
    def search_stacking(self):
        """ Search the model for stacking between aromatic residues.

        Fills ``self.aromatic_less_5`` (residue pairs with ring atoms within
        6A of each other), ``self.stacking`` (the pairs accepted by
        ``is_stacking``, lower residue number first) and
        ``self.stacking_networks``.
        """
        # Ring atoms of every aromatic residue of the model
        ring_atoms = [
            res[atom_name]
            for res in self.model.res
            if res.resname in ['PHE', 'TYR', 'TRP', 'HIS']
            for atom_name in parametres.dico_cycles[res.resname]["cycle"]
        ]

        # Residue pairs having at least one pair of ring atoms within 6A
        searcher = NeighborSearch(ring_atoms)
        self.aromatic_less_5 = searcher.search_all(6, level='R')

        # Keep the stacked pairs, ordered by residue number
        self.stacking = []
        for first, second in self.aromatic_less_5:
            if not is_stacking(first, second):
                continue
            if first.get_id()[1] < second.get_id()[1]:
                ordered = (first, second)
            else:
                ordered = (second, first)
            self.stacking.append(ordered)

        self.stacking_networks = self.search_network(self.stacking,
                                                     _type='stacking')
Example #32
0
def pdb_dist(pdb_f, pdb_id):
    """Return the shortest distances between phospho-residues and histidines.

    The centre atoms are the O1P/O2P/O3P/OH/OG/OG1 atoms of PTR, SEP and TPO
    residues; the partner atoms are the ND1/NE2 atoms of HIS residues. For
    every (centre residue, histidine) pair, identified by the string forms of
    the two residues, the minimum atom-atom distance is kept.

    Args:
        pdb_f: PDB file handed to ``PDBParser.get_structure``.
        pdb_id: structure id handed to ``PDBParser.get_structure``.

    Returns:
        A list with one minimum distance per residue pair, in the order the
        pairs were first met.
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)
    # Every centre atom is compared with every partner atom below, so no
    # spatial index is built here.
    center = [a for a in structure.get_atoms() if a.get_parent().get_resname(
    ) in ['PTR', 'SEP', 'TPO'] and a.get_name() in ['O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1']]

    # Only histidine ring nitrogens are considered as partners.
    neighbors = [a for a in structure.get_atoms() if a.get_parent().get_resname(
    ) in ['HIS'] and a.get_name() in ['ND1','NE2']]

    def calc_dist(a, b):
        """Euclidean distance between the coordinates of two atoms."""
        vector = a.coord - b.coord
        return np.sqrt(np.sum(vector * vector))

    # Residue-pair key -> all atom-atom distances seen for that pair.
    dist = {}
    for c in center:
        for n in neighbors:
            key = str(c.get_parent()) + '_' + str(n.get_parent())
            dist.setdefault(key, []).append(calc_dist(c, n))

    return [min(values) for values in dist.values()]
def filter_structure(pdb,chain,lig,lig_num):
  """Save the ligand and every residue in contact with it to a PDB file.

  The structure is read from ``pdb_path/<pdb>.pdb``. A residue is written
  out when one of its atoms forms a pair, within ``search_radius``, with an
  atom of the ligand identified by chain id, residue name and residue number
  (``lig_num`` is compared as a string). The result goes to
  ``output_path/<pdb>:<chain>:<lig>:<lig_num>.atoms.pdb``.
  """
  source_file = pdb_path + "/" + pdb + ".pdb"
  target_file = output_path + "/" + ":".join([pdb, chain, lig, lig_num]) + ".atoms.pdb"

  def is_ligand(residue):
    # True when the residue is the requested ligand.
    return (residue.parent.id == chain
            and residue.resname == lig
            and str(residue.id[1]) == lig_num)

  structure = PDBParser(QUIET=1).get_structure(pdb, source_file)
  searcher = NeighborSearch(list(structure.get_atoms()))

  res_list = set()
  for first_atom, second_atom in searcher.search_all(search_radius):
    first_res = first_atom.parent
    second_res = second_atom.parent
    # Keep both partners as soon as either of them is the ligand.
    if is_ligand(first_res) or is_ligand(second_res):
      res_list.update((first_res, second_res))

  writer = PDBIO()
  writer.set_structure(structure)
  writer.save(target_file, ResSelect(res_list))
Example #34
0
 def interchain_residue_contacts(self, chain_ids_1, chain_ids_2, radius):
     """Group residue contacts between two sets of chains.

     Residue pairs within ``radius`` are searched among the atoms of all
     chains named in ``chain_ids_1`` or ``chain_ids_2``. A pair is kept when
     one residue belongs to a chain of the first set and the other to a
     chain of the second set.

     Returns a defaultdict mapping ``(chain_from_set_1, chain_from_set_2)``
     to a list of dicts, each mapping the two chain ids to their residue.
     """
     chains_by_id = {chain.get_id(): chain for chain in self.get_chains()}
     # Atoms of every chain that appears in either of the two id sets.
     atoms = [
         atom for chain_id, chain in chains_by_id.items()
         if chain_id in chain_ids_1 + chain_ids_2
         for atom in Selection.unfold_entities(chain, "A")
     ]
     classified_contacts = defaultdict(list)
     for contact in NeighborSearch(atoms).search_all(radius, "R"):
         chain_1, chain_2 = [
             residue.get_parent().get_id() for residue in contact
         ]
         # The key always lists the chain of the first set first.
         if chain_1 in chain_ids_1 and chain_2 in chain_ids_2:
             pair_key = (chain_1, chain_2)
         elif chain_2 in chain_ids_1 and chain_1 in chain_ids_2:
             pair_key = (chain_2, chain_1)
         else:
             continue
         classified_contacts[pair_key].append({
             chain_1: contact[0],
             chain_2: contact[1]
         })
     return classified_contacts
Example #35
0
def calculateNeighbors(filename, radius):
	"""Compute a burial value for every residue of a PDB file.

	The burial of a residue is the number of atoms found within ``radius``
	of its atoms (summed over the atoms of the residue, so an atom can be
	counted more than once) divided by the number of atoms in the residue.

	Returns a dict mapping the residue number, as a string, to the burial.
	"""
	structure = parser.get_structure(filename.split(".pdb")[0], filename)
	all_atoms = Selection.unfold_entities(structure, 'A') # A for atoms
	all_residues = Selection.unfold_entities(structure, 'R') # R for residues
	searcher = NeighborSearch(all_atoms)

	data = {}
	for residue in all_residues:
		residue_atoms = residue.get_list()
		contact_count = sum(
			len(searcher.search(atom.get_coord(), radius, level = "A"))
			for atom in residue_atoms
		)
		data[str(residue.get_id()[1])] = contact_count/len(residue_atoms)

	return data
def calculate_ic(structure, d_cutoff=5.5, selection=None):
    """
    Calculates intermolecular contacts in a parsed structure object.

    structure: entity providing ``get_atoms()``.
    d_cutoff: distance cutoff for the residue-level neighbour search.
    selection: optional mapping from chain id to a group label. When given,
        only contacts between chains that are both in the mapping and carry
        different labels are kept. Otherwise every contact between residues
        of different chains is kept.

    Returns the list of residue pairs in contact.
    Raises ValueError when no contact is found.
    """
    atom_list = list(structure.get_atoms())
    ns = NeighborSearch(atom_list)
    all_list = ns.search_all(radius=d_cutoff, level='R')

    if selection:
        # Use the mapping passed by the caller.
        _sd = selection
        ic_list = [c for c in all_list if (c[0].parent.id in _sd and c[1].parent.id in _sd)
                    and (_sd[c[0].parent.id] != _sd[c[1].parent.id]) ]
    else:
        ic_list = [c for c in all_list if c[0].parent.id != c[1].parent.id]

    if not ic_list:
        raise ValueError('No contacts found for selection')

    return ic_list
 def _remove_distant_hatatms(self, new_model, hetatm_chain):
     """Detach the residues of ``hetatm_chain`` that have no atom of
     ``new_model`` within ``self.r_cutoff``.

     The chain is first renamed to the last uppercase letter that is not in
     ``self.chain_ids``.
     """
     ns = NeighborSearch(list(new_model.get_atoms()))
     free_ids = [
         c for c in reversed(string.ascii_uppercase) if c not in self.chain_ids]
     hetatm_chain.id = free_ids[0]
     position = 0
     # The index only advances past residues that are kept: detaching a
     # residue moves the next one into the current position.
     while position < len(hetatm_chain):
         candidate = hetatm_chain.child_list[position]
         has_contact = any(
             ns.search(atom.get_coord(), self.r_cutoff, 'R')
             for atom in candidate)
         if has_contact:
             position += 1
         else:
             hetatm_chain.detach_child(candidate.id)
    def extract_feature(self):
        """Add the D1 surface shape distribution feature to every residue.

        For each ligand and receptor of the unbound formation of every
        complex in the database, a per-residue distribution is computed and
        cached as ``<dir>/<protein name>.npy``. It is computed only when
        that file does not exist yet. The rows are then attached to the
        residues as ``Features.D1_SURFACE_SHAPE_DISTRIBUTION``.

        The distribution of residue ``i`` is built from the residue itself
        plus the residues found within ``self.radius`` of its centre whose
        relative accessible surface area is at least ``self.rASA_threshold``.
        """
        seed(self.seed)
        print_info_nn(" >>> Adding D1 surface shape distribution for database {0} ... ".format(self._database.name))
        overall_time = datetime.now()
        counter = 0
        if not os.path.exists(self._get_dir_name()):
            os.makedirs(self._get_dir_name())
        for complex_name in self._database.complexes.keys():
            protein_complex = self._database.complexes[complex_name]
            proteins = [protein_complex.unbound_formation.ligand, protein_complex.unbound_formation.receptor]
            for protein in proteins:
                shape_dist_file = self._get_dir_name() + protein.name
                if not os.path.exists(shape_dist_file + ".npy"):
                    # Progress output: a line break after every 16th name.
                    counter += 1
                    if counter <= 15:
                        print_info_nn("{0}, ".format(protein.name))
                    else:
                        counter = 0
                        print_info("{0}".format(protein.name))
                    neighbour_search = NeighborSearch(protein.atoms)
                    distributions = np.zeros((len(protein.residues), self.number_of_bins + 1))
                    for i, residue in enumerate(protein.residues):
                        nearby_residues = [protein.biopython_residues[i]]
                        for nearby_residue in neighbour_search.search(residue.center, self.radius, "R"):
                            # Skip hits that are not residues of this protein.
                            try:
                                residues_index = protein.biopython_residues.index(nearby_residue)
                            except ValueError:
                                continue
                            # A separate name keeps ``residue`` bound to
                            # residue i, whose centre is needed below.
                            neighbour = protein.residues[residues_index]
                            if neighbour.get_feature(Features.RELATIVE_ACCESSIBLE_SURFACE_AREA) >= self.rASA_threshold:
                                nearby_residues.append(nearby_residue)
                        distributions[i, :] = self._compute_distribution(nearby_residues, residue.center)
                    np.save(shape_dist_file, distributions)
                distributions = np.load(shape_dist_file + ".npy")
                for i in range(len(protein.residues)):
                    protein.residues[i].add_feature(Features.D1_SURFACE_SHAPE_DISTRIBUTION, distributions[i, :])
        print_info("took {0} seconds.".format((datetime.now() - overall_time).seconds))
Example #39
0
structure = PDBParser().get_structure('X', args.pdb)

center_atoms = []
pymol_command = ""

all_atom_list = [atom for atom in structure.get_atoms() if atom.name == 'CA' ]

for k in args.chain1 :
    chain_atoms = [atom for atom in structure[0][k].get_atoms() if atom.name == 'CA' ]

    center_atoms += chain_atoms

#atom_list = [x for x in all_atom_list if x not in center_atoms]

for j in args.chain2 :
    atom_list = [atom for atom in structure[0][j].get_atoms() if atom.name == 'CA' ]
    ns = NeighborSearch(atom_list)

    nearby_residues = {res for center_atom in center_atoms
                        for res in ns.search(center_atom.coord, 8.5, 'R')}

    print "\nNeighbor residues in chain ", j, ": \n"
    print sorted(res.id[1] for res in nearby_residues)

    pymol_command = "show spheres, chain " + j + " and resi "

    for m in sorted(res.id[1] for res in nearby_residues):
        pymol_command = pymol_command + str(m) + "+"

    print pymol_command[:-1] + " and name CA \n" 
Example #40
0
SCALAR_EXPRESSION %s_o = %s_startenergy + 4.91
SCALAR_EXPRESSION %s_s = %s
SCALAR_EXPRESSION %s_exp = exp(%s_s*(%s_currentenergy-%s_o))
SCALAR_EXPRESSION %s_k = %s
SCALAR_EXPRESSION %s_sig = (1-%s_k)+(%s_k/(1+%s_exp))

""") %(structure_id, structurefilename, correspondencefilename, resfilename, structure_id, startenergy, structure_id, structure_id, structure_id, s_value, structure_id, structure_id, structure_id, structure_id, structure_id, k_value, structure_id, structure_id, structure_id, structure_id)
		fitnessfile.write(outstring)

		fitnessstring = fitnessstring + str("*%s_sig") % (structure_id)
		

		nucleosome = structure[0]

		atom_list = Selection.unfold_entities(nucleosome, 'A') # A for atoms
		neighbor_search = NeighborSearch(atom_list)

		contacts_list = neighbor_search.search_all(radius, level = 'R')
		
		repack_residues = []
		for contact in contacts_list:
			res1 = contact[0]
			res2 = contact[1]
			res1id = int(res1.get_id()[1])
			chain1 = res1.get_parent()
			chain1id = chain1.get_id()
			res2id = int(res2.get_id()[1])
			chain2 = res2.get_parent()
			chain2id = chain2.get_id()
			
			res1_in_patch = False
        PDB = pdb_biomol_match.group(1)
        BIOMOL_ID = pdb_biomol_match.group(2)

    pdb_rcsb_asm_match = re.search(PDB_RCSB_ASM_REGEX, INPUT_FILENAME)
    if pdb_rcsb_asm_match:
        PDB = pdb_rcsb_asm_match.group(1)
        BIOMOL_ID = pdb_rcsb_asm_match.group(2)

    # LOAD STRUCTURE
    structure = PDBParser().get_structure('structure', INPUT_FILE)
    structure_atoms = list(structure.get_atoms())

    logging.info('Loaded PDB structure (BioPython).')

    # CONSTRUCT KDTREE
    neighborsearch = NeighborSearch(structure_atoms)

    logging.info('Constructured NeighborSearch.')

    # GET INTERACTIONS
    logging.info('Calculating interactions...')
    for interaction_level in 'ARC':

        if interaction_level in OUTPUTS:

            logging.info('Calculating interactions for {}s...'.format(
                LEVEL_MAP[interaction_level]))

            pairs = neighborsearch.search_all(INTERACTION_THRESHOLD,
                                              level=interaction_level)
Example #42
0
    def _build_interface(self, model, id, threshold, rsa_calculation, rsa_threshold, include_waters=False, *chains):
        """
        Populate ``self.interface`` with the interface residues of a model.

        model: iterable of chains; only chains whose id is in
            ``self.chain_list`` are considered.
        id: not used by this method.
        threshold: distance below which two residues are in contact; also
            stored as ``self.threshold``.
        rsa_calculation: when true and the ``naccess`` executable is found,
            the residues returned by ``self._rsa_calculation`` are added too.
        rsa_threshold: forwarded to ``self._rsa_calculation``.
        include_waters: when false, contacts involving 'HOH' are ignored.
        chains: optional chain-id pairs; when given, only contacts whose
            (chain, chain) tuple is among them are kept.

        Raises ValueError when no residue pair is found within the threshold.
        """

        self.threshold=threshold

        # Recover chain list from initial unpacking
        chain_list = self.chain_list

        # Unfold atom list
        atom_list = []
        for c in model:
            if c.id in chain_list:
                atom_list.extend(Selection.unfold_entities(c,'A'))

        # NeighborSearch gives all residue pairs closer to each other than
        # the threshold distance
        ns=NeighborSearch(atom_list)
        pairs=ns.search_all(threshold, 'R')

        if not pairs:
            raise ValueError("No atoms found in the interface")

        # Selection of residues pairs
        # 1. Exclude water contacts
        # 2. Filter same-chain contacts
        # 3. Filter user-defined chain pairs

        uniq_pairs=[]

        for pair in pairs:

            pair_resnames = (pair[0].resname, pair[1].resname)
            pair_chains = (pair[0].parent.id, pair[1].parent.id)

            if (not include_waters and 'HOH' in pair_resnames) or (pair_chains[0] == pair_chains[1]):
                continue

            if not chains or pair_chains in chains:
                uniq_pairs.append(pair)

        # Build the Interface
        # 1. Iterate over the pair list
        # 2. Add residues.

        for resA, resB in uniq_pairs:
            if resA not in self.interface:
                self._add_residue(resA)
            if resB not in self.interface:
                self._add_residue(resB)

        # Accessible surface area is calculated for each residue only if
        # naccess is set up on the user's computer and rsa_calculation is
        # true. The residues it yields are added inside the same branch so
        # that nothing is read when the calculation was skipped.
        if rsa_calculation and os.system('which naccess') == 0:
            rsa_pairs=self._rsa_calculation(model, chain_list, rsa_threshold)

            for res in rsa_pairs:
                if res not in self.interface:
                    self._add_residue(res)
        self._secondary_structure(model)
        self.interface.uniq_pairs=uniq_pairs
# Pick the first residue whose name matches the requested ligand.
# NOTE(review): `ligand` is not initialised in the visible code; if no
# residue matches, the check below relies on it having been set earlier.
for residue in structure.get_residues():
    if residue.resname == args.ligand:
        ligand = residue
        break

if not ligand:
    print('[!!] Ligand residue \'{0}\' not found in structure'.format(args.ligand), file=sys.stderr)
    sys.exit(1)

# Calculate center of mass of the ligand (unweighted mean of the atom
# coordinates). A list is built explicitly because np.asarray cannot convert
# a lazy `map` object to a float array on Python 3.
ligand_com = [sum(x)/len(x) for x in zip(*[at.coord for at in ligand])]
ligand_com = np.asarray(ligand_com, dtype=np.float32)

# Calculate neighbors considering only aminoacid/nucleotide atoms (excl. waters, other ligands, etc)
sel_atoms = [at for at in structure.get_atoms() if at.parent.id[0] == ' ']
ns = NeighborSearch(sel_atoms)
neighbors = ns.search(ligand_com, 10.0, level='R') # 10A radius, return residues

# Calculate residue closer to each ligand atom and the respective distance
ligand_atoms = ligand.child_list
min_dist_list, _seen = [], set()

for l_at in ligand_atoms:
    # (residue atom, ligand atom, distance) for every atom of every neighbor
    distances = []
    for residue in neighbors:
        for r_at in residue:
            distances.append((r_at, l_at, r_at - l_at))

    distances.sort(key=lambda x: x[-1])
    min_dist = distances[0]
    # One restraint per residue to keep the number of restraints small
def get_interacting_residues(model, r_cutoff=5, skip_hetatm_chains=True):
    """Return residue-residue interactions between all chains in `model`.

    Parameters
    ----------
    model : biopython.Model
        Model to analyse.
    r_cutoff : number
        Search radius around every atom of a residue.
    skip_hetatm_chains : bool
        Skip the chains for which ``chain_is_hetatm`` is true.

    Returns
    -------
    dict
        A dictionary of interactions between chains i (0..n-1) and j (i+1..n).
        Keys are (chain_idx, chain_id, residue_idx, residue_resnum, residue_amino_acid) tuples
        (e.g. ``(0, 'A', 0, '0', 'M')``).
        Values are sets of tuples having the same format as the keys.

    Examples
    --------
    You can reverse the order of keys and values like this::

        complement = dict()
        for key, values in get_interacting_residues(model).items():
            for value in values:
                complement.setdefault(value, set()).add(key)


    You can get the set of all interacting chain pairs using this command::

        {(key[0], value[0])
         for (key, values) in get_interacting_residues(model).items()
         for value in values}

    """
    from Bio.PDB import NeighborSearch

    interactions = dict()

    # First chain of the pair
    for idx_1, chain_1 in enumerate(model):
        if skip_hetatm_chains and chain_is_hetatm(chain_1):
            logger.debug(
                "Skipping chain_1 with idx {} because it contains only hetatms."
                .format(idx_1)
            )
            continue
        aa_ids_1 = get_aa_residues(chain_1)

        # Second chain of the pair: only chains that come after the first
        for idx_2, chain_2 in enumerate(model.child_list[idx_1 + 1:], idx_1 + 1):
            if skip_hetatm_chains and chain_is_hetatm(chain_2):
                logger.debug(
                    "Skipping chain_2 with idx {} because it contains only hetatms."
                    .format(idx_2)
                )
                continue
            aa_ids_2 = get_aa_residues(chain_2)
            searcher = NeighborSearch(list(chain_2.get_atoms()))

            for residue_1 in chain_1:
                # Residues not listed by get_aa_residues are ignored.
                try:
                    position_1 = aa_ids_1.index(residue_1.id)
                except ValueError:
                    continue
                resnum_1 = str(residue_1.id[1]) + residue_1.id[2].strip()
                aa_1 = convert_aa(residue_1.resname, quiet=True)
                key_1 = (idx_1, chain_1.id, position_1, resnum_1, aa_1)

                # Residues of chain_2 with an atom within r_cutoff of any
                # atom of residue_1
                partners = {
                    partner
                    for atom_1 in residue_1
                    for partner in searcher.search(atom_1.get_coord(), r_cutoff, 'R')
                }

                partner_keys = []
                for residue_2 in partners:
                    try:
                        position_2 = aa_ids_2.index(residue_2.id)
                    except ValueError:
                        continue
                    resnum_2 = str(residue_2.id[1]) + residue_2.id[2].strip()
                    aa_2 = convert_aa(residue_2.get_resname(), quiet=True)
                    partner_keys.append(
                        (idx_2, chain_2.id, position_2, resnum_2, aa_2)
                    )
                if partner_keys:
                    interactions.setdefault(key_1, set()).update(partner_keys)

    return interactions