def read_pdb_file(file_name, name=None):
    """
    Parse a PDB file and flatten it into atom and residue lists.

    file_name: path of pdb file
    name: name of the structure (default: file_name without its extension)

    return:: (structure, atoms, residues)
        structure: structure object returned by PDBParser
        atoms: list of all atoms unfolded from the structure
        residues: list of all residues unfolded from the structure

    Raises ValueError when len(structure) is not 1.
    """
    structure_name = splitext(file_name)[0] if name is None else name
    structure = PDBParser().get_structure(structure_name, file_name)
    if len(structure) != 1:
        raise ValueError(
            "Unexpected number of structures in " + structure_name)
    residues = Selection.unfold_entities(structure, 'R')
    atoms = Selection.unfold_entities(structure, 'A')
    return structure, atoms, residues
def get_end_chain(structure, normal, axis, res_in_slice):
    """
    To get end-chain factor in this slice.

    For every chain, the first and the last residue whose hetero flag is
    blank are tested with ``between_planes`` on their CA atom; each hit
    counts as one chain end. The count is divided by the number of residues
    in the slice.

    :param structure: a pdb structure object
    :param normal: forwarded unchanged to ``between_planes``
    :param axis: forwarded unchanged to ``between_planes``
    :param res_in_slice: sized collection of the residues in the slice
    :return: end_chain factor; 0.0 when ``res_in_slice`` is empty
    """
    end_chain_counter = 0
    for chain in slt.unfold_entities(structure, 'C'):
        # Keep only residues with a blank hetero flag (no waters/ligands).
        standard_residues = [
            res for res in slt.unfold_entities(chain, 'R')
            if res.id[0][0] == " "
        ]
        if not standard_residues:
            # A chain made only of hetero residues has no terminus to test;
            # indexing [0] / [-1] below would raise IndexError.
            continue
        # A single-residue chain is deliberately tested twice (it is both
        # the first and the last residue), as in the original code.
        for terminus in (standard_residues[0], standard_residues[-1]):
            if between_planes(normal, axis, terminus['CA']):
                end_chain_counter += 1

    res_num = len(res_in_slice)
    if res_num == 0:
        # Nothing in the slice: report a zero factor instead of dividing
        # by zero.
        return 0.0
    return end_chain_counter / res_num
def pdb_neighbors(pdb_f, pdb_id):
    """Map each phosphorylated residue to the residues near its phosphate.

    Centers are the atoms named O1P/O2P/O3P/OH/OG/OG1 that belong to PTR,
    SEP or TPO residues. For every center, all residues with an atom within
    BOND_CUTOFF are collected (the center's own residue excluded).

    :param pdb_f: PDB file passed to ``PDBParser.get_structure``
    :param pdb_id: identifier given to the parsed structure
    :return: '' when the structure has no center atom, otherwise a dict
        mapping 'RESNAME_RESNUM_CHAIN' of the phospho-residue to a list of
        'RESNAME_RESNUM_CHAIN' labels of its neighbors
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)
    atom_list = Selection.unfold_entities(structure, 'A')
    ns = NeighborSearch(atom_list)
    center = [
        (a, a.get_coord()) for a in structure.get_atoms()
        if a.get_parent().get_resname() in ['PTR', 'SEP', 'TPO']
        and a.get_name() in ['O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1']
    ]
    # if there are no phospho-atoms, return
    if not center:
        return ''

    neighbors = {}
    for a, c in center:
        # set neighbor distance cutoff
        neighbor_list = ns.search(c, BOND_CUTOFF)
        residue_list = list(set(Selection.unfold_entities(neighbor_list, 'R')))
        try:
            center_res = Selection.unfold_entities(a, 'R')[0]
            # NOTE(review): the dict is keyed by residue, so each center atom
            # of the same residue overwrites the list stored by the previous
            # one -- confirm this is intended.
            neighbors[center_res] = [
                x for x in residue_list if not x == center_res
            ]
        except Exception:
            # Best effort: skip this center atom, but no longer swallow
            # KeyboardInterrupt / SystemExit as the bare ``except`` did.
            continue

    def _label(res):
        # residue name, residue number and chain id joined by underscores
        return '_'.join([
            res.get_resname(),
            str(res.get_id()[1]),
            str(res.get_parent().get_id()),
        ])

    # ``items()`` works on Python 2 and 3 (``iteritems()`` is Python 2 only).
    return {_label(k): [_label(vi) for vi in v] for k, v in neighbors.items()}
def within(resnum, angstroms, chain_id, model, use_ca=False, custom_coord=None):
    """Return the residues that have an atom within a radius of a point.

    See: https://www.biostars.org/p/1816/ https://www.biostars.org/p/269579/

    Args:
        resnum (int): residue key used as ``model[chain_id][resnum]``;
            ignored when ``custom_coord`` is given
        angstroms (float): search radius passed to ``NeighborSearch.search``
        chain_id (str): chain key used as ``model[chain_id]``; ignored when
            ``custom_coord`` is given
        model (Model): model whose atoms are searched
        use_ca (bool): If the alpha-carbon atom should be used for the
            search, otherwise use the last atom of the residue
        custom_coord (list): Custom XYZ coordinate to get within; a NumPy
            array is accepted as well

    Returns:
        list: List of Bio.PDB.Residue.Residue objects

    """
    # TODO: should have separate method for within a normal residue (can use
    # "resnum" with a int) or a custom coord, where you don't need to specify
    # resnum
    atom_list = Selection.unfold_entities(model, 'A')
    ns = NeighborSearch(atom_list)

    # ``if custom_coord:`` raised "truth value of an array is ambiguous" for
    # NumPy input. Arrays are tested by size; everything else keeps the
    # original truthiness rule (None / empty sequence -> use the residue).
    if isinstance(custom_coord, np.ndarray):
        has_custom_coord = custom_coord.size > 0
    else:
        has_custom_coord = bool(custom_coord)

    if has_custom_coord:
        target_atom_coord = np.array(custom_coord, 'f')
    else:
        target_residue = model[chain_id][resnum]
        if use_ca:
            target_atom = target_residue['CA']
        else:
            target_atom = target_residue.child_list[-1]
        target_atom_coord = np.array(target_atom.get_coord(), 'f')

    neighbors = ns.search(target_atom_coord, angstroms)
    return Selection.unfold_entities(neighbors, 'R')
def __init__(self, fasta_align, m1, m2, si=0, sj=1):
    """Initialise.

    Walks the alignment column by column and pairs the residues of the two
    models that share a column. Residues for which ``is_aa`` is false are
    skipped while advancing through either residue list.

    Attributes:
     - fasta_align - Alignment object
     - m1, m2 - two models
     - si, sj - the sequences in the Alignment object that
       correspond to the structures

    Sets ``map12`` / ``map21`` (residue -> equivalent residue or None) and
    ``duos`` (one ``(r1, r2)`` pair per column, None standing for a gap).
    """
    n_columns = fasta_align.get_alignment_length()
    # Residues of both models, in unfolding order
    residues_1 = Selection.unfold_entities(m1, 'R')
    residues_2 = Selection.unfold_entities(m2, 'R')
    # Read cursors into the two residue lists
    cursor_1 = 0
    cursor_2 = 0
    forward = {}
    backward = {}
    pairs = []
    for col in range(n_columns):
        column = fasta_align[:, col]
        letter_1 = column[si]
        letter_2 = column[sj]

        if letter_1 == "-":
            res_1 = None
        else:
            # Advance until an amino acid is found
            res_1 = residues_1[cursor_1]
            cursor_1 += 1
            while not is_aa(res_1):
                res_1 = residues_1[cursor_1]
                cursor_1 += 1
            self._test_equivalence(res_1, letter_1)

        if letter_2 == "-":
            res_2 = None
        else:
            res_2 = residues_2[cursor_2]
            cursor_2 += 1
            while not is_aa(res_2):
                res_2 = residues_2[cursor_2]
                cursor_2 += 1
            self._test_equivalence(res_2, letter_2)

        # Truthiness (not ``is not None``) is kept on purpose: it is what
        # the original test used.
        if res_1:
            forward[res_1] = res_2
        if res_2:
            backward[res_2] = res_1
        pairs.append((res_1, res_2))

    self.map12 = forward
    self.map21 = backward
    self.duos = pairs
def _get_contacts(pdb_path, chain_rec, chain_lig, contact_dist):
    """Collect residue-level contacts between receptor and ligand chains.

    Every ligand atom is used as a query point against the receptor atoms;
    receptor residues with an atom within ``contact_dist`` are in contact
    with the ligand residue the query atom belongs to.

    :param pdb_path: PDB file parsed with the module-level ``pdb_parser``
    :param chain_rec: iterable of receptor chain ids (first model only)
    :param chain_lig: iterable of ligand chain ids (first model only)
    :param contact_dist: radius passed to ``NeighborSearch.search``
    :return: (contacts, contacts_rec, contacts_lig) -- three sets; residues
        are described as (one-letter name, residue number, chain id) and
        ``contacts`` holds the receptor triple followed by the ligand triple
    """
    model = pdb_parser.get_structure('X', pdb_path)[0]
    receptor = [model[chain_id] for chain_id in chain_rec]
    ligand = [model[chain_id] for chain_id in chain_lig]

    ns = NeighborSearch(Selection.unfold_entities(receptor, 'A'))

    contacts = set()
    contacts_lig = set()
    contacts_rec = set()
    for ligand_res in Selection.unfold_entities(ligand, 'R'):
        lig_key = (
            dindex_to_1[d3_to_index[ligand_res.get_resname()]],
            ligand_res.get_id()[1],
            ligand_res.get_parent().get_id(),
        )
        # Receptor residues near any atom of this ligand residue; duplicates
        # are harmless because the results are stored in sets.
        nearby = []
        for lig_atom in ligand_res:
            hits = ns.search(lig_atom.get_coord(), contact_dist)
            nearby.extend(Selection.unfold_entities(hits, 'R'))
        for receptor_res in nearby:
            rec_key = (
                dindex_to_1[d3_to_index[receptor_res.get_resname()]],
                receptor_res.get_id()[1],
                receptor_res.get_parent().get_id(),
            )
            contacts.add(rec_key + lig_key)
            contacts_lig.add(lig_key)
            contacts_rec.add(rec_key)
    return contacts, contacts_rec, contacts_lig
def pdb_neighbors(pdb_f, pdb_id):
    """List the polar-atom neighbors of every phosphorylated residue.

    For each PTR/SEP/TPO residue the phosphate oxygens (plus OH, OG or OG1
    depending on the residue) are used as query points; atoms within
    BOND_CUTOFF whose name contains 'N' or 'O' are kept and reduced to their
    parent residues, the phospho-residue itself excluded.

    :param pdb_f: PDB file passed to ``PDBParser.get_structure``
    :param pdb_id: identifier given to the structure and echoed in the result
    :return: list of (pdb_id, 'RESNAME_RESNUM_CHAIN', [neighbor labels]);
        residues without any neighbor are left out
    """
    query_atom_names = {
        'PTR': ['O1P', 'O2P', 'O3P', 'OH'],
        'SEP': ['O1P', 'O2P', 'O3P', 'OG'],
        'TPO': ['O1P', 'O2P', 'O3P', 'OG1'],
    }

    def label(residue):
        return (residue.get_resname() + '_' + str(residue.get_id()[1]) + '_'
                + residue.get_parent().get_id())

    structure = PDBParser().get_structure(pdb_id, pdb_f)
    ns = NeighborSearch(Selection.unfold_entities(structure, 'A'))
    phospho_residues = [
        residue for residue in structure.get_residues()
        if residue.get_resname() in query_atom_names
    ]

    neighbors = []
    for residue in phospho_residues:
        wanted = query_atom_names[residue.get_resname()]
        query_atoms = [a for a in residue.child_list if a.get_name() in wanted]
        hits = [
            hit
            for query in query_atoms
            for hit in ns.search(query.get_coord(), BOND_CUTOFF)
        ]
        unique_hits = list(set(hits))
        polar_hits = [
            hit for hit in unique_hits
            if 'N' in hit.get_name() or 'O' in hit.get_name()
        ]
        partner_residues = [
            r for r in set(Selection.unfold_entities(polar_hits, 'R'))
            if r != residue
        ]
        if partner_residues:
            residue_label = label(residue)
            partner_labels = [label(r) for r in partner_residues]
            neighbors.append((pdb_id, residue_label, partner_labels))
    return neighbors
def __init__(self, fasta_align, m1, m2, si=0, sj=1):
    """Initialize.

    Builds the residue-to-residue correspondence implied by an alignment of
    the sequences of two models.

    Attributes:
     - fasta_align - Alignment object
     - m1, m2 - two models
     - si, sj - the sequences in the Alignment object that
       correspond to the structures

    Sets ``map12``, ``map21`` and ``duos`` on the instance.
    """

    def next_amino_acid(residues, position):
        # Return the first residue at or after ``position`` accepted by
        # ``is_aa`` together with the index that follows it.
        while True:
            candidate = residues[position]
            position += 1
            if is_aa(candidate):
                return candidate, position

    length = fasta_align.get_alignment_length()
    rl1 = Selection.unfold_entities(m1, 'R')
    rl2 = Selection.unfold_entities(m2, 'R')
    p1 = p2 = 0
    map12, map21, duos = {}, {}, []

    for i in range(length):
        column = fasta_align[:, i]
        aa1, aa2 = column[si], column[sj]

        r1 = None
        if aa1 != "-":
            r1, p1 = next_amino_acid(rl1, p1)
            self._test_equivalence(r1, aa1)

        r2 = None
        if aa2 != "-":
            r2, p2 = next_amino_acid(rl2, p2)
            self._test_equivalence(r2, aa2)

        # Gaps (None) never become keys, but may appear as values.
        if r1:
            map12[r1] = r2
        if r2:
            map21[r2] = r1
        duos.append((r1, r2))

    self.map12 = map12
    self.map21 = map21
    self.duos = duos
def secondary_struc_cmap(chain,
                         sequence,
                         structure,
                         cutoff_distance=4.5,
                         cutoff_numcontacts=10,
                         exclude_neighbour=3,
                         ss_elements=['H', 'E', 'B', 'b', 'G']):
    """Find pairs of secondary-structure segments that are in contact.

    Consecutive positions of ``structure`` carrying the same symbol from
    ``ss_elements`` form one segment (numbered from 1; 0 means "not in a
    segment"). Atom pairs closer than ``cutoff_distance`` whose residues are
    more than ``exclude_neighbour`` positions apart and lie in two different
    segments are counted per segment pair.

    :param chain: chain entity whose atoms and residues are analysed
    :param sequence: only used for its length in the assertion message
    :param structure: per-residue secondary-structure symbols; must have one
        entry per residue of ``chain``
    :param cutoff_distance: radius passed to ``NeighborSearch.search_all``
    :param cutoff_numcontacts: minimum number of atom pairs for a segment
        pair to be reported
    :param exclude_neighbour: residue pairs at most this many list positions
        apart are ignored
    :param ss_elements: symbols that count as secondary structure (the
        default list is only read, never modified)
    :return: (array of 0-based segment index pairs, per-residue segment
        number array)
    :raises AssertionError: if ``structure`` and the residue list differ in
        length
    """
    atom_list = Selection.unfold_entities(chain, 'A')
    res_list = Selection.unfold_entities(chain, 'R')
    numbering = [res.get_id()[1] for res in res_list]

    assert len(structure) == len(
        numbering
    ), f'PDB file and Secondary structure map do not match!\n {chain.get_parent().get_parent().id} - PDB: {len(res_list)} Residues VS. STRIDE: {len(sequence)} Residues. '

    ns = NeighborSearch(atom_list)
    all_neighbours = ns.search_all(cutoff_distance, 'A')

    struc_length = len(structure)
    segment = np.zeros([struc_length], dtype='int')
    nseg = 1
    for i in range(struc_length):
        if structure[i] in ss_elements:
            segment[i] = nseg
            # A segment ends at the last position or where the symbol
            # changes. The original compared against ``struc_length``, which
            # ``range`` never yields, so ``structure[i + 1]`` raised
            # IndexError whenever the final position was in a segment.
            if i == struc_length - 1 or structure[i + 1] != structure[i]:
                nseg += 1

    # Residue number -> first list position carrying that number. Replaces a
    # boolean-mask scan of the whole numbering array for every atom pair.
    first_position = {}
    for position, resnum in enumerate(numbering):
        first_position.setdefault(resnum, position)

    index_list = []
    for atompair in all_neighbours:
        res1 = first_position[atompair[0].get_parent().id[1]]
        res2 = first_position[atompair[1].get_parent().id[1]]
        if abs(res1 - res2) > exclude_neighbour:
            if segment[res1] != 0 and segment[res2] != 0 and segment[
                    res1] != segment[res2]:
                index_list.append((segment[res1] - 1, segment[res2] - 1))

    index_list.sort()
    count = Counter(index_list)
    index = [values for values in count if count[values] >= cutoff_numcontacts]
    return np.array(index), segment
def pp_getcgeom(pp, sidechains=0):
    """Return the geometric centre (mean coordinate) of the atoms of ``pp``.

    :param pp: entity (or list of entities) unfolded to atom level
    :param sidechains: when truthy every atom is used; otherwise only atoms
        whose id is CA, CB, N or O
    :return: sum of the selected coordinates divided by their count
    """
    atoms = Selection.unfold_entities(pp, "A")
    if not sidechains:
        # NOTE(review): this "backbone" list contains CB and lacks C --
        # confirm that is intended.
        backbone = ["CA", "CB", "N", "O"]
        atoms = [atom for atom in atoms if atom.get_id() in backbone]
    coords = [atom.get_coord() for atom in atoms]
    return sum(coords) / len(coords)
def get_res_dist(struct1, struct2, alignment):
    """Average CA-CA distance over the aligned residue pairs.

    :param struct1: first entity, unfolded to a residue list
    :param struct2: second entity, unfolded to a residue list
    :param alignment: sized iterable of (index into the first residue list,
        index into the second residue list) pairs
    :return: summed CA-CA distance divided by ``len(alignment)``
    """
    residues_1 = Selection.unfold_entities(struct1, 'R')
    residues_2 = Selection.unfold_entities(struct2, 'R')
    total = 0.0
    for idx_1, idx_2 in alignment:
        first = residues_1[idx_1]
        if 'CA' not in first:
            continue
        second = residues_2[idx_2]
        if 'CA' not in second:
            continue
        # Subtracting two atoms yields their distance.
        total += first['CA'] - second['CA']
    # NOTE(review): pairs skipped for a missing CA still count in the
    # denominator -- confirm that is the intended average.
    return total / float(len(alignment))
def interaction(self, pdb_id, filename, domain_1, domain_2):
    """Returns a dict with information (atoms, residues...) if two domains
    interact with each other, and returns False if not.

    :param pdb_id: identifier handed to the ``structure`` helper class
    :param filename: file handed to ``structure(pdb_id).get_model``
    :param domain_1: first domain, resolved by ``get_residues``
    :param domain_2: second domain, resolved by ``get_residues``
    :return: ``{'1': {'atoms': [...], 'residues': [...]}, '2': {...}}`` or
        False when no atom of domain 2 lies within 5 angstroms of an atom of
        domain 1
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print("Searching for interactions in "+pdb_id+"...")

    # creates a structure object/class to extract atoms of the two domains
    model = structure(pdb_id).get_model(pdb_id, filename)
    residues_1 = structure(pdb_id).get_residues(model, domain_1)
    residues_2 = structure(pdb_id).get_residues(model, domain_2)
    atoms_1 = Selection.unfold_entities(residues_1, 'A')
    atoms_2 = Selection.unfold_entities(residues_2, 'A')

    # gets the serial numbers of the atoms; sets give O(1) membership tests
    # inside the per-atom loops below
    numbers_1 = set(structure(pdb_id).serial_numbers(atoms_1))
    numbers_2 = set(structure(pdb_id).serial_numbers(atoms_2))

    # the search starts here !
    atoms = Selection.unfold_entities(model, 'A')
    nsearch = NeighborSearch(atoms)
    interacting_atoms_1 = []
    interacting_atoms_2 = []
    for atom in atoms:
        if atom.get_serial_number() not in numbers_1:
            continue
        point = atom.get_coord()
        # This is how we detect an interaction, we put 5 angstroms here.
        # This is the simplest method we can use, and we're not sure that it
        # is correct. Originally we had planned to go further by doing a
        # surface and accession analysis, but we had no time.
        neighbors = nsearch.search(point, 5)
        for neighbor in neighbors:
            if neighbor.get_serial_number() in numbers_2:
                # Domain-2 atoms are recorded once per domain-1 atom they
                # are close to (duplicates possible, as before).
                interacting_atoms_2.append(neighbor)
                if atom not in interacting_atoms_1:
                    interacting_atoms_1.append(atom)

    if not interacting_atoms_2:
        return False

    # returns a dict with all residues and atoms; the parent residues are
    # derived from the lists of atoms
    infos = {'1': {}, '2': {}}
    interacting_residues_1 = structure(pdb_id).atoms2residues(
        interacting_atoms_1)
    interacting_residues_2 = structure(pdb_id).atoms2residues(
        interacting_atoms_2)
    infos['1']['atoms'] = interacting_atoms_1
    infos['2']['atoms'] = interacting_atoms_2
    infos['1']['residues'] = interacting_residues_1
    infos['2']['residues'] = interacting_residues_2
    return infos
def __init__(self, align, m1, m2):
    """Produce a residue-level alignment of two models.

    Input:
     - align - indexable alignment record: ``align[0]`` and ``align[1]`` are
       the two aligned sequences ('-' marks a gap) and
       ``align[4] - align[3]`` is the number of columns processed
       (presumably a pairwise-alignment tuple of the form
       (seqA, seqB, score, begin, end) -- confirm against the caller)
     - m1, m2 - two models

    Sets ``map12``, ``map21`` and ``residue_pairs`` on the instance.
    """
    n_columns = align[4]-align[3]
    residues_1 = Selection.unfold_entities(m1, 'R')
    residues_2 = Selection.unfold_entities(m2, 'R')
    pos_1 = 0
    pos_2 = 0
    first_to_second = {}
    second_to_first = {}
    pairs = []
    # NOTE(review): columns are read from index 0 even when align[3] is not
    # 0 -- confirm for alignments that do not start at the first column.
    for col in range(n_columns):
        symbol_1 = align[0][col]
        symbol_2 = align[1][col]

        if symbol_1 == "-":
            res_1 = None
        else:
            # skip entries that are not amino acids
            res_1 = residues_1[pos_1]
            pos_1 += 1
            while not is_aa(res_1):
                res_1 = residues_1[pos_1]
                pos_1 += 1
            self._test_equivalence(res_1, symbol_1)

        if symbol_2 == "-":
            res_2 = None
        else:
            res_2 = residues_2[pos_2]
            pos_2 += 1
            while not is_aa(res_2):
                res_2 = residues_2[pos_2]
                pos_2 += 1
            self._test_equivalence(res_2, symbol_2)

        if res_1:
            first_to_second[res_1] = res_2
        if res_2:
            second_to_first[res_2] = res_1
        pairs.append((res_1, res_2))

    self.map12 = first_to_second
    self.map21 = second_to_first
    self.residue_pairs = pairs
def read_pdb_file(file_name, name=None):
    """
    Extract info from a PDB file

    file_name: path of pdb file
    name: name of the structure (default name of the file without extension)

    return:: (protein_name, structure, residues, sequence, atoms)
        protein_name: base name of file_name with ".pdb" removed
        structure: structure object
        residues: residues of all polypeptides, in polypeptide order
        sequence: combined sequence (for all polypeptides)
        atoms: list of all atoms unfolded from the structure

    Polypeptides are built with PPBuilder; CaPPBuilder is used instead when
    PPBuilder finds none. Raises ValueError when len(structure) is not 1.
    """
    structure_name = splitext(file_name)[0] if name is None else name
    structure = PDBParser().get_structure(structure_name, file_name)
    if len(structure) != 1:
        raise ValueError(
            "Unexpected number of structures in " + structure_name)

    atoms = Selection.unfold_entities(structure, 'A')

    polypeptides = PPBuilder().build_peptides(structure)
    if not polypeptides:
        polypeptides = CaPPBuilder().build_peptides(structure)

    fragments = []
    residues = []
    for polypeptide in polypeptides:
        fragments.append(str(polypeptide.get_sequence()))
    for polypeptide in polypeptides:
        residues.extend(polypeptide)
    sequence = ''.join(fragments)

    protein_name = os.path.basename(file_name).replace(".pdb", "")
    return protein_name, structure, residues, sequence, atoms
def pdb_dist(pdb_f, pdb_id, partner_resnames=('HIS',),
             partner_atoms=('ND1', 'NE2')):
    """Closest phosphate-to-partner atom distance per residue pair.

    Centers are the atoms named O1P/O2P/O3P/OH/OG/OG1 of PTR, SEP and TPO
    residues. Partners are the atoms named in ``partner_atoms`` of residues
    named in ``partner_resnames``. For every (center residue, partner
    residue) pair the smallest atom-atom distance is kept.

    :param pdb_f: PDB file passed to ``PDBParser.get_structure``
    :param pdb_id: identifier given to the parsed structure
    :param partner_resnames: residue names of the partner side; the default
        reproduces the previously hard-coded histidine selection (e.g.
        ``('ARG',)`` with ``('NE', 'NH2', 'NH1')`` selects arginine instead)
    :param partner_atoms: atom names of the partner side
    :return: list with one minimal distance per residue pair
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)
    # The NeighborSearch tree the original built here was never queried, so
    # it is no longer constructed.
    center = [
        a for a in structure.get_atoms()
        if a.get_parent().get_resname() in ['PTR', 'SEP', 'TPO']
        and a.get_name() in ['O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1']
    ]
    neighbors = [
        a for a in structure.get_atoms()
        if a.get_parent().get_resname() in partner_resnames
        and a.get_name() in partner_atoms
    ]

    def calc_dist(a, b):
        vector = a.coord - b.coord
        return np.sqrt(np.sum(vector * vector))

    closest = {}
    for c in center:
        for n in neighbors:
            value = calc_dist(c, n)
            # NOTE(review): the key is built from str(residue); confirm it
            # distinguishes residues with the same number in other chains.
            key = str(c.get_parent()) + '_' + str(n.get_parent())
            if key not in closest or value < closest[key]:
                closest[key] = value
    return list(closest.values())
def read_pdb_file(file_name, name=None):
    """
    Read a PDB file and return its polypeptide residues, sequence and atoms.

    file_name: path of pdb file
    name: name of the structure (default name of the file without extension)

    return:: (protein_name, structure, residues, sequence, atoms)
        protein_name: base name of file_name with ".pdb" removed
        structure: structure object
        residues: list of the residues of every polypeptide
        sequence: combined sequence (for all polypeptides)
        atoms: list of all atoms unfolded from the structure

    Raises ValueError when len(structure) is not 1.
    """
    if name is None:
        name = splitext(file_name)[0]
    structure = PDBParser().get_structure(name, file_name)
    model_count = len(structure)
    if model_count != 1:
        raise ValueError("Unexpected number of structures in " + name)

    atoms = Selection.unfold_entities(structure, 'A')

    # Fall back to the CA-based builder when the default one finds nothing.
    polypeptides = (PPBuilder().build_peptides(structure)
                    or CaPPBuilder().build_peptides(structure))

    sequence = ''.join(str(pp.get_sequence()) for pp in polypeptides)
    residues = []
    for pp in polypeptides:
        for residue in pp:
            residues.append(residue)

    base_name = os.path.basename(file_name)
    return base_name.replace(".pdb", ""), structure, residues, sequence, atoms
def __init__(self, model, pdb_file=None):
    """Compute residue depth and CA depth for every amino acid of ``model``.

    :param model: model whose residues are measured
    :param pdb_file: no longer used; passing anything but None only triggers
        a BiopythonDeprecationWarning
    """
    if pdb_file is not None:
        # The argument is accepted for backward compatibility only.
        warnings.warn(("ResidueDepth no longer requires a pdb file."
                       " This argument will be removed in a future release"
                       " of Biopython."), BiopythonDeprecationWarning)

    residue_list = Selection.unfold_entities(model, 'R')
    # Surface of the whole model, shared by all depth calculations below
    surface = get_surface(model)

    depth_dict, depth_list, depth_keys = {}, [], []
    for residue in residue_list:
        if not is_aa(residue):
            continue
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        depths = (rd, ca_rd)
        key = (residue.get_parent().get_id(), residue.get_id())
        depth_dict[key] = depths
        depth_list.append((residue, depths))
        depth_keys.append(key)
        # Mirror the values on the residue itself
        residue.xtra['EXP_RD'] = rd
        residue.xtra['EXP_RD_CA'] = ca_rd

    AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)
def GetResidueDepPDB(pdb, pdbfile):
    """Append residue depth and CA depth of every amino acid to ``OUT``.

    One tab-separated line is written per residue:
    pdb, chain id, residue number, residue name, residue depth, CA depth.
    Residues whose ``residue_depth`` computation fails are skipped. The
    append to ``OUT`` is done while holding ``mutex_writefile``.

    :param pdb: identifier passed to ``GetStructure`` and written first on
        every line
    :param pdbfile: file passed to ``get_surface``
    :return: None (also when the surface could not be computed)
    """
    s = GetStructure(pdb)
    model = s[0]
    residuelist = Selection.unfold_entities(model, 'R')
    try:
        surface = get_surface(pdbfile, PDBTOXYZ, MSMS)
    except Exception:
        # Best effort as before, but KeyboardInterrupt / SystemExit are no
        # longer swallowed. Parenthesised print works on Python 2 and 3.
        print("cannot get surface for " + pdbfile)
        return

    lines = []
    for residue in residuelist:
        if not is_aa(residue):
            continue
        resid = residue.get_id()
        resname = residue.get_resname()
        chainid = residue.get_parent().get_id()
        try:
            rd = residue_depth(residue, surface)
        except Exception:
            continue
        ca_rd = ca_depth(residue, surface)
        info = [pdb, chainid, resid[1], resname, str(rd), str(ca_rd)]
        lines.append("\t".join(map(str, info)) + "\n")
    content = "".join(lines)

    # try/finally guarantees the lock is released and the file closed even
    # if the write fails.
    mutex_writefile.acquire()
    try:
        with open(OUT, "a") as outobj:
            outobj.write(content)
    finally:
        mutex_writefile.release()
def get_surface(model, MSMS="msms"):
    """Represent molecular surface as a vertex list array.

    Return a Numpy array that represents the vertex list of the molecular
    surface.

    The atoms of ``model`` are written to a temporary x/y/z/radius file,
    MSMS is run on it through the shell, and the resulting ``.vert`` file is
    parsed. Temporary files are removed once the surface has been read; they
    are left in place when MSMS produced no vertex file so the failing
    command can be inspected.

    Arguments:
     - model - entity whose atoms define the surface
     - MSMS - msms executable (used as argument to subprocess.call)

    Raises RuntimeError if MSMS did not produce a vertex file.
    """

    def _new_temp_path():
        # mkstemp creates the file atomically; the deprecated mktemp only
        # returned a name another process could claim first.
        handle, path = tempfile.mkstemp()
        os.close(handle)
        return path

    # Replace pdb_to_xyzr
    # Make x,y,z,radius file
    atom_list = Selection.unfold_entities(model, "A")

    xyz_tmp = _new_temp_path()
    with open(xyz_tmp, "w") as pdb_to_xyzr:
        for atom in atom_list:
            x, y, z = atom.coord
            radius = _get_atom_radius(atom, rtype="united")
            pdb_to_xyzr.write(f"{x:6.3f}\t{y:6.3f}\t{z:6.3f}\t{radius:1.2f}\n")

    # make surface; MSMS output is redirected to a scratch log file
    surface_tmp = _new_temp_path()
    msms_log = _new_temp_path()
    MSMS = MSMS + " -probe_radius 1.5 -if %s -of %s > " + msms_log
    make_surface = MSMS % (xyz_tmp, surface_tmp)
    subprocess.call(make_surface, shell=True)

    surface_file = surface_tmp + ".vert"
    face_file = surface_tmp + ".face"
    if not os.path.isfile(surface_file):
        raise RuntimeError(
            f"Failed to generate surface file using command:\n{make_surface}")

    try:
        # read surface vertices from vertex file
        surface = _read_vertex_array(surface_file)
    finally:
        for path in (xyz_tmp, surface_tmp, msms_log, face_file, surface_file):
            try:
                os.remove(path)
            except OSError:
                # already gone or never created (e.g. no .face file)
                pass
    return surface
def interchain_residue_contacts(self, chain_ids_1, chain_ids_2, radius):
    """
    Generate a list of residue contacts between two groups of chains.

    Residue pairs within ``radius`` are searched among the atoms of all
    chains named in either group. A pair is kept when one residue belongs
    to a chain of ``chain_ids_1`` and the other to a chain of
    ``chain_ids_2``.

    Returns a defaultdict(list) keyed by (chain id from group 1, chain id
    from group 2); every entry is a dict mapping each chain id of the pair
    to its residue.
    """
    chains_by_id = {}
    for chain in self.get_chains():
        chains_by_id[chain.get_id()] = chain

    atoms = []
    for chain_id, chain in chains_by_id.items():
        if chain_id in chain_ids_1 + chain_ids_2:
            atoms.extend(Selection.unfold_entities(chain, "A"))

    classified_contacts = defaultdict(list)
    for contact in NeighborSearch(atoms).search_all(radius, "R"):
        id_a, id_b = [residue.get_parent().get_id() for residue in contact]
        if id_a in chain_ids_1 and id_b in chain_ids_2:
            key = (id_a, id_b)
        elif id_b in chain_ids_1 and id_a in chain_ids_2:
            key = (id_b, id_a)
        else:
            continue
        classified_contacts[key].append({id_a: contact[0], id_b: contact[1]})
    return classified_contacts
def volume_delaunay(model):
    """Returns dictionary containing volume for each residue in `model`.

    The atom coordinates are triangulated with Delaunay; the volume of every
    simplex whose atoms all belong to one residue is added to that
    residue's total.

    Parameters
    ----------
    model: Bio.PDB.Model.Model
        Model of the protein structure.

    Returns
    -------
    dict
        Maps ``label_residue(residue)`` to the summed simplex volume.
        Residues that own no complete simplex are absent.
    """
    volume_dict = {}
    atoms = np.array(list(model.get_atoms()))
    delaunay = Delaunay([atom.coord for atom in atoms])

    for simplex in delaunay.simplices:
        parent_residues = Selection.get_unique_parents(atoms[simplex])
        # Simplex is taken into account only if is totally contained in one
        # residue. ``== 1`` replaces ``is 1``: identity comparison with an
        # int literal relies on an interpreter detail and emits a
        # SyntaxWarning on Python 3.8+.
        if len(parent_residues) == 1:
            unique_parent = label_residue(parent_residues[0])
            cv_simplex = ConvexHull([atom.coord for atom in atoms[simplex]])
            volume_dict.setdefault(unique_parent, 0)
            volume_dict[unique_parent] += cv_simplex.volume
    return volume_dict
def calculateNeighbors(filename, radius):
    """Compute a per-residue burial score for a PDB file.

    The score of a residue is the number of atoms found within ``radius``
    of its atoms (summed over the residue's atoms) divided by the number
    of atoms in the residue.

    :param filename: PDB file; the text before ".pdb" names the structure
    :param radius: radius passed to ``NeighborSearch.search``
    :return: dict mapping the residue number (as a string) to its score
    """
    structure = parser.get_structure(filename.split(".pdb")[0], filename)
    atom_list = Selection.unfold_entities(structure, 'A')  # A for atoms
    residue_list = Selection.unfold_entities(structure, 'R')  # R for residues
    neighbor_search = NeighborSearch(atom_list)

    data = {}
    for residue in residue_list:
        residue_atoms = residue.get_list()
        n_contacts = 0
        for atom in residue_atoms:
            hits = neighbor_search.search(atom.get_coord(), radius, level="A")
            n_contacts += len(hits)
        # NOTE(review): keyed by residue number only -- residues sharing a
        # number (e.g. in different chains) overwrite each other.
        data[str(residue.get_id()[1])] = n_contacts/len(residue_atoms)
    return data
def chain2pos_scan_str(chain, pdb, mutation_set='a'):
    """
    Takes a chain ID and a model.PDBFile object, returns a string suitable as
    the PositionScan line for FoldX.

    Every residue of the matching chain (first model only) whose name
    ``three_to_one`` can translate contributes one comma-separated entry
    made of the one-letter code, the chain ID, the residue number and
    ``mutation_set``.

    :param chain: chain ID to scan
    :param pdb: object providing ``fullpath()`` and ``uuid``
    :param mutation_set: suffix appended to every entry
    :return: comma-separated entries, '' when nothing matches
    """
    parser = PDBParser(PERMISSIVE=1)
    pdbfn = pdb.fullpath()
    struct = parser.get_structure(pdb.uuid, pdbfn)[0]
    chainlist = Selection.unfold_entities(struct, 'C')

    entries = []
    for c in chainlist:
        if c.id != chain:
            continue
        for r in c:
            try:
                aa = three_to_one(r.get_resname())
            except KeyError:
                # non-native amino acid or water: no one-letter code.
                # NOTE(review): assumes ``three_to_one`` is the Bio.PDB
                # helper, which raises KeyError for unknown names.
                continue
            resnum = r.id[1]
            entries.append('%s%s%i%s' % (aa, chain, resnum, mutation_set))
    return ','.join(entries)
def compute_interactions(pdb_name):
    """Return the classified residue-residue interactions of a structure.

    Residue pairs within 4.5 of each other are searched among the atoms of
    the first chain of the structure, reduced to amino-acid pairs more than
    NUM_SKIP_RESIDUES residue numbers apart, and wrapped as
    ``InteractingPair`` objects.

    :param pdb_name: entry name; lower-cased before use
    :return: list of ``InteractingPair`` with at least one interaction
    """
    pdb_name = pdb_name.lower()

    # Database record of the structure
    record = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)

    # NOTE(review): the preferred chain is read but not used below -- the
    # atoms come from the first chain of the Biopython structure. Confirm.
    preferred_chain = record.preferred_chain.split(',')[0]

    # Biopython structure for the PDB
    s = pdb_get_structure(pdb_name)
    first_chain = next(s.get_chains())
    atom_list = Selection.unfold_entities(first_chain, 'A')

    # All residue pairs within the distance cutoff
    all_neighbors = NeighborSearch(atom_list).search_all(4.5, "R")

    classified = []
    for first, second in all_neighbors:
        # Drop pairs containing non amino-acid residues
        if not (is_aa(first) and is_aa(second)):
            continue
        # Keep only pairs MORE than NUM_SKIP_RESIDUES sequence steps apart
        if not abs(first.id[1] - second.id[1]) > NUM_SKIP_RESIDUES:
            continue
        pair = InteractingPair(first, second, get_interactions(first, second))
        # Unclassified pairs (no interaction type found) are discarded
        if len(pair.get_interactions()) > 0:
            classified.append(pair)
    return classified
def collectHbondsNumber(self, foldxIndivFile):
    """Count hydrogen-bond partners of the mutated residue.

    The residue is located from ``whatMutation(foldxIndivFile)`` (item 3 is
    the chain, item 2 the residue number). For each of its atoms named in
    ``donors`` the nearby acceptor atoms are counted, and for each atom
    named in ``acceptors`` the nearby donor atoms; atoms of residues with
    the same residue number are ignored.

    :param foldxIndivFile: passed to ``whatMutation``
    :return: number of donor-side plus acceptor-side partners
    """
    wt_mut_chain_id = whatMutation(foldxIndivFile)
    prot_chain = wt_mut_chain_id[3]
    prot_mut_id = int(wt_mut_chain_id[2])
    model = self.structure[0]
    myres = model[prot_chain][prot_mut_id]
    ns = NeighborSearch(Selection.unfold_entities(model, 'A'))  # A for atoms

    def count_partners(atom, side_chain_names, main_chain_names):
        # cutoff of 3.2 angstroms D-A, strong mostly covalent according to
        # Jeffrey, George A.; An introduction to hydrogen bonding,
        # Oxford University Press, 1997.
        found = 0
        for close_atom in ns.search(atom.coord, 3.2):
            full_atom_id = close_atom.get_full_id()
            if (full_atom_id[3][1] != prot_mut_id) and (
                    full_atom_id[4][0] in side_chain_names + main_chain_names):
                found += 1
        return found

    as_donor = 0
    as_acceptor = 0
    for atom in myres:
        if atom.name in donors:
            as_donor += count_partners(atom, acceptors, main_chain_acceptors)
        # an atom may be listed both as donor and as acceptor
        if atom.name in acceptors:
            as_acceptor += count_partners(atom, donors, main_chain_donors)
    return as_donor + as_acceptor
def __init__(self, model, msms_exec=None):
    """Initialize the class.

    Computes residue depth and CA depth for every amino acid of ``model``
    against a surface generated by ``get_surface``.

    :param model: model whose residues are measured
    :param msms_exec: value forwarded as ``MSMS`` to ``get_surface``;
        defaults to "msms"
    """
    executable = "msms" if msms_exec is None else msms_exec

    residue_list = Selection.unfold_entities(model, "R")
    surface = get_surface(model, MSMS=executable)

    depth_dict = {}
    depth_list = []
    depth_keys = []
    for residue in residue_list:
        if not is_aa(residue):
            continue
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        # Entries are keyed by (chain id, residue id)
        entry_key = (residue.get_parent().get_id(), residue.get_id())
        depth_dict[entry_key] = (rd, ca_rd)
        depth_list.append((residue, (rd, ca_rd)))
        depth_keys.append(entry_key)
        # Expose the depths on the residue as well
        residue.xtra["EXP_RD"] = rd
        residue.xtra["EXP_RD_CA"] = ca_rd

    AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)
def __init__(self, model, pdb_file=None):
    """Initialize the class.

    Stores, for every amino acid of ``model``, the pair
    (residue depth, CA depth) measured against the model's surface.
    ``pdb_file`` is ignored apart from triggering a deprecation warning.
    """
    if pdb_file is not None:
        warnings.warn(
            "ResidueDepth no longer requires a pdb file. "
            "This argument will be removed in a future release "
            "of Biopython.", BiopythonDeprecationWarning)

    residue_list = Selection.unfold_entities(model, 'R')
    # surface built once for the whole model
    surface = get_surface(model)

    depth_dict = {}
    depth_list = []
    depth_keys = []
    for residue in filter(is_aa, residue_list):
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        chain_id = residue.get_parent().get_id()
        res_id = residue.get_id()
        depth_keys.append((chain_id, res_id))
        depth_list.append((residue, (rd, ca_rd)))
        depth_dict[(chain_id, res_id)] = (rd, ca_rd)
        # keep a copy of the values on the residue
        residue.xtra.update({'EXP_RD': rd, 'EXP_RD_CA': ca_rd})

    AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)
def collectSaltBridge(self, position, chain):
    """Tell whether the residue at ``position`` takes part in a salt bridge.

    Only ARG, LYS, ASP, GLU and HIS residues are examined. A bridge is
    reported when an atom named NH1/NH2/NZ/NE2 of the residue has an atom
    named OE1/OE2/OD1/OD2 within 4.5 angstroms, or the other way round.

    The original stopped after the first charged atom of the residue, so a
    bridge formed by a later atom (e.g. NH2 of ARG) was missed, and it
    raised UnboundLocalError when a charged residue carried none of the
    listed atoms. All charged atoms are now examined and 0 is returned when
    none forms a bridge.

    :param position: residue key used as ``model[chain][position]``
    :param chain: chain id in the first model of ``self.structure``
    :return: 1 if a salt bridge is found, else 0
    """
    cationic_names = ['NH1', 'NH2', 'NZ', 'NE2']
    anionic_names = ['OE1', 'OE2', 'OD1', 'OD2']

    model = self.structure[0]
    myres = model[chain][position]
    if myres.get_resname() not in ['ARG', 'LYS', 'ASP', 'GLU', 'HIS']:
        return 0

    atoms = Selection.unfold_entities(model, 'A')  # A for atoms
    ns = NeighborSearch(atoms)
    for atom in myres:
        atom_name = atom.get_full_id()[4][0]
        if atom_name in cationic_names:
            partner_names = anionic_names
        elif atom_name in anionic_names:
            partner_names = cationic_names
        else:
            continue
        # cutoff of 4 criteria fixed by Barlow, J M Thornton (PMID6887253)
        # +0.5A to account for the unoptimised side chain
        close_names = {close_atom.id for close_atom in
                       ns.search(atom.coord, 4.5)}
        if any(name in close_names for name in partner_names):
            return 1
    return 0
def structure_filtered_dca_get_pairwise_distances_and_sequence(
        pdb_id, chain_id, start, end, pdb_directory='./pdbs/'):
    """Return the residue-residue distance matrix and sequence of a chain.

    The file ``<pdb_id><chain_id>_<start>-<end>.pdb`` is read from
    ``pdb_directory``; chain ``chain_id`` of its first model is used and
    hetero residues are dropped before the distances are computed.

    :return: (2-D array with the interaction distance of every ordered
        residue pair, sequence extracted from the chain)
    """
    file_name = "%s%s_%s-%s.pdb" % (pdb_id, chain_id, str(start), str(end))
    parsed = PDBParser().get_structure(
        pdb_id, os.path.join(pdb_directory, file_name))
    chain = parsed[0][chain_id]

    sequence = structure_filtered_dca_get_sequence_from_structure(chain)

    residues = [
        residue for residue in Selection.unfold_entities(chain, 'R')
        if not structure_filtered_dca_is_hetero(residue)
    ]
    # One row per residue, one column per residue (full square matrix)
    pairwise_distances = [
        [
            structure_filtered_dca_get_interaction_distance(row_res, col_res)
            for col_res in residues
        ]
        for row_res in residues
    ]
    return np.array(pairwise_distances), sequence
def get_PDB_indices(chain_obj):
    """Return (full_id[1], full_id[2], residue number) for each residue.

    Only residues accepted by ``is_residue`` are listed.

    :param chain_obj: entity unfolded to residue level
    :return: numpy array with one row per kept residue
    """
    residues = Selection.unfold_entities(chain_obj, 'R')
    rows = [
        (residue.get_full_id()[1], residue.get_full_id()[2],
         residue.get_id()[1])
        for residue in residues
        if is_residue(residue)
    ]
    return np.array(rows)
def extract_beads(pdb_path,
                  aa_table_path='/home/hyang/bio/erf/data/amino_acids.csv'):
    """Write the CA / CB bead coordinates of a PDB file to a CSV file.

    Residues whose name is not listed in the amino-acid table are skipped.
    For GLY the CA coordinate is used in place of CB. The result is written
    to ``<pdb_path>_bead.csv``.

    :param pdb_path: PDB file to read
    :param aa_table_path: CSV with ``AA3C`` (three-letter) and ``AA``
        columns; defaults to the previously hard-coded location
    :return: 1 on success; 0 when the file cannot be parsed, a kept residue
        lacks CA (or CB for non-GLY), or no residue is kept
    """
    amino_acids = pd.read_csv(aa_table_path)
    # three-letter name (upper case) -> value of the AA column
    vocab_dict = {
        x.upper(): y
        for x, y in zip(amino_acids.AA3C, amino_acids.AA)
    }

    p = PDBParser()
    try:
        structure = p.get_structure('X', pdb_path)
    except Exception:
        # Unreadable file is reported through the return code, as before,
        # without swallowing KeyboardInterrupt / SystemExit.
        return 0
    residue_list = Selection.unfold_entities(structure, 'R')

    ca_center_list = []
    cb_center_list = []
    res_name_list = []
    res_num_list = []
    chain_list = []
    for res in residue_list:
        res_name = res.get_resname()
        if res_name not in vocab_dict:
            # non-natural amino acids, waters, ligands...
            continue
        try:
            ca_coord = res['CA'].get_coord()
            if res_name != 'GLY':
                cb_coord = res['CB'].get_coord()
            else:
                cb_coord = res['CA'].get_coord()
        except KeyError:
            return 0
        chain_list.append(res.parent.id)
        res_name_list.append(vocab_dict[res_name])
        res_num_list.append(res.id[1])
        ca_center_list.append(ca_coord)
        cb_center_list.append(cb_coord)

    if not ca_center_list:
        # np.vstack([]) would raise ValueError; treat as a failure instead.
        return 0

    ca_center = np.vstack(ca_center_list)
    cb_center = np.vstack(cb_center_list)
    df = pd.DataFrame({
        'chain_id': chain_list,
        'group_num': res_num_list,
        'group_name': res_name_list,
        'x': ca_center[:, 0],
        'y': ca_center[:, 1],
        'z': ca_center[:, 2],
        'xcb': cb_center[:, 0],
        'ycb': cb_center[:, 1],
        'zcb': cb_center[:, 2]
    })
    df.to_csv(f'{pdb_path}_bead.csv', index=False)
    return 1
def __init__(self, pdb, j_input):
    """Load a PDB file and prepare the instance.

    :param pdb: path of the PDB file; also used as the structure id
    :param j_input: stored as ``self.j_input`` before ``parse_j_list()`` is
        called
    """
    quiet_parser = PDBParser(QUIET=True)
    self.wkdir = Path.cwd()
    # file name with ".pdb" removed
    self.pdb = pdb.replace(".pdb", "")
    structure = quiet_parser.get_structure(pdb, pdb)
    self.struct = structure
    self.atom_list = Selection.unfold_entities(structure, 'A')
    self.j_input = j_input
    self.parse_j_list()
def test_on_pdb(self):
    """Align a PDB to itself."""
    pdb_path = "PDB/1A8O.pdb"
    parser = PDBParser()

    fixed = Selection.unfold_entities(
        parser.get_structure("FIXED", pdb_path), "A")
    moving = Selection.unfold_entities(
        parser.get_structure("MOVING", pdb_path), "A")

    # Shift the moving copy by a known translation (identity rotation)
    identity = np.eye(3, dtype=np.float64)
    shift = np.array([1.0, 2.0, 3.0], dtype=np.float64)
    for atom in moving:
        atom.transform(identity, shift)

    sup = QCPSuperimposer()
    sup.set_atoms(fixed, moving)

    # The superimposer must undo exactly that shift
    to_list = self._arr_to_list
    self.assertEqual(to_list(sup.rotran[0]), to_list(identity))
    self.assertEqual(to_list(sup.rotran[1]), to_list(-shift))
    self.assertAlmostEqual(sup.rms, 0.0, places=6)
def test_fragment_mapper(self):
    """Check the repr prefix of every fragment FragmentMapper assigns in 1A8O."""
    model = PDBParser().get_structure("X", "PDB/1A8O.pdb")[0]
    mapper = FragmentMapper(model, 10, 5, "PDB")

    # Only residues the mapper knows about carry a fragment.
    mapped_residues = (
        residue
        for residue in Selection.unfold_entities(model, "R")
        if residue in mapper
    )
    for residue in mapped_residues:
        fragment_repr = str(mapper[residue])
        self.assertTrue(fragment_repr.startswith("<Fragment length=5 id="))
def atom_distance(x, y):
    '''Return the smallest atom-to-atom distance between entities x and y.

    Both arguments are unfolded to their atoms; every x-atom/y-atom pair is
    measured with ``distance`` and the minimum is returned.'''
    atoms_of_x = Selection.unfold_entities(x, 'A')
    atoms_of_y = Selection.unfold_entities(y, 'A')
    return min(
        distance(atom_a.get_coord(), atom_b.get_coord())
        for atom_a in atoms_of_x
        for atom_b in atoms_of_y
    )
def get_bfactors(self, chain_id):
    '''
    Return the standardised per-residue B-factors of one chain.

    Input:
        self: object whose ``structure`` attribute holds a Biopython.PDB
            structure; the chain is looked up in its first model.
        chain_id: String (usually in ['A', 'B', 'C', ...]).
    Return:
        numpy integer array with one entry per standard amino-acid residue
        of the chain.  Each residue's B-factor is the mean over its atoms
        (np.nan when the residue has no atoms); the values are converted to
        standard scores (zero mean, unit variance, NaNs ignored) and then
        cast to integer.
    '''
    chain = self.structure.child_list[0].child_dict[chain_id]
    aacids = [
        res for res in Selection.unfold_entities(chain, 'R')
        if is_aa(res, standard=True)
    ]

    # Start from NaN so residues without atoms stay marked as missing.
    b_factors = np.full(len(aacids), np.nan, dtype=np.float32)
    for i, residue in enumerate(aacids):
        atoms = Selection.unfold_entities(residue, 'A')
        if atoms:
            b_factors[i] = np.mean([atom.get_bfactor() for atom in atoms])

    # NaN-aware statistics: plain np.var returns NaN as soon as a single
    # residue is missing, which would wipe out every score.
    mean = np.nanmean(b_factors)
    sd = np.sqrt(np.nanvar(b_factors))

    # NaN entries stay NaN through the arithmetic, so no per-element test is
    # needed (and ``x != np.nan`` is always True, so it never filtered).
    b_factors = (b_factors - mean) / sd

    # ``np.int`` was only an alias of the builtin and is gone in NumPy >= 1.24.
    # NOTE(review): casting a NaN entry to int is not well defined - confirm
    # how callers expect missing residues to be reported.
    return b_factors.astype(int)
def __call__(self, structure):
    """Return the pairwise CA-CA distance matrix of a structure.

    Only residues whose hetero flag (``res.id[0]``) is blank are used.  The
    result is a square numpy array indexed by residue order; any pair in
    which a residue has no 'CA' atom is left at 0.
    """
    # A list, not ``filter``: on Python 3 ``filter`` is a lazy iterator and
    # ``len()`` on it raises TypeError.
    res_list = [
        res for res in Selection.unfold_entities(structure, 'R')
        if res.id[0] == " "
    ]

    # Resolve each residue's CA once instead of inside the O(n^2) loop.
    ca_atoms = []
    for res in res_list:
        try:
            ca_atoms.append(res['CA'])
        except KeyError:
            ca_atoms.append(None)

    size = len(ca_atoms)
    distance_matrix = np.zeros([size, size])
    for i, ca_i in enumerate(ca_atoms):
        if ca_i is None:
            continue
        for j, ca_j in enumerate(ca_atoms):
            if ca_j is not None:
                # Atom subtraction yields the distance between the two atoms.
                distance_matrix[i, j] = ca_i - ca_j
    return distance_matrix
def get_surface(model, PDB_TO_XYZR=None, MSMS="msms"):
    """Represent molecular surface as a vertex list array.

    Return a Numpy array that represents the vertex list of the
    molecular surface.

    Arguments:
     - model - entity whose atoms are written to the MSMS input file
     - PDB_TO_XYZR - deprecated, ignore this.
     - MSMS - msms executable (used as argument to os.system)

    Raises RuntimeError when MSMS does not produce a ``.vert`` file.
    All temporary files are removed before returning or raising.
    """
    # Issue warning if PDB_TO_XYZR is given
    if PDB_TO_XYZR is not None:
        warnings.warn(("PDB_TO_XYZR argument will be deprecated soon"
                       " in favor of an internal mapping algorithm."),
                      BiopythonDeprecationWarning)

    atom_list = Selection.unfold_entities(model, 'A')

    # mkstemp creates each file atomically; the deprecated mktemp only
    # returned a name, leaving a window for another process to claim it.
    xyz_fd, xyz_tmp = tempfile.mkstemp()
    surface_fd, surface_tmp = tempfile.mkstemp()
    log_fd, log_tmp = tempfile.mkstemp()
    # Only the names of these two are needed (MSMS / the shell write them).
    os.close(surface_fd)
    os.close(log_fd)

    surface_file = surface_tmp + ".vert"
    face_file = surface_tmp + ".face"
    try:
        # Make x,y,z,radius file (replaces the external pdb_to_xyzr script)
        with os.fdopen(xyz_fd, 'w') as pdb_to_xyzr:
            for atom in atom_list:
                x, y, z = atom.coord
                radius = _get_atom_radius(atom, rtype='united')
                print('{:6.3f}\t{:6.3f}\t{:6.3f}\t{:1.2f}'.format(x, y, z, radius),
                      file=pdb_to_xyzr)

        # make surface
        MSMS = MSMS + " -probe_radius 1.5 -if %s -of %s > " + log_tmp
        make_surface = MSMS % (xyz_tmp, surface_tmp)
        os.system(make_surface)
        if not os.path.isfile(surface_file):
            raise RuntimeError("Failed to generate surface file using "
                               "command:\n%s" % make_surface)

        # read surface vertices from vertex file
        return _read_vertex_array(surface_file)
    finally:
        # Best-effort cleanup: some of these may not exist (e.g. MSMS failed).
        for tmp_path in (xyz_tmp, surface_tmp, log_tmp, surface_file, face_file):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
def __init__(self, model, pdb_file=None):
    """Compute residue depth and CA depth for every amino acid of a model.

    Arguments:
     - model - the model whose residues are measured; the molecular surface
       is computed from this model as well.
     - pdb_file - unused; kept (now optional) so existing callers that pass
       it keep working.

    Results are stored in the property map under ``(chain_id, res_id)`` keys
    as ``(residue_depth, ca_depth)`` tuples, and in each residue's ``xtra``
    dict under "EXP_RD" and "EXP_RD_CA".
    """
    depth_dict = {}
    depth_list = []
    depth_keys = []
    # get_residue
    residue_list = Selection.unfold_entities(model, "R")
    # get_surface() unfolds the atoms of a model; handing it the PDB file
    # path (as before) cannot work with that signature.
    surface = get_surface(model)
    # calculate rdepth for each residue
    for residue in residue_list:
        if not is_aa(residue):
            continue
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        # Get the key
        res_id = residue.get_id()
        chain_id = residue.get_parent().get_id()
        depth_dict[(chain_id, res_id)] = (rd, ca_rd)
        depth_list.append((residue, (rd, ca_rd)))
        depth_keys.append((chain_id, res_id))
        # Update xtra information
        residue.xtra["EXP_RD"] = rd
        residue.xtra["EXP_RD_CA"] = ca_rd
    AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)
def _build_interface(self, model, id, threshold, rsa_calculation, rsa_threshold, include_waters=False, *chains):
    """Populate ``self.interface`` with the inter-chain contacts of a model.

    Atoms of the chains listed in ``self.chain_list`` are searched for
    residue pairs within ``threshold``.  Same-chain pairs are dropped, as are
    pairs involving 'HOH' unless ``include_waters`` is set; when ``chains``
    is given, only pairs whose (chain, chain) id tuple appears in it are
    kept.  The kept pairs are stored on ``self.interface.uniq_pairs``.

    Raises ValueError when the neighbour search returns nothing.
    """
    self.threshold = threshold

    # Chains selected when the builder was set up
    selected_chains = self.chain_list

    # Flat atom list restricted to the selected chains
    interface_atoms = [
        atom
        for chain in model
        if chain.id in selected_chains
        for atom in Selection.unfold_entities(chain, 'A')
    ]

    # All residue pairs closer than the threshold
    searcher = NeighborSearch(interface_atoms)
    contacts = searcher.search_all(threshold, 'R')
    if not contacts:
        raise ValueError("No atoms found in the interface")

    uniq_pairs = []
    for pair in contacts:
        first, second = pair[0], pair[1]
        chain_ids = (first.parent.id, second.parent.id)
        # 1. water contacts (unless requested)
        if 'HOH' in (first.resname, second.resname) and not include_waters:
            continue
        # 2. contacts within a single chain
        if chain_ids[0] == chain_ids[1]:
            continue
        # 3. user-defined chain pairs, when any were given
        if not chains or chain_ids in chains:
            uniq_pairs.append(pair)

    # Register both partners of every kept pair, first partner first
    for pair in uniq_pairs:
        for residue in pair:
            if residue not in self.interface:
                self._add_residue(residue)

    # Optional accessible-surface step: only when requested and the shell
    # can locate a naccess executable
    if rsa_calculation and os.system('which naccess') == 0:
        exposed = self._rsa_calculation(model, selected_chains, rsa_threshold)
        for residue in exposed:
            if residue not in self.interface:
                self._add_residue(residue)

    self._secondary_structure(model)
    self.interface.uniq_pairs = uniq_pairs
def _process_structure(self):
    """Return one ``Chain(chain, self.charges)`` per chain of ``self.structure``."""
    wrapped_chains = []
    for raw_chain in Selection.unfold_entities(self.structure, 'C'):
        wrapped_chains.append(Chain(raw_chain, self.charges))
    return wrapped_chains
def compute_interactions(pdb_name,save_to_db = False):
    """Compute CA-CA distances and classified residue contacts for a structure.

    The structure is looked up by its (lower-cased) entry name, its stored
    PDB text is parsed, and only the first preferred chain of the first model
    is analysed.  Residues of that chain that have no generic number in the
    database are detached, except residues named "HOH".

    :param pdb_name: entry name of the structure; lower-cased before lookup.
    :param save_to_db: when true, previous InteractingResiduePair rows for
        the structure are deleted, the classified pairs are saved and the
        distances are bulk-inserted as Distance rows.
    :return: ``(classified, distances)`` where ``classified`` is the list of
        InteractingPair objects with at least one interaction and
        ``distances`` is a list of
        ``(db_res1, db_res2, ca_distance, label1, label2)`` tuples.
    """

    # Both stages are always enabled here; the flags only gate the sections below.
    do_distances = True
    do_interactions = True
    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)

    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Keep only residues that have a generic number (GN) in the database.
    residues = struc.protein_conformation.residue_set.exclude(generic_number=None).all().prefetch_related('generic_number')

    # Lookups keyed by sequence number: database residue and its GN label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Collect first, detach afterwards, so the chain is not modified while
    # it is being iterated.  Residues named "HOH" are kept.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        # CA-CA distance for every unordered pair of non-water residues.
        for i1,res1 in enumerate(chain,1):
            if not is_water(res1):
                for i2,res2 in enumerate(chain,1):
                    if i2>i1 and not is_water(res2):
                        # Do not calculate twice.
                        distance = res1['CA']-res2['CA']
                        distances.append((dbres[res1.id[1]],dbres[res2.id[1]],distance,dblabel[res1.id[1]],dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(4.5, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])]

        # Only include contacts between residues MORE than NUM_SKIP_RESIDUES
        # sequence steps apart (the comparison below is ">").
        all_aa_neighbors = [pair for pair in all_aa_neighbors if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [InteractingPair(res_pair[0], res_pair[1], get_interactions(res_pair[0], res_pair[1]),dbres[res_pair[0].id[1]], dbres[res_pair[1].id[1]],struc) for res_pair in all_aa_neighbors]

        # Keep only the pairs for which at least one interaction was found.
        classified = [interaction for interaction in interactions if len(interaction.get_interactions()) > 0]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed "<chain><resnum>_<chain><resnum>"
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key =  res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules (atoms of residues named "HOH")
            water_list = { water for residue in s[preferred_chain] if residue.get_resname() == "HOH" for water in residue.get_atoms() }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list:
                ## (water atom, non-water residue within 3.5 of it) tuples
                water_neighbors = [(water, match_res) for water in water_list
                                for match_res in ns.search(water.coord, 3.5, "R") if not is_water(match_res)]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    # Later entries that refer to the same water atom
                    for index_two in [ index for index in range(index_one+1, len(water_neighbors)) if water_pair_one[0]==water_neighbors[index][0] ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]
                        key =  res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

                        # Check if interaction is polar - NOTE: this is not capturing every angle
                        if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
                            # NOTE: Is splitting of sidechain and backbone-mediated interactions desired?
                            if key in interaction_pairs:
                                # Same object as in `classified`, so the
                                # added interaction is saved below.
                                interaction_pairs[key].interactions.append(WaterMediated())
                            else:
                                # NOTE(review): this new pair lives only in
                                # interaction_pairs, which is not read again
                                # in this function; it is not in `classified`
                                # and so is not saved below - confirm intent.
                                # The key is also order-dependent (res_1_res_2).
                                interaction_pairs[key] = InteractingPair(res_1, res_2, [WaterMediated()], dbres[res_1.id[1]], dbres[res_2.id[1]], struc)

            for p in classified:
                p.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            bulk_distances = []
            for i,d in enumerate(distances):
                # The distance is stored as an integer: int(100 * value).
                distance = Distance(distance=int(100*d[2]),res1=d[0], res2=d[1],gn1=d[3], gn2=d[4], gns_pair='_'.join([d[3],d[4]]), structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 rows are pending.
                if len(bulk_distances)>1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Insert whatever is left after the last full batch.
            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances
SCALAR_EXPRESSION %s_startenergy = %s SCALAR_EXPRESSION %s_o = %s_startenergy + 4.91 SCALAR_EXPRESSION %s_s = %s SCALAR_EXPRESSION %s_exp = exp(%s_s*(%s_currentenergy-%s_o)) SCALAR_EXPRESSION %s_k = %s SCALAR_EXPRESSION %s_sig = (1-%s_k)+(%s_k/(1+%s_exp)) """) %(structure_id, structurefilename, correspondencefilename, resfilename, structure_id, startenergy, structure_id, structure_id, structure_id, s_value, structure_id, structure_id, structure_id, structure_id, structure_id, k_value, structure_id, structure_id, structure_id, structure_id) fitnessfile.write(outstring) fitnessstring = fitnessstring + str("*%s_sig") % (structure_id) nucleosome = structure[0] atom_list = Selection.unfold_entities(nucleosome, 'A') # A for atoms neighbor_search = NeighborSearch(atom_list) contacts_list = neighbor_search.search_all(radius, level = 'R') repack_residues = [] for contact in contacts_list: res1 = contact[0] res2 = contact[1] res1id = int(res1.get_id()[1]) chain1 = res1.get_parent() chain1id = chain1.get_id() res2id = int(res2.get_id()[1]) chain2 = res2.get_parent() chain2id = chain2.get_id()
def test_Superimposer(self): """Test on module that superimpose two protein structures.""" pdb1 = "PDB/1A8O.pdb" p = PDBParser() s1 = p.get_structure("FIXED", pdb1) fixed = Selection.unfold_entities(s1, "A") s2 = p.get_structure("MOVING", pdb1) moving = Selection.unfold_entities(s2, "A") rot = numpy.identity(3).astype('f') tran = numpy.array((1.0, 2.0, 3.0), 'f') for atom in moving: atom.transform(rot, tran) sup = Superimposer() sup.set_atoms(fixed, moving) self.assertTrue(numpy.allclose(sup.rotran[0], numpy.identity(3))) self.assertTrue(numpy.allclose(sup.rotran[1], numpy.array([-1.0, -2.0, -3.0]))) self.assertAlmostEqual(sup.rms, 0.0, places=3) atom_list = ['N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 
'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'S', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O', 'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C', 'N', 'C', 'C', 'O', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'N', 'C', 'C', 'O', 'C', 'S', 'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N', 'N', 'C', 'C', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] sup.apply(moving) atom_moved = [] for aa in moving: atom_moved.append(aa.element) 
self.assertEqual(atom_moved, atom_list)
parser.add_argument("--resnum", nargs='+', required=True, type=int) parser.add_argument("--chain2", nargs='+', required=True) args = parser.parse_args() from Bio.PDB import NeighborSearch, PDBParser, Selection structure = PDBParser().get_structure('X', args.pdb) pymol_command = "" chain = structure[0][args.chain1] # Supply chain name for "center residues" center_residues = [chain[resi] for resi in args.resnum] center_atoms = Selection.unfold_entities(center_residues, 'A') for j in args.chain2 : atom_list = [atom for atom in structure[0][j].get_atoms() if atom.name == 'CA' ] ns = NeighborSearch(atom_list) nearby_residues = {res for center_atom in center_atoms for res in ns.search(center_atom.coord, 8.5, 'R')} print "\nNeighbor residues in chain ", j, ": \n" print sorted(res.id[1] for res in nearby_residues) pymol_command = "show spheres, chain " + j + " and resi " for m in sorted(res.id[1] for res in nearby_residues):
def __getitem__(self, res):
    """
    Return the fragment stored for a residue.

    The residue is looked up directly in C{self.fd}; whatever exception
    that lookup raises for a residue that is not present propagates to
    the caller.

    @type res: L{Residue}

    @return: fragment classification
    @rtype: L{Fragment}
    """
    return self.fd[res]


# Command-line self test: map fragments (length 10, size 5, library
# directory "levitt_data") onto the first model of the PDB file given as
# the first argument and print every residue followed by its fragment,
# or by an empty line ending when no fragment was mapped to it.
if __name__=="__main__":

    import sys

    p=PDBParser()
    s=p.get_structure("X", sys.argv[1])

    # Only the first model is examined.
    m=s[0]
    fm=FragmentMapper(m, 10, 5, "levitt_data")

    for r in Selection.unfold_entities(m, "R"):

        # Trailing comma: stay on the same output line (Python 2 print).
        print r,
        if r in fm:
            print fm[r]
        else:
            print
def get_residue_depth(pdb_fh,msms_fh):
    """
    Extracts Residue depth from PDB structure

    The MSMS surface is cached as ``<msms dir>/<pdb basename>.msms.vert``;
    it is generated (pdb_to_xyzr followed by msms) only when that file does
    not exist yet.  Depths are collected for amino-acid residues of chain
    "A" of the first model, and collection stops at the first amino-acid
    residue that belongs to another chain.

    :param pdb_fh: path to PDB structure file
    :param msms_fh: path to the MSMS executable; ``pdb_to_xyzr`` is expected
        in the same directory
    :returns data_depth: pandas table indexed by residue number ("aasi")
        with the columns "Residue depth" and "Residue (C-alpha) depth"
    """
    from Bio.PDB import Selection,PDBParser
    from Bio.PDB.Polypeptide import is_aa
    from Bio.PDB.ResidueDepth import _read_vertex_array,residue_depth,ca_depth

    surface_fh="%s/%s.msms.vert" % (dirname(msms_fh),basename(pdb_fh))
    if not exists(surface_fh):
        pdb_to_xyzr_fh="%s/pdb_to_xyzr" % dirname(msms_fh)
        xyzr_fh="%s/%s.xyzr" % (dirname(msms_fh),basename(pdb_fh))
        pdb_to_xyzr_com="%s %s > %s" % (pdb_to_xyzr_fh,pdb_fh,xyzr_fh)
        msms_com="%s -probe_radius 1.5 -if %s -of %s > %s.log" % (msms_fh,xyzr_fh,splitext(surface_fh)[0],splitext(surface_fh)[0])
        log_fh="%s.log" % msms_fh
        # NOTE(review): the commands are assembled from the caller's paths
        # and run through a shell; paths containing spaces or shell
        # metacharacters will break or be interpreted - only pass trusted
        # paths here.
        # The context manager closes the log even if the call below raises.
        with open(log_fh,'a') as log_f:
            log_f.write("%s;\n%s\n" % (pdb_to_xyzr_com,msms_com))
            # Flush so the header precedes the child's output in the log.
            log_f.flush()
            subprocess.call("%s;%s" % (pdb_to_xyzr_com,msms_com) , shell=True,stdout=log_f, stderr=subprocess.STDOUT)

    surface =_read_vertex_array(surface_fh)

    pdb_parser=PDBParser()
    pdb_data=pdb_parser.get_structure("pdb_name",pdb_fh)
    model = pdb_data[0]
    residue_list = Selection.unfold_entities(model, 'R')

    depth_dict = {}
    for residue in residue_list:
        if not is_aa(residue):
            continue
        chain_id = residue.get_parent().get_id()
        if chain_id!="A":
            # Stop at the first amino acid outside chain "A" - checked
            # before the depth calculation so no work is wasted on it.
            break
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        # Get the key
        res_id = residue.get_id()
        depth_dict[(chain_id, res_id)] = (rd, ca_rd)
        # Update xtra information
        residue.xtra['EXP_RD'] = rd
        residue.xtra['EXP_RD_CA'] = ca_rd

    # Rows are keyed by (chain_id, res_id); after the transpose/reset they
    # show up as the columns "level_0" (chain) and "level_1" (residue id).
    depth_df=pd.DataFrame(depth_dict).T.reset_index()
    depth_df=depth_df.drop("level_0",axis=1)

    # "aasi" receives the residue number (second field of the residue id),
    # but only on rows where it differs from the previously assigned one;
    # the remaining rows stay null and are dropped below.
    aasi_prev=0
    for i in range(len(depth_df)):
        if depth_df.loc[i,"level_1"][1]!=aasi_prev:
            depth_df.loc[i,"aasi"]=depth_df.loc[i,"level_1"][1]
            aasi_prev=depth_df.loc[i,"level_1"][1]

    depth_df=depth_df.drop("level_1",axis=1)
    depth_df=depth_df.loc[~pd.isnull(depth_df.loc[:,"aasi"]),:]
    depth_df=depth_df.set_index("aasi",drop=True)
    depth_df.columns=["Residue depth","Residue (C-alpha) depth"]
    return depth_df
raise PDBException("No transformation has been calculated yet") rot, tran = self.rotran rot = rot.astype('f') tran = tran.astype('f') for atom in atom_list: atom.transform(rot, tran) if __name__ == "__main__": import sys from Bio.PDB import PDBParser, Selection p = PDBParser() s1 = p.get_structure("FIXED", sys.argv[1]) fixed = Selection.unfold_entities(s1, "A") s2 = p.get_structure("MOVING", sys.argv[1]) moving = Selection.unfold_entities(s2, "A") rot = numpy.identity(3).astype('f') tran = numpy.array((1.0, 2.0, 3.0), 'f') for atom in moving: atom.transform(rot, tran) sup = Superimposer() sup.set_atoms(fixed, moving) print(sup.rotran)
import networkx as nx
import matplotlib.pyplot as plt
from pprint import pprint as pp
import numpy as np

distanceThreshold = 1  # sys.argv[2]

# Fetch and parse the structure, then flatten it to a residue list.
pdbList = PDBList()
pdbParser = PDBParser()
proteinName = "1MBN"
structure = pdbParser.get_structure(
    proteinName,
    pdbList.retrieve_pdb_file(proteinName),
)
resList = Selection.unfold_entities(structure, "R")

# Square residue-by-residue matrix, initially all zeros.
distanceMatrix = np.zeros((len(resList), len(resList)))


def genDistanceMatrix(dMatrix, rList):
    """Print the residue-number -> CA atom map of rList and its size.

    dMatrix is accepted but not read or written here.
    """
    ca_by_number = {}
    for residue in rList:
        if "CA" in residue:
            ca_by_number[residue.id[1]] = residue["CA"]
    pp(ca_by_number)
    pp(len(ca_by_number))


genDistanceMatrix(distanceMatrix, resList)

# for atm in atom_cords.keys():
#     genDistances(atm)
# pp(distanceMatrix)