def calc_dihedral(chain, child_res_id):
    """
    Calculate the backbone dihedral angles (phi, psi) of one residue.

    :param chain: object exposing ``child_list``, a sequence of residues that
        can be indexed by atom name ('N', 'CA', 'C')
    :param child_res_id: position of the residue in ``chain.child_list``
        (a negative position counts from the end of the list)
    :return: tuple ``(phi, psi)``, or ``()`` when the residue has no
        predecessor/successor, a backbone atom is missing, or the dihedral
        computation divides by zero
    """
    from math import pi

    residues = chain.child_list
    if child_res_id < 0:
        child_res_id += len(residues)
    # The first residue has no predecessor; without this guard index -1
    # would silently wrap around to the last residue of the chain.
    if child_res_id < 1:
        return ()

    try:
        prev_c = residues[child_res_id - 1]['C'].get_vector()
        n = residues[child_res_id]['N'].get_vector()
        ca = residues[child_res_id]['CA'].get_vector()
        c = residues[child_res_id]['C'].get_vector()
        next_n = residues[child_res_id + 1]['N'].get_vector()
    except (KeyError, IndexError):
        # no dihedral angles for corner residues or non-a.a. 'residues'
        return ()

    from Bio import PDB

    try:
        # PDB.calc_dihedral results are scaled by -180/pi, i.e. converted to
        # degrees with the sign flipped.
        phi = PDB.calc_dihedral(prev_c, n, ca, c) * -180 / pi
        psi = PDB.calc_dihedral(n, ca, c, next_n) * -180 / pi
    except ZeroDivisionError:
        return ()
    return (phi, psi)
def calc_all_dihedrals(chain, child_res_id):
    """
    Calculate the dihedral angles (phi, psi, chia, chib) of one residue.

    Where the chi angle definition is ambiguous (VAL, ILE, THR), chia is
    computed with the atom at position 5 of the residue's ``child_list`` and
    chib with the atom at position 6; otherwise chia == chib.
    For GLY and ALA the result is ``(phi, psi, 0, 0)``.

    :param chain: object exposing ``child_list`` (sequence of residues)
    :param child_res_id: position of the residue in ``chain.child_list``
        (a negative position counts from the end of the list)
    :return: tuple ``(phi, psi, chia, chib)``, or ``()`` when the angles
        cannot be computed
    """
    from math import pi

    residues = chain.child_list
    if child_res_id < 0:
        child_res_id += len(residues)
    # The first residue has no predecessor; without this guard index -1
    # would silently wrap around to the last residue of the chain.
    if child_res_id < 1:
        return ()

    try:
        residue = residues[child_res_id]
        name = residue.get_resname()
        if name == 'GLY' or name == 'ALA':
            dih = calc_dihedral(chain, child_res_id)
            # An empty result from calc_dihedral raises IndexError here and
            # is reported by the handler below.
            phi = dih[0]
            psi = dih[1]
            return (phi, psi, 0, 0)
    except KeyError:
        print("key error line 103")
        return ()  # no dihedral angles for corner residues or non-a.a. 'residues'
    except IndexError:
        print('IndexError line 106, probable cause: irregular PDB file')
        return ()

    try:
        prev_c = residues[child_res_id - 1]['C'].get_vector()
        n = residue['N'].get_vector()
        ca = residue['CA'].get_vector()
        c = residue['C'].get_vector()
        next_n = residues[child_res_id + 1]['N'].get_vector()
        cb = residue['CB'].get_vector()
        # NOTE(review): relies on the atom order inside the residue; position
        # 5 is presumably the gamma atom -- confirm for irregular files.
        fourth_chi_atom = residue.child_list[5].get_vector()
        if name == 'VAL' or name == 'ILE' or name == 'THR':
            alt_fourth_chi_atom = residue.child_list[6].get_vector()
        else:
            alt_fourth_chi_atom = fourth_chi_atom
    except KeyError:
        print('KeyError line 119')
        return ()  # no dihedral angles for corner residues or non-a.a. 'residues'
    except IndexError:
        print('IndexError line 122, probable cause: irregular PDB file')
        return ()

    from Bio import PDB

    try:
        # Results are scaled by -180/pi: degrees with the sign flipped.
        phi = PDB.calc_dihedral(prev_c, n, ca, c) * -180 / pi
        psi = PDB.calc_dihedral(n, ca, c, next_n) * -180 / pi
        chia = PDB.calc_dihedral(c, ca, cb, fourth_chi_atom) * -180 / pi
        chib = PDB.calc_dihedral(c, ca, cb, alt_fourth_chi_atom) * -180 / pi
    except ZeroDivisionError:
        return ()
    return (phi, psi, chia, chib)
def calc_phi_psi(structure):
    '''Collect the N, C-alpha and C atom vectors of a structure and use them
    to calculate the backbone dihedral angles.

    Only residues that contain all three backbone atoms are used, so the
    three vector lists always stay aligned residue by residue. Consecutive
    entries are treated as neighbours, including across chain ends and
    skipped residues.

    :param structure: object providing ``get_chains()``; chains iterate over
        residues supporting ``has_id`` and lookup by atom name
    :return: tuple ``(dihedral_phi, dihedral_psi)`` of lists holding the
        values returned by ``PDB.calc_dihedral``; the first and the last
        collected residue are left out
    '''
    atom_vector_list_N = []
    atom_vector_list_Ca = []
    atom_vector_list_C = []

    for chain in structure.get_chains():
        for res in chain:
            if res.has_id('N') and res.has_id('CA') and res.has_id('C'):
                atom_vector_list_N.append(res['N'].get_vector())
                atom_vector_list_Ca.append(res['CA'].get_vector())
                atom_vector_list_C.append(res['C'].get_vector())

    len_vec = len(atom_vector_list_Ca)

    dihedral_phi = []
    dihedral_psi = []
    # The first amino acid has no phi angle and the last one has no psi
    # angle, so neither of them is included.
    for i in range(1, len_vec - 1):
        dihedral_phi.append(PDB.calc_dihedral(atom_vector_list_C[i - 1],
                                              atom_vector_list_N[i],
                                              atom_vector_list_Ca[i],
                                              atom_vector_list_C[i]))
        dihedral_psi.append(PDB.calc_dihedral(atom_vector_list_N[i],
                                              atom_vector_list_Ca[i],
                                              atom_vector_list_C[i],
                                              atom_vector_list_N[i + 1]))
    return (dihedral_phi, dihedral_psi)
def read(pdb_file):
    """
    Parse a structure file into a structure object.

    Files whose name ends in ``.cif`` (case-insensitive) are read with the
    mmCIF parser, everything else with the PDB parser. The file name is also
    used as the structure id.

    :param pdb_file: path of the pdb (or cif) format file
    :return: structure
    """
    logging.info(f'reading pdb file: {pdb_file}')
    if pdb_file.lower().endswith('.cif'):
        logging.info(f'switched to cif modus for file: {pdb_file}')
        parser = PDB.MMCIFParser()
    else:
        parser = PDB.PDBParser()
    return parser.get_structure(pdb_file, pdb_file)
def calc_ramachandran(file_name_list):
    """
    Classify the phi/psi angles of the given PDB files as normal or outlier.

    :param file_name_list: list of PDB file paths; entries that are not
        existing files are skipped
    :return: tuple ``(normals, outliers)``; each maps every key of
        ``RAMA_PREFERENCES`` to ``{"x": [phi, ...], "y": [psi, ...]}`` with
        the angles in degrees
    """
    global RAMA_PREF_VALUES

    # Read in the expected torsion angles (cached after the first call)
    if RAMA_PREF_VALUES is None:
        RAMA_PREF_VALUES = _cache_RAMA_PREF_VALUES()

    normals = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}
    outliers = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}

    parser = PDB.PDBParser()
    builder = PDB.PPBuilder()

    # Calculate the torsion angles of the inputs
    for inp in file_name_list:
        if not os.path.isfile(inp):
            continue
        structure = parser.get_structure('input_structure', inp)
        for model in structure:
            for chain in model:
                for poly in builder.build_peptides(chain):
                    phi_psi = poly.get_phi_psi_list()
                    for res_index, residue in enumerate(poly):
                        phi, psi = phi_psi[res_index]
                        # Compare against None explicitly: an angle of
                        # exactly 0.0 is valid but falsy.
                        if phi is None or psi is None:
                            continue
                        res_name = "{}".format(residue.resname)
                        # A defined psi implies there is a next residue.
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
                        elif res_name == "GLY":
                            aa_type = "GLY"
                        else:
                            aa_type = "General"
                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)
                        score = RAMA_PREF_VALUES[aa_type][int(psi_deg) + 180][int(phi_deg) + 180]
                        if score < RAMA_PREFERENCES[aa_type]["bounds"][1]:
                            target = outliers
                        else:
                            target = normals
                        target[aa_type]["x"].append(phi_deg)
                        target[aa_type]["y"].append(psi_deg)
    return normals, outliers
def get_dihedral(residue_list):
    '''
    Return the phi and psi angles of the middle residue together with its
    one-letter amino acid code.

    residue_list - []Bio.PDB.Residue - list of 3 *hopefully* continuous residues

    Returns ([phi, psi], res_name). Raises BackboneError when the residue
    numbers are not consecutive, the middle residue is rejected by is_aa, or
    a backbone atom is missing.
    '''
    for previous, current in zip(residue_list[:-1], residue_list[1:]):
        if (current.get_id()[1] - previous.get_id()[1]) != 1:
            raise BackboneError("Discontinuous residues", current.get_id()[1])

    # Backbone atoms needed from the previous, the central and the next
    # residue; False marks an atom that has not been found yet.
    atoms = (
        {"C": False},
        {"N": False, "CA": False, "C": False},
        {"N": False},
    )

    for position, residue in enumerate(residue_list):
        if position == 1:
            res_name = SeqUtils.seq1(residue.get_resname())
            # NOTE(review): is_aa receives the one-letter code here --
            # confirm that this is what is_aa expects.
            if not is_aa(res_name):
                raise BackboneError("Not a valid amino acid", residue.get_id()[1])
        for atom in residue.get_unpacked_list():
            if atom.name in atoms[position]:
                atoms[position][atom.name] = atom.get_vector()

    if False in map(check_dict, atoms):
        # 'residue' is the last residue visited by the loop above.
        raise BackboneError("Missing backbone atoms", residue.get_id()[1])

    central = atoms[1]
    phi = PDB.calc_dihedral(atoms[0]["C"], central["N"], central["CA"], central["C"])
    psi = PDB.calc_dihedral(central["N"], central["CA"], central["C"], atoms[2]["N"])
    return ([phi, psi], res_name)
def Separate_Chains(pdb_file):
    """Separate the chains of a PDB file and return their names in a list.

    Every chain is written to its own file in the "pdb_chains" folder, named
    "<both chain letters>_chain_<chain letter>.pdb".

    Input:
        -pdb_file = target file, named like something_<chains>.pdb; the
         order of the chain letters in the name must match the order of the
         chains in the file
    Output:
        -interaction = list with the chain letters taken from the file name,
         or False when Check_folder rejects the output folder
    """
    folder = "pdb_chains"
    if not Check_folder(folder):
        return False

    pdb_parser = pdb.PDBParser(PERMISSIVE=True, QUIET=True)
    pdb_structure = pdb_parser.get_structure("pdb_file", pdb_file)

    # Chain letters come from the last "_"-separated part of the file name,
    # with the four-character extension stripped.
    interaction = list(pdb_file[:-4].split("_")[-1])
    if len(interaction) != 2:
        # if the length is not two, something went wrong
        print(settings.IncorrectName(interaction))

    class _SingleChain(pdb.Select):
        """Selector that accepts only the chain with the given id."""

        def __init__(self, chain_id):
            self.chain_id = chain_id

        def accept_chain(self, chain):
            return chain.get_id() == self.chain_id

    io = pdb.PDBIO()
    io.set_structure(pdb_structure)
    for model in pdb_structure:
        # The position of the chain inside the model selects the letter
        # used in the output file name.
        for i, chain in enumerate(model):
            name = "%s_chain_%s.pdb" % (interaction[0] + interaction[1],
                                        interaction[i])
            file_name = os.path.join(folder, name)
            io.save(file_name, _SingleChain(chain.get_id()))
    return interaction
def _read_structure(path, pdb_id='pdb', cif_id='cif'): file_name = os.path.basename(path).split('.')[0] file_sufix = os.path.basename(path).split('.')[1] dir_path = os.path.dirname(path) if file_sufix == 'pdb': parser = struct.PDBParser(QUIET=True) structure = parser.get_structure(pdb_id, path) elif file_sufix == 'cif': parser = struct.MMCIFParser() structure = parser.get_structure(cif_id, path) else: print("ERROR: Unreognized file type " + file_sufix + " in " + file_name) sys.exit(1) return structure, dir_path, file_name
def compute_chi3(structure_, model_, chain_, curr_residue_):
    """Return the chi3 side-chain dihedral of a residue in degrees.

    The angle is rounded to two decimals. It is computed over
    CB-CG-CD-NE for ARG, CB-CG-CD-OE1 for GLN/GLU, CB-CG-CD-CE for LYS and
    CB-CG-SD-CE for MET, provided all four atoms are present. For any other
    residue the placeholder 999.00 is returned.

    The atoms are looked up through
    structure_[model_.id][chain_.id][curr_residue_.id].
    """

    def _vector(atom_name):
        return structure_[model_.id][chain_.id][
            curr_residue_.id][atom_name].get_vector()

    def _degrees(p1, p2, p3, p4):
        return round(math.degrees(PDB.calc_dihedral(p1, p2, p3, p4)), 2)

    chi3 = 999.00

    if all(curr_residue_.has_id(atom) for atom in ('CB', 'CG', 'CD')):
        curr_cb = _vector('CB')
        curr_cg = _vector('CG')
        curr_cd = _vector('CD')
        # (fourth atom of the dihedral, residue names it applies to)
        variants = (
            ('NE', ('ARG',)),
            ('OE1', ('GLN', 'GLU')),
            ('CE', ('LYS',)),
        )
        for fourth_atom, res_names in variants:
            if curr_residue_.has_id(fourth_atom) and curr_residue_.resname in res_names:
                chi3 = _degrees(curr_cb, curr_cg, curr_cd, _vector(fourth_atom))

    if all(curr_residue_.has_id(atom) for atom in ('CB', 'CG', 'SD', 'CE')) \
            and curr_residue_.resname == 'MET':
        chi3 = _degrees(_vector('CB'), _vector('CG'), _vector('SD'), _vector('CE'))

    return chi3
def calc_vecsum(metVal, ox):
    """Return the norm of the summed bond-valence vectors divided by the
    valence stored for the metal.

    metVal[ox] holds one entry per bond (with the keys 'dist', 'metVec',
    'ligVec', 'ligOcc' and 'bond_val') next to the two bookkeeping keys
    'coordNum' and 'valence'.
    """
    # Background: the borderline and outlier thresholds are >0.10 and >0.23,
    # respectively, for nVECSUM, and >10% and >25%, respectively, for the
    # vacancy parameter, which is the percentage of all expected coordination
    # sites left vacant (Supplementary Fig. 2 and Supplementary Table 2).
    # For example, ions with all coordination sites occupied by ligands
    # (vacancy = 0) are classified as acceptable. For geometry with an
    # expected coordination number greater than four, metals with one vacant
    # coordination site (vacancy <= 25%) are borderline, and metals with two
    # or more vacant coordination sites (vacancy > 25%) ...
    entry = metVal[ox]
    fij = PDB.Vector(x=0, y=0, z=0)
    bonds = [key for key in entry.keys() if key not in ['coordNum', 'valence']]

    for bond in bonds:
        info = entry[bond]
        distance = info['dist']
        metVec = info['metVec']
        ligVec = info['ligVec']
        # unit vector pointing from the metal to the ligand
        rij = (ligVec - metVec) / distance
        # bond valence weighted by the ligand occupancy
        sij = float(info['ligOcc']) * info['bond_val']
        fij = fij + np.multiply(rij.get_array(), sij)

    return math.sqrt(fij * fij) / entry['valence']
def PDBToNPY(self, fpathin):
    """Parse a PDB file and return the coordinates of all of its atoms.

    :param fpathin: path of the PDB file
    :return: list with one ``[x, y, z]`` list per atom, in model, chain,
        residue, atom order (a plain Python list, not an array)
    """
    parser = PDB.PDBParser()
    struct = parser.get_structure('1ABZ', fpathin)
    return [
        list(atom.get_coord())
        for model in struct
        for chain in model
        for residue in chain
        for atom in residue
    ]
def AngleFinder(Atom1, Atom2, Atom3):
    """Return, in degrees, the result of bp.calc_angle for the vectors of
    Atom1, Atom2 and Atom3 (passed in that order)."""
    vectors = [atom.get_vector() for atom in (Atom1, Atom2, Atom3)]
    return math.degrees(bp.calc_angle(*vectors))
def assign_sensitivity(structure, md_df, chain, pdb_path, go):
    """
    Attach a ``sensitivity`` dict to the residues of one chain.

    Changed: lookup the sensitivities directly in the df, no dict.

    :param structure: structure whose first model contains ``chain``
    :param md_df: data frame with an 'AA' column and 'GO:...' columns
    :param chain: id of the chain to annotate
    :param pdb_path: not used by this function
    :param go: not used by this function
    :return: the same structure, with ``res.sensitivity`` set to
        ``{GO column: value}`` on the annotated residues
    """
    residues = structure[0][chain]

    # one-letter sequence of the amino acid residues in the chain
    seq_pdb = [three2single[res.get_resname()]
               for res in residues if pdb.is_aa(res)]

    # sequence stored in the data frame; its first row is skipped
    seq_md = ''.join(md_df['AA'][1:])

    # align
    aligned_md, aligned_pdb, _identity = water(seq_md, seq_pdb)

    go_columns = [c for c in md_df.columns if c.startswith('GO:')]

    # NOTE(review): alignment columns are paired positionally with ALL
    # residues of the chain (non-amino-acid residues included) and the column
    # index is used as the data frame row label -- confirm this is intended.
    for pos, (aa_md, aa_pdb, res) in enumerate(
            zip(aligned_md, aligned_pdb, residues)):
        if aa_md == '-' or aa_pdb == '-':
            continue
        res.sensitivity = {term: md_df.loc[pos, term] for term in go_columns}
    return structure
def test_is_protein(self):
    """Call ftup.is_protein on every chain of 1MFQ; the test only checks
    that no call raises."""
    parser = bpdb.PDBParser()
    struct = parser.get_structure(
        "temp", 'test/forgi/threedee/data/1MFQ.pdb')
    for c in struct.get_chains():
        ftup.is_protein(c)
def generate_hit_distance_matrix(self, type='CA'):
    """Build the matrix of pairwise atom distances inside the hit range.

    :param type: 'NO' measures from the N atom of the first residue to the O
        atom of the second; any other value measures CA to CA
    :return: ``hit_span`` x ``hit_span`` array; the entry for a residue pair
        is stored at their residue numbers minus ``hit_range[0]``. Only
        residues numbered strictly between the two ``hit_range`` bounds are
        filled in; every other entry stays 0.
    """
    hit_distance_matrix = np.zeros((self.hit_span, self.hit_span))

    if type == 'NO':
        r1_type, r2_type = 'N', 'O'
    else:
        r1_type = r2_type = 'CA'

    parser = PDB.PDBParser()
    chains = parser.get_structure(id='temp', file=self.pdb_path)[0]
    # fall back to chain 'A' when the requested chain does not exist
    if self.chain_id in chains:
        chain = chains[self.chain_id]
    else:
        chain = chains['A']

    for residue1 in chain.get_residues():
        r1 = residue1.id[1]
        if not self.hit_range[0] < r1 < self.hit_range[1]:
            continue
        for residue2 in chain.get_residues():
            r2 = residue2.id[1]
            if not self.hit_range[0] < r2 < self.hit_range[1]:
                continue
            if r1_type in residue1 and r2_type in residue2:
                # subtracting two atoms gives their distance
                distance = residue1[r1_type] - residue2[r2_type]
                row = r1 - self.hit_range[0]
                column = r2 - self.hit_range[0]
                hit_distance_matrix[row][column] = distance
    return hit_distance_matrix
def get_phi_psi(structure):
    """Calculate phi,psi dihedral angles and return lists.

    Uses the polypeptide class. Returns (phi_angles_list, psi_angles_list);
    each holds one inner list of angles per polypeptide built from
    structure.
    """
    phi_angles_list = []
    psi_angles_list = []

    # One polypeptide object per continuous peptide found by the builder
    for pp in PDB.PPBuilder().build_peptides(structure):
        angle_pairs = pp.get_phi_psi_list()
        phi_angles_list.append([phi for phi, _psi in angle_pairs])
        psi_angles_list.append([psi for _phi, psi in angle_pairs])

    return phi_angles_list, psi_angles_list
def test_parse_chain(pdbfile):
    """Test parsing residues from a PDB file vs the BioPython implementation."""
    # Residues as parsed by our code
    with open(pdbfile) as fobj:
        ours = list(parse_pdb_chain(fobj))

    # Residues as parsed by BioPython
    structure = PDB.PDBParser().get_structure('test', pdbfile)
    theirs = list(structure.get_residues())

    assert len(ours) == len(theirs)

    for mine, reference in zip(ours, theirs):
        # Residue attributes
        assert mine.name == reference.resname
        assert mine.seq == reference.id[1]

        # Atoms: both should be in the same order they were in in the file
        assert len(mine.atoms) == len(reference)
        for my_atom, ref_atom in zip(mine.atoms, reference):
            assert my_atom.name == ref_atom.name
            assert np.allclose(my_atom.coord, ref_atom.coord)
            assert my_atom.serial == ref_atom.serial_number
class PASS(PredictionAlgorithm):
    """Prediction algorithm wrapper around the external "./algo/pass" binary."""

    pdbParser = PDB.PDBParser(PERMISSIVE=1)

    def __init__(self, pdbLoader, outputFolder):
        # command template; "%s" is a placeholder filled in elsewhere
        self.executionString = "./algo/pass %s"
        PredictionAlgorithm.__init__(self, pdbLoader, outputFolder)

    def run_one(self, structure):
        """Run the algorithm on one structure, then collect its files.

        The input file is copied into the output folder and the result files
        are moved there. The cleanup is best effort: a file that cannot be
        copied or moved is skipped.
        """
        PredictionAlgorithm.run_one(self, structure)
        print(structure.pdbID + "_asps.pdb")

        # cleanup
        # TODO: test whether the files is present...
        try:
            copyfile(structure.fileName,
                     self.outputFolder + "/" + structure.pdbID + ".pdb")
        except Exception:
            pass

        # Each file is handled separately so that one missing file does not
        # prevent the remaining ones from being moved.
        for suffix in ("_asps.pdb", "_lig1.pdb", "_lig2.pdb", "_lig3.pdb",
                       "_probes.pdb"):
            try:
                tryMove(structure.pdbID + suffix,
                        self.outputFolder + "/" + structure.pdbID + suffix)
            except Exception:
                pass
def __init__(self, xtal=False, num_range=False, verbose=False):
    """Load a structure and the residue ranges to work on.

    :param xtal: either an object with a ``pdb_code`` attribute (used as the
        structure directly) or a PDB code string that is upper-cased and
        looked up through ``Structure.objects``; when falsy, nothing is
        initialised
    :param num_range: optional "start-end" string; when falsy, one
        [first, last] sequence number pair is collected for every GPCR helix
        segment that has residues
    :param verbose: stored as ``self.verbose``
    """
    if xtal:
        self.verbose = verbose
        # Only the missing attribute means "xtal is a PDB code"; other
        # errors are no longer silently swallowed.
        if hasattr(xtal, 'pdb_code'):
            self.structure = xtal
        else:
            self.structure = Structure.objects.get(
                pdb_code__index=xtal.upper())

        self.parent_prot_conf = ProteinConformation.objects.get(
            protein=self.structure.protein_conformation.protein.parent)

        io = StringIO(self.structure.pdb_data.pdb)
        # keep only the first model of the parsed structure
        self.pdb_struct = PDB.PDBParser(QUIET=True).get_structure(
            self.structure.pdb_code.index, io)[0]

        self.range = []
        if num_range:
            self.range = [[int(i) for i in num_range.split('-')]]
        else:
            for t in ProteinSegment.objects.filter(proteinfamily='GPCR',
                                                   category='helix'):
                resis = Residue.objects.filter(
                    protein_conformation__protein=self.structure.
                    protein_conformation.protein.parent,
                    protein_segment=t)
                if len(resis) == 0:
                    continue
                self.range.append([
                    resis[0].sequence_number,
                    resis.reverse()[0].sequence_number
                ])
def _extract_seq_from_pdb(pdb_filepath, AA3_to_AA1=generic.AA3_to_AA1):
    """Extract the one-letter sequence of every chain in the first model.

    The position in the sequence follows the residue number (expected to
    start from 1): missing residue numbers are filled with "X", and residues
    whose number is not beyond the current sequence length are skipped.
    Residues whose hetero flag is not blank, or whose name is not in
    ``AA3_to_AA1``, are not added.

    :param pdb_filepath: path of the PDB file
    :param AA3_to_AA1: mapping from three-letter to one-letter codes
    :return: dict mapping chain id to sequence string
    """
    parser = PDB.PDBParser(QUIET=True)
    with open(pdb_filepath, 'r') as file:
        struct = parser.get_structure('placeholder', file)

    cid_seq_map = dict()
    for model in struct:
        for chain in model:
            seq = []
            for residue in chain:
                atom_type, res_id = residue.get_id()[:2]
                # res_id should start from 1
                if res_id < len(seq) + 1:
                    continue
                while res_id > len(seq) + 1:
                    seq.append("X")
                if atom_type == " ":
                    res_3 = residue.resname
                    try:
                        res_1 = AA3_to_AA1[res_3]
                    except (KeyError, IndexError):
                        # A missing key in a mapping raises KeyError;
                        # IndexError is kept for backward compatibility.
                        continue
                    seq.append(res_1)
            cid_seq_map[chain.id] = "".join(seq)
        # only the first model is used
        break
    return cid_seq_map


#
# if __name__ == "__main__":
#     path = list(os.listdir(paths.PDB_FOLDER))[0]
#     path = os.path.join(paths.PDB_FOLDER, path)
#     print(path)
#     print(_extract_seq_from_pdb(path))
def pdb2xyz(inputfile, outputPrefix, keepIntermediate=False):
    """pdb2xyz: Transform a pdb file to a goccs compatible xyz file with
    number of atoms, elements and coordinates, written to an output file
    named outputPrefix.xyz.

    The input is first passed through PDBFixer (heterogens are removed) and
    written to outputPrefix_fixed.pdb. If you set keepIntermediate to true
    then that pdb file will be kept in the output folder, otherwise it is
    deleted.
    """
    pdbfixedfilename = outputPrefix + "_fixed.pdb"
    xyzoutfilename = outputPrefix + ".xyz"

    fixer = pdbfixer.PDBFixer(inputfile)
    fixer.removeHeterogens(False)
    # Close (and thereby flush) the intermediate file before it is parsed
    # again below.
    with open(pdbfixedfilename, 'w') as fixedhandle:
        PDBFile.writeFile(fixer.topology, fixer.positions, fixedhandle)

    parser = PDB.PDBParser()
    # parser = PDB.MMCIFParser()  # in case it's a cif file
    structure = parser.get_structure("input", pdbfixedfilename)
    atoms = list(structure.get_atoms())

    with open(xyzoutfilename, "w") as outputhandle:
        # xyz header: the atom count on the first line, a free comment on
        # the second line
        outputhandle.write("%d\nempty line\n" % len(atoms))
        for atom in atoms:
            coords = atom.get_coord()
            outputhandle.write("%s %.3f %.3f %.3f\n" %
                               (atom.element, coords[0], coords[1], coords[2]))

    if not keepIntermediate:
        os.remove(pdbfixedfilename)
def save_results(out_models, output, directory, verbose):
    """Saves the resulting models into PDB files.

    Creates the output directory if it does not exist. Additionally, each
    chain receives a new ID in order to distinguish those chains that were
    equivalent. The files are named <output>_<number>.pdb, numbered from 1.

    Keyword arguments:
    out_models -- list of the resulting model objects created by the program
    output -- name of the output model/file given by the user
    directory -- folder the PDB files are written to
    verbose -- boolean, prints to stderr the progress of the program"""
    if verbose:
        sys.stderr.write("Saving models...\n")

    io = PDB.PDBIO()
    if not os.path.exists(directory):
        os.makedirs(directory)

    for index, old_model in enumerate(out_models):
        file_label = output + "_" + str(index + 1) + ".pdb"
        final_model = UpdModel(str(index))
        id_list = []
        for chain in old_model.get_chains():
            # work on a copy so the original model keeps its chain ids
            new_chain = chain.copy()
            new_chain.id = new_id(id_list)
            id_list.append(new_chain.id)
            final_model.add(new_chain)
        io.set_structure(final_model)
        io.save(directory + "/" + file_label)
        if verbose:
            sys.stderr.write("  " + file_label + " saved\n")
def ligandfilter(pdb):
    """
    Remove water and other ligands from pdb, in place.

    All non amino acid residues are removed, as are the chains and models
    left empty by that. Afterwards only the first model and only the first
    chain of that model are kept.

    :param pdb: PDB.Structure.Structure
    :return: None
    :raises AssertionError: when the structure does not end up with exactly
        one model holding exactly one chain
    """
    # Remove non amino acid residues
    # To upkeep the integrity due to detaching, iterate over child_list copy!
    for model in pdb.child_list[:]:
        for chain in model.child_list[:]:
            for res in chain.child_list[:]:
                if not PDB.is_aa(res):
                    chain.detach_child(res.id)
            if len(chain) == 0:
                # detach_child expects the id of the child, not the child
                model.detach_child(chain.id)
        if len(model) == 0:
            pdb.detach_child(model.id)

    # if the pdb still has more than one model, it's probably an NMR
    # structure: simply keep the first model
    if len(pdb) > 1:
        for model in pdb.child_list[1:]:
            pdb.detach_child(model.id)

    # likewise keep only the first chain of the remaining model
    if len(pdb.child_list[0]) > 1:
        model = pdb.child_list[0]
        for chain in model.child_list[1:]:
            model.detach_child(chain.id)

    # There is only one model left
    assert len(pdb) == 1
    # This model has only one chain
    assert len(pdb.child_list[0]) == 1
def calc_dihedral(self):
    """Fill self.angles with dihedral angles (in degrees) around the bond
    between self.cov_receptor and self.cov_ligand.

    The first entry is the dihedral CA - CB - cov_receptor - cov_ligand,
    with CA and CB taken from the parent of cov_receptor. It is followed by
    one dihedral CB - cov_receptor - cov_ligand - neighbour for every atom
    returned by self.get_atom_neighbors for cov_ligand.
    """
    receptor_residue = self.cov_receptor.parent
    cb = receptor_residue['CB']
    ca = receptor_residue['CA']
    ligand_neighbours = self.get_atom_neighbors(
        self.cov_ligand, list(self.ligand.get_atoms()))

    self.angles = []
    first_angle = bp.calc_dihedral(ca.get_vector(),
                                   cb.get_vector(),
                                   self.cov_receptor.get_vector(),
                                   self.cov_ligand.get_vector())
    self.angles.append(math.degrees(first_angle))

    for neighbour in ligand_neighbours:
        angle = bp.calc_dihedral(cb.get_vector(),
                                 self.cov_receptor.get_vector(),
                                 self.cov_ligand.get_vector(),
                                 neighbour.get_vector())
        self.angles.append(math.degrees(angle))
def parse_pdb(pdb_file):
    """Parse a PDB file and describe the chains of one randomly chosen model.

    :param pdb_file: path of the PDB file
    :return: tuple ``(seq_strs, seq_locs)`` with one entry per chain:
        the one-letter sequence of the residues whose hetero flag is blank,
        and the sorted unique residue numbers of those residues
    """
    structure = bio.PDBParser().get_structure('X', pdb_file)
    models = list(structure.get_models())
    # choose random model when more than 1 exists
    model = models[np.random.randint(0, len(models))]

    seq_strs = []
    seq_locs = []
    for chain in model:
        letters = []
        seq_loc = []
        for residue in chain:
            if residue.get_id()[0] != ' ':
                continue
            letters.append(residue_letter_codes[residue.get_resname()])
            # element 3 of an atom's full id is the id of its residue, whose
            # second field is the residue number
            seq_loc.extend(atom.get_full_id()[3][1] for atom in residue)
        seq_strs.append(''.join(letters))
        seq_locs.append(np.unique(seq_loc))
    return seq_strs, seq_locs
def annotate_fallback(chain_list):
    """
    If neither DSSR nor MC-Annotate are available, we use an ad-hoc
    implementation of canonical basepair detection as fallback.
    This does not work well for missing atoms or modified residues.

    :param chain_list: chains to annotate
    :return: tuple ``(bpseq, seq_ids)``: ``bpseq`` has one line per residue,
        "<1-based index> <residue name> <1-based partner index or 0>", and
        ``seq_ids`` lists the residue ids in the same order (chains sorted
        by id)
    """
    kdtree = bpdb.NeighborSearch(
        [atom for chain in chain_list for atom in chain.get_atoms()])
    pairs = kdtree.search_all(10, "R")
    basepairs = {}
    # Sorted, so conflicting basepairs are deterministically solved
    for res1, res2 in sorted(pairs):
        if res1.resname.strip() not in RNA_RESIDUES or res1.id[0].startswith(
                "H_"):
            continue
        if res2.resname.strip() not in RNA_RESIDUES or res2.id[0].startswith(
                "H_"):
            continue
        try:
            if is_basepair_pair(res1, res2):
                res1_id = fgr.resid_from_biopython(res1)
                res2_id = fgr.resid_from_biopython(res2)
                # The first basepair found for a residue wins.
                if res1_id in basepairs:
                    warnings.warn("More than one basepair detected for {}."
                                  " Ignoring {}-{} because {}-{} is already"
                                  " part of the structure".format(
                                      res1_id, res1_id, res2_id, res1_id,
                                      basepairs[res1_id]))
                    continue
                if res2_id in basepairs:
                    warnings.warn("More than one basepair detected for {}."
                                  " Ignoring {}-{} because {}-{} is already"
                                  " part of the structure".format(
                                      res2_id, res2_id, res1_id, res2_id,
                                      basepairs[res2_id]))
                    continue
                basepairs[res1_id] = res2_id
                basepairs[res2_id] = res1_id
        except KeyError as e:
            log.debug("Missing atom %s. %s has atoms %s, %s has atoms %s", e,
                      res1, res1.child_dict, res2, res2.child_dict)

    seq_ids = [
        fgr.resid_from_biopython(residue)
        for chain in sorted(chain_list, key=lambda x: x.id)
        for residue in chain
    ]

    # 1-based position of the first occurrence of every residue id, built
    # once instead of scanning seq_ids for every paired residue
    position_of = {}
    for position, seqid in enumerate(seq_ids, 1):
        position_of.setdefault(seqid, position)

    chain_dict = {c.id: c for c in chain_list}
    lines = []
    for position, seqid in enumerate(seq_ids, 1):
        if seqid in basepairs:
            bp = position_of[basepairs[seqid]]
        else:
            bp = 0
        lines.append("{} {} {}\n".format(
            position, chain_dict[seqid.chain][seqid.resid].resname.strip(),
            bp))
    bpseq = "".join(lines)
    return bpseq, seq_ids
def __init__(self, out_dir=None):
    """Create parsing and writing objects, specify output directory.

    When out_dir is None, the "chain_PDBs" folder inside the current working
    directory is used.
    """
    self.parser = PDBParser(QUIET=True)
    self.writer = PDB.PDBIO()
    self.out_dir = (os.path.join(os.getcwd(), "chain_PDBs")
                    if out_dir is None else out_dir)
def load_structures(files_to_load, quiet=False):
    """Load PDB files from a list and return a list of the structures.

    Every residue numbered 0 is removed from the loaded structures.

    :param files_to_load: list of PDB file paths; the path without its
        extension becomes the structure id
    :param quiet: when true, no progress is printed
    :return: list of structures, in the order of ``files_to_load``
    """
    parser = PDB.PDBParser(QUIET=True, PERMISSIVE=True)
    structures = []
    longest_line_len = 0
    for file in files_to_load:
        name = os.path.splitext(file)[0]
        if not quiet:
            print_line = "Loading " + name + "..."
            # "\r" lets the next progress line overwrite this one
            print(print_line, end="\r")
            longest_line_len = max(longest_line_len, len(print_line))
        new_structure = parser.get_structure(name, file)

        # Remove residue 0 to dedicate it to the donor fluorophore
        for new_model in new_structure:
            for new_chain in new_model:
                # Iterate over a copy: detaching from the chain while
                # iterating it directly would skip residues.
                for residue in list(new_chain):
                    if residue.id[1] == 0:
                        new_chain.detach_child(residue.id)
        structures.append(new_structure)

    if not quiet:
        final_print_str = "Loaded " + str(len(files_to_load)) + " structures."
        # pad with spaces to blank out what is left of the longest line
        num_spaces = max(0, longest_line_len - len(final_print_str))
        print(final_print_str + " " * num_spaces)
    return structures
def adjacencyMat(prot, chainID, seqIDs, normalise=True, mode='bool', thresh=6.0):
    """Build an adjacency matrix for the given residues of one chain.

    The distance between two residues is the difference of their
    centralCarbon() results; identical residue ids get distance 0.

    :param prot: sub-folder of the docking set that holds the chain file
    :param chainID: chain id, also used in the file name and structure id
    :param seqIDs: residue ids; entry (i, j) relates seqIDs[i] to seqIDs[j]
    :param normalise: in 'distance' mode, divide the result by thresh
    :param mode: 'bool' gives 1 where the distance is below thresh and 0
        elsewhere; 'distance' gives thresh - distance where the distance is
        below thresh and 0 elsewhere; any other value returns the raw
        distances
    :param thresh: distance threshold
    :return: len(seqIDs) x len(seqIDs) array
    """
    size = len(seqIDs)
    mat = np.zeros(shape=(size, size))

    prefix = "./PPI4DOCK/PPI4DOCK_docking_set/"
    chainFile = f"{prefix}/{prot}/{chainID}_model_st.pdb"
    structure = PDB.PDBParser().get_structure(chainID, chainFile)
    chain = structure[0][chainID]

    for row, resA in enumerate(seqIDs):
        for column, resB in enumerate(seqIDs):
            if resA == resB:
                # same residue, self loop
                mat[row][column] = 0
            else:
                mat[row][column] = (centralCarbon(chain[resA])
                                    - centralCarbon(chain[resB]))

    if mode == 'bool':
        mat = np.where(mat < thresh, 1, 0)
    elif mode == 'distance':
        mat = np.where(mat < thresh, thresh - mat, 0)
        if normalise:
            mat = mat / thresh
    return mat
def get_dssp_info(PDB_file, model, dir):
    """Runs DSSP on protein input.

    PDB_file is the file name inside the folder dir and model is the model
    handed to PDB.DSSP. The 'mkdssp' executable is used. Returns the
    PDB.DSSP object.
    """
    # TODO : you can run DSSP through biopython. The output contains a lot of
    # useful information.
    # Tip : make sure your secondary structure indexing matches the sequence
    # order in the PDB file!
    pdb_path = dir + '/' + PDB_file
    return PDB.DSSP(model, pdb_path, dssp='mkdssp')
def main():
    """Write the amino acid sequence of a PDB file as a FASTA file.

    Command line options: -p/--pdb (input PDB file) and -f/--pdb_fasta
    (output FASTA file); both are required. The peptides of all chains of
    the first model are concatenated into one sequence, which is printed and
    written under the header ">name length".
    """
    parser = optparse.OptionParser()
    parser.add_option("-p", "--pdb", dest="pdb", help="path to PDB file",
                      metavar="STRING")
    parser.add_option("-f", "--pdb_fasta", dest="pdb_fasta",
                      help="path to PDB fasta file (out)", metavar="STRING")
    (options, args) = parser.parse_args()
    if not options.pdb or not options.pdb_fasta:
        # exits with a usage message instead of failing later on None
        parser.error("both --pdb and --pdb_fasta are required")

    pdb_fasta = options.pdb_fasta
    pdb_file = options.pdb
    pdb_name = os.path.basename(pdb_file).split(".")[0]

    pdb_parser = BP.PDBParser()
    # the large radius makes the builder retrieve all amino acids
    ppb = PPBuilder(radius=1000)

    structure = pdb_parser.get_structure(pdb_name, pdb_file)
    model = structure[0]
    pdbseq = "".join(
        str(pp.get_sequence())
        for chain in model
        for pp in ppb.build_peptides(model[chain.id], aa_only=False)
    )

    # single-argument print calls behave the same on Python 2 and Python 3
    print("> %s %s" % (pdb_name, len(pdbseq)))
    print(pdbseq)

    with open(pdb_fasta, "w") as o:
        o.write(">%s %i\n%s\n" % (pdb_name, len(pdbseq), pdbseq))
def pdb2cd(name):
    """Return the secondary structure composition of <name>.pdb.

    The result is the list [alpha, beta, coil]: the fractions of positions
    classified as helix (DSSP code 'H'), sheet ('E') and coil ('-').
    """
    f = name + ".pdb"
    dssp_tuple = dssp_dict_from_pdb_file(f)
    dssp_dict = dssp_tuple[0]
    dssp_keys = dssp_tuple[1]
    p = PDBParser(QUIET=True).get_structure("file", f)

    # ids of all chains, over all models
    cc = [chain.get_id() for model in p for chain in model]

    # Length of the sequence: the number of amino acids in those chains of
    # the first model, replaced by the number of DSSP keys whenever the two
    # disagree.
    howLong = 0
    for c in cc:
        howLong += sum(1 for res in p[0][c].get_residues() if PDB.is_aa(res))
    if howLong != len(dssp_keys):
        howLong = len(dssp_keys)

    ss = np.arange(1, howLong + 1)

    # class per DSSP code: 0 = helix, 1 = sheet, 2 = coil
    class_of_code = {'H': 0, 'E': 1, '-': 2}
    ss_out = 0
    for i in range(1, howLong + 1):
        # NOTE(review): the key is taken at offset i - 3, so the first two
        # positions read from the end of the key list -- confirm intended.
        ss_lib = dssp_dict[dssp_keys[i - 3]]
        # NOTE(review): any other DSSP code keeps the class of the previous
        # position -- confirm intended.
        ss_out = class_of_code.get(ss_lib[1], ss_out)
        ss[i - 1] = ss_out

    # fractional composition of alpha helix, beta sheet and random coil
    alpha = (ss == 0).sum() / len(ss)
    beta = (ss == 1).sum() / len(ss)
    coil = (ss == 2).sum() / len(ss)
    return [alpha, beta, coil]
def create_g_alpha_pdb_array(signprot_complex):
    """Collect the atoms of every residue of the complex's alpha subunit.

    :param signprot_complex: object whose ``structure`` provides the PDB
        code and the rotamers
    :return: OrderedDict mapping segment slug to an OrderedDict that maps
        the generic number label of each residue to its list of atoms, or
        to the placeholder 'x' when no atoms could be obtained
    """
    segments = ProteinSegment.objects.filter(proteinfamily='Alpha')
    residues = Residue.objects.filter(
        protein_conformation__protein__entry_name=signprot_complex.
        structure.pdb_code.index.lower() + '_a')
    pdb_array = OrderedDict()
    parse = GPCRDBParsingPDB()

    for s in segments:
        if s.slug not in pdb_array:
            pdb_array[s.slug] = OrderedDict()
        for r in residues.filter(protein_segment=s):
            try:
                rotamers = Rotamer.objects.filter(
                    structure=signprot_complex.structure,
                    residue__display_generic_number__label=r.
                    display_generic_number.label)
                if len(rotamers) == 0:
                    atoms = 'x'
                else:
                    rotamer = parse.right_rotamer_select(rotamers)
                    # first model of the rotamer's PDB text
                    p = PDB.PDBParser(QUIET=True).get_structure(
                        'structure', StringIO(rotamer.pdbdata.pdb))[0]
                    atoms = [atom for chain in p
                             for res in chain
                             for atom in res]
            except Exception:
                # Best effort: any failure for this residue yields the
                # placeholder; KeyboardInterrupt/SystemExit still propagate.
                atoms = 'x'
            pdb_array[r.protein_segment.slug][
                r.display_generic_number.label] = atoms
    return pdb_array
def calc_vecsum(structure, metalName, valenceDictionary):
    """Return the norm of the summed bond-valence vectors around the metal,
    divided by valenceDictionary['Valency'].

    :param structure: structure whose atoms are the metal and its ligands
    :param metalName: name of the metal; get_metalRow uses it to locate the
        metal in the atom list, and it prefixes the valence dictionary keys
    :param valenceDictionary: holds 'oxNum', 'Valency' and one entry per
        "<metal>_<ATOM NAME>" key with parallel 'Ox' and 'Valence' lists
    :return: the vecsum value
    """
    atoms = list(structure.get_atoms())
    metalRow = get_metalRow(atoms, metalName)
    metalAtom = atoms[metalRow]

    fij = PDB.Vector(x=0, y=0, z=0)
    for idx, ligandAtom in enumerate(atoms):
        if idx == metalRow:
            continue
        atomNames = metalName + "_" + ligandAtom.get_name().upper()
        # subtracting two atoms gives their distance
        distance = abs(ligandAtom - metalAtom)
        # unit vector pointing from the metal to the ligand
        rij = (ligandAtom.get_vector() - metalAtom.get_vector()) / distance
        ligOcc = ligandAtom.get_occupancy()
        # valence listed for the oxidation number in use
        oxInd = valenceDictionary[atomNames]['Ox'].index(
            valenceDictionary['oxNum'])
        bondValence = float(valenceDictionary[atomNames]['Valence'][oxInd])
        sij = float(ligOcc) * bondValence
        fij = fij + np.multiply(rij.get_array(), sij)

    return math.sqrt(fij * fij) / float(valenceDictionary['Valency'])
def load_pdb_fobject(self, fobject):
    """Parse a PDB file object and return the structure.

    Atom ids made of one or two capital letters, an optional digit and a
    trailing "*" get the "*" replaced by "'".
    """
    structure = PDB.PDBParser(QUIET=True).get_structure("c", fobject)
    starred_id = re.compile(r'^[A-Z]{1,2}[0-9]?\*$')
    for atom in structure.get_atoms():
        if starred_id.match(atom.id):
            atom.id = atom.id.replace("*", "'")
    return structure
def from_structure(structure):
    """Return contact data from a 3D structure (in pdb format).

    :param structure: path of a PDB file, a Bio.PDB structure, or a
        sequence/array of atom coordinates (one row per atom)
    :return: square array of inverse pairwise distances; entries whose
        distance is 0 (the diagonal included) are set to the largest inverse
        distance found
    :raises ImportError: when Biopython (needed for file and structure
        input) or Scipy is not installed
    """
    coords = structure
    # Coordinates given directly need no Biopython at all.
    if not isinstance(structure, (np.ndarray, list, tuple)):
        try:
            from Bio import PDB
        except ImportError:
            print("Biopython not found.")
            raise
        if isinstance(structure, str):
            structure = PDB.PDBParser().get_structure('S', structure)
        if isinstance(structure, PDB.Structure.Structure):
            coords = [
                np.array(atom.get_coord()) for atom in structure.get_atoms()
            ]
        else:
            coords = structure
    atoms = np.array(coords)

    try:
        # import the submodule explicitly; "import scipy" alone does not
        # guarantee that scipy.spatial is loaded
        from scipy.spatial import distance
    except ImportError:
        print("Scipy not found.")
        raise

    D = distance.squareform(distance.pdist(atoms, 'euclidean'))
    inverse = 1 / D[D != 0]
    m = np.max(inverse)
    M = np.zeros(D.shape)
    M[D != 0] = inverse
    M[D == 0] = m
    return M
def read_dssp(dssp_file):
    """Parse ``dssp_file`` with ``BP.make_dssp_dict``.

    Returns the ``(dssp, keys)`` pair produced by the parser.  When the
    parser raises ``PDBException`` a notice naming the file is printed and
    ``None`` is returned instead, so callers must check the result before
    unpacking it.
    """
    try:
        parsed = BP.make_dssp_dict(dssp_file)
        dssp, keys = parsed
    except PDBException:
        print("SKIPPING THIS protein: pdb exception occurred for %s" % dssp_file)
        return None
    else:
        return dssp, keys
def _get_system_vectors(rotation_axis, m_point, target_vector):
    """Build the helper vectors describing ``m_point`` relative to ``rotation_axis``.

    Returns ``None`` when ``m_point`` lies on the rotation axis (its
    perpendicular offset is shorter than 1e-9).  Otherwise returns the tuple
    ``(r, f, r_normd, s_normd)`` where

        r        - result of ``pdb.vector_to_axis(rotation_axis, m_point)``,
                   the perpendicular offset between axis and point
        f        - ``target_vector`` minus the axis point ``m_point - r``
        r_normd  - ``r`` normalised
        s_normd  - ``r_normd ** normalised rotation axis``
                   (presumably the Bio.PDB vector cross product - confirm
                   against the vector type used by callers)
    """
    radial = pdb.vector_to_axis(rotation_axis, m_point)
    axis_point = m_point - radial

    # a vanishing offset means the point sits on the axis: nothing to build
    if radial.norm() < 1e-9:
        return None

    radial_unit = radial.normalized()
    from_axis_to_target = target_vector - axis_point
    axis_unit = rotation_axis.normalized()
    tangential_unit = radial_unit ** axis_unit
    return radial, from_axis_to_target, radial_unit, tangential_unit
def compute_torsion_angles(previous_residue, residue, next_residue):
    """
    Compute the backbone phi and psi torsion angles of ``residue``.

    phi uses C of ``previous_residue`` followed by N, CA, C of ``residue``;
    psi uses N, CA, C of ``residue`` followed by N of ``next_residue``.

    :param previous_residue: residue preceding ``residue`` (needs atom 'C')
    :param residue: residue the angles are computed for (needs 'N', 'CA', 'C')
    :param next_residue: residue following ``residue`` (needs atom 'N')
    :return: tuple (phi, psi), the values of ``PDB.calc_dihedral`` passed
             through ``math.degrees``
    """
    prev_c = previous_residue['C'].get_vector()
    n, ca, c = (residue[name].get_vector() for name in ('N', 'CA', 'C'))
    next_n = next_residue['N'].get_vector()

    phi = PDB.calc_dihedral(prev_c, n, ca, c)
    psi = PDB.calc_dihedral(n, ca, c, next_n)

    return math.degrees(phi), math.degrees(psi)
def analyze_dihedral(self):
    """ Deprecated. Please use class Dihedral_Analisys

    Computes dihedral angles around the covalent bond and writes them, in
    degrees, to the file ``self.path + '/' + DIHEDRAL_OUTPUT``.

    The first angle is CA - CB - covalent residue atom - covalent ligand
    atom.  One further angle CB - covalent residue atom - covalent ligand
    atom - X follows for every ligand atom X returned by a neighbour search
    of radius 2 around the covalent ligand atom, the covalent atom itself
    excluded.  Each value is written preceded by a single space.
    """
    cov_atm_name = self.ligand_dict['cov_atm']
    cov_atm_lig = self.ligand.child_dict[cov_atm_name]
    res_atoms = self.covalent_res.child_dict

    ##dihedral between CA < CB < SG < ligand
    angles = [
        math.degrees(
            bp.calc_dihedral(res_atoms['CA'].get_vector(),
                             res_atoms['CB'].get_vector(),
                             self.covalent_atm_res.get_vector(),
                             cov_atm_lig.get_vector()))
    ]

    ##all dihedral of CB < SG < ligand-covalent-atom < other ligand atoms
    ns = bp.NeighborSearch(list(self.ligand.get_atom()))
    neigh = [atom for atom in ns.search(cov_atm_lig.get_coord(), 2)
             if atom.name != cov_atm_name]  # removes the atom itself
    for atom in neigh:
        angles.append(math.degrees(
            bp.calc_dihedral(res_atoms['CB'].get_vector(),
                             self.covalent_atm_res.get_vector(),
                             cov_atm_lig.get_vector(),
                             atom.get_vector())))

    # same text the former reduce() produced (' a1 a2 ...'), but the file is
    # closed deterministically and no Python-2-only builtin is needed
    with open('/'.join([self.path, DIHEDRAL_OUTPUT]), 'w') as handle:
        handle.write(''.join(' ' + str(ang) for ang in angles))
def __get_residues__(self, structure):
    """
    Collect all amino acid residues of a structure in a numpy array.

    parameters:
    ----------------
    structure: object whose ``get_residues()`` yields the residues to test
               (e.g. a parsed Bio.PDB structure)

    Returns:
    ---------------------------------------
    array: 1-D np-arr of dtype object holding, in iteration order, every
           residue for which ``PDB.is_aa`` is true
    """
    amino_acids = [res_i for res_i in structure.get_residues() if PDB.is_aa(res_i)]

    # Residues are iterable containers of atoms, so np.array(list_of_residues)
    # may unpack them into a 2-D array of atoms (or fail on ragged input).
    # Filling a pre-allocated object array keeps one residue per element.
    residues_arr = np.empty(len(amino_acids), dtype=object)
    for idx, res_i in enumerate(amino_acids):
        residues_arr[idx] = res_i
    return residues_arr
def calculate_torsion_psi(current_residue, next_residue):
    """Return the psi torsion of ``current_residue``.

    The dihedral is taken over N, CA, C of ``current_residue`` and N of
    ``next_residue``; the value is whatever ``PDB.calc_dihedral`` returns
    (no unit conversion is applied here).
    """
    backbone = [current_residue[name].get_vector() for name in ('N', 'CA', 'C')]
    backbone.append(next_residue['N'].get_vector())
    return PDB.calc_dihedral(*backbone)
def calculate_torsion_phi(previous_residue, current_residue):
    """Return the phi torsion of ``current_residue``.

    The dihedral is taken over C of ``previous_residue`` and N, CA, C of
    ``current_residue``; the value is whatever ``PDB.calc_dihedral`` returns
    (no unit conversion is applied here).
    """
    backbone = [previous_residue['C'].get_vector()]
    backbone.extend(current_residue[name].get_vector() for name in ('N', 'CA', 'C'))
    return PDB.calc_dihedral(*backbone)
def _cst_atom_name(Pose, Res, Atm):
    ''' Returns the name of atom number Atm of residue Res with all spaces removed '''
    return Pose.residue(Res).atom_name(Atm).replace(' ', '')


def _cst_nearby_hydrogens(Pose, AtomIndex, ResAtmRecordLists, ResidueAtomHydrogens):
    ''' Lists (residue, atom, name) of the atoms close to atom AtomIndex whose name contains "H" '''
    Hydrogens = []
    for H_Index in ResidueAtomHydrogens[AtomIndex]:
        H_Res, H_Atm = ResAtmRecordLists[H_Index]
        H_Name = _cst_atom_name(Pose, H_Res, H_Atm)
        # NOTE(review): a substring test also accepts heavy atoms such as
        # 'OH' or 'NH1' - confirm that this is intended
        if 'H' in H_Name:
            Hydrogens.append((H_Res, H_Atm, H_Name))
    return Hydrogens


def _cst_classify_contact(ResidueAtomSasa, SasaScale, UpRes, UpName, DownRes, DownName):
    ''' Returns (AverageSasa, BBBB, BBSC, SCSC) depending on which of the two atoms have a SASA entry '''
    FoundSasa = []
    for Res, Name in ((UpRes, UpName), (DownRes, DownName)):
        try:
            FoundSasa.append(ResidueAtomSasa[Res][Name])
        except KeyError:
            pass

    if len(FoundSasa) == 2:
        # both atoms known: weight by their mean sasa
        return np.mean(FoundSasa), 1, 0, 0
    if len(FoundSasa) == 1:
        # exactly one atom known: weight as the most buried
        return SasaScale.FloorSasa, 0, 1, 0
    # neither atom known
    return SasaScale.CeilingSasa, 0, 0, 1


def _cst_reference_neighbors(Pose, CenterCoords, CenterId, PartnerId, NeighborIndices,
                             ResAtmRecordLists, ResAtmCoordLists):
    ''' Returns (distance, name, residue, coords) of usable reference atoms around an atom, nearest first '''
    Selected = []
    for NeighborIndex in NeighborIndices:
        NeighborRes, NeighborAtm = ResAtmRecordLists[NeighborIndex]
        NeighborName = _cst_atom_name(Pose, NeighborRes, NeighborAtm)

        # keep looking if neighbor is hydrogen
        if 'H' in NeighborName:
            continue
        # skip virtual atoms
        if Pose.residue(NeighborRes).is_virtual(NeighborAtm):
            continue
        # keep looking if neighbor is the atom itself or its contact partner
        if (NeighborRes, NeighborName) == CenterId or (NeighborRes, NeighborName) == PartnerId:
            continue

        NeighborCoords = ResAtmCoordLists[NeighborIndex]
        DistanceToNeighbor = solenoid_tools.vector_magnitude(CenterCoords - NeighborCoords)
        Selected.append((DistanceToNeighbor, NeighborName, NeighborRes, NeighborCoords))

    # order by distance, then name and residue; the coordinate arrays are kept
    # out of the comparison because arrays have no unambiguous ordering
    Selected.sort(key=lambda Entry: Entry[:3])
    return Selected


def get_pose_constraints(Pose, MaxDist, MinPositionSeperation, SasaRadius, SasaScale, UpstreamGrep, DownstreamGrep, NeedHydrogen=True):
    '''
    Builds AtomPair, Angle and Dihedral constraint lines for atom contacts found in Pose

    A contact is a pair of non-virtual atoms (upstream, downstream) where
      - the upstream atom name matches UpstreamGrep and the downstream atom
        name matches DownstreamGrep (both tested with re.match),
      - the atoms are within MaxDist of each other,
      - downstream residue number - upstream residue number >= MinPositionSeperation,
      - at least one atom within 1.1 of either atom has an "H" in its name,
        unless NeedHydrogen is False.

    Each contact yields six lines: one AtomPair, two Angle and three Dihedral
    constraints. The angles and dihedrals are measured with Bio.PDB against the
    two nearest usable (no "H" in name, not virtual) neighbors of each atom.
    Every line carries a weight SasaScale.weigh( sasa ) where sasa depends on
    which of the two atoms have an entry in the per atom sasa table.

    Arguments:
      Pose                   - structure, read through n_residue(), residue(i).natoms(),
                               .atom(j).xyz(), .atom_name(j) and .is_virtual(j)
      MaxDist                - radius of the contact / reference neighbor search
      MinPositionSeperation  - minimum residue number difference of a contact
      SasaRadius             - probe_radius handed to AtomicSasaCalculator
      SasaScale              - object providing FloorSasa, CeilingSasa and weigh()
      UpstreamGrep           - regular expression for upstream atom names
      DownstreamGrep         - regular expression for downstream atom names
      NeedHydrogen           - see above

    Returns:
      ( AllConstraints, ( both atoms in sasa table, one atom in table, neither in table ) )
      every element being a list of constraint strings
      (presumably backbone-backbone, backbone-sidechain and sidechain-sidechain
      contacts, going by the original variable names - confirm)

    Raises:
      ImportError if a contact has to be weighted but interface_fragment_matching
      could not be imported; IndexError if an atom of a contact has fewer than
      two usable reference neighbors within MaxDist
    '''
    SasaImportError = ' Error: SASA weighting of contacts requires interface_fragment_matching from Alex Ford '

    # AtomicSasaCalculator is from Alex Ford's interface_fragment_matching
    # It is used to give buried polar contacts more weight
    try:
        from interface_fragment_matching.utility.analysis import AtomicSasaCalculator
        AlexsSasaCalculator = AtomicSasaCalculator(probe_radius=SasaRadius)
        ResidueAtomSasa = AlexsSasaCalculator.calculate_per_atom_sasa(Pose)
    except ImportError:
        # report the problem now; it only becomes fatal once a weight is needed
        import warnings
        warnings.warn(SasaImportError)
        ResidueAtomSasa = None

    # for making full atom kd tree
    ResAtmCoordLists = []
    # for translating from kd tree index to ( residue, atom ) coord
    ResAtmRecordLists = []
    for Res in range(1, Pose.n_residue() + 1):
        Residue = Pose.residue(Res)
        for Atm in range(1, Residue.natoms() + 1):
            ResAtmRecordLists.append((Res, Atm))
            ResAtmCoordLists.append( np.array(list(Residue.atom(Atm).xyz())) )

    ResidueAtomArray = np.array( ResAtmCoordLists )
    ResidueAtomKDTree = spatial.KDTree( ResidueAtomArray )
    ResidueAtomNeighbors = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, MaxDist )
    ResidueAtomHydrogens = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, 1.1 )

    # holds constraints before printing
    AllConstraints = []
    # holds sorted cst
    AllBackboneBackboneCst = []
    AllBackboneSidechainCst = []
    AllSidechainSidechainCst = []

    AngleFormat = 'Angle %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5'
    TorsionFormat = 'Dihedral %s %d %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5'

    # All contacts are from upstream to downstream residues to avoid double counting
    for UpIndex, UpXyzCoords in enumerate(ResAtmCoordLists):
        UpRes, UpAtm = ResAtmRecordLists[UpIndex]
        UpName = _cst_atom_name(Pose, UpRes, UpAtm)

        # skip virtual atoms
        if Pose.residue(UpRes).is_virtual(UpAtm):
            continue
        # checks upstream name
        if not re.match(UpstreamGrep, UpName):
            continue

        NeighborsOfUpstream = ResidueAtomNeighbors[UpIndex]
        # depends on the upstream atom only, so it is computed at most once
        UpstreamHydrogens = None

        for DownIndex in NeighborsOfUpstream:
            DownRes, DownAtm = ResAtmRecordLists[DownIndex]

            # downstream residue must pass min primary sequence spacing
            if not (DownRes - UpRes >= MinPositionSeperation):
                continue

            DownName = _cst_atom_name(Pose, DownRes, DownAtm)

            # skip if same atom
            if UpRes == DownRes and UpName == DownName:
                continue
            # skip virtual atoms
            if Pose.residue(DownRes).is_virtual(DownAtm):
                continue
            # checks downstream name
            if not re.match(DownstreamGrep, DownName):
                continue

            if UpstreamHydrogens is None:
                UpstreamHydrogens = _cst_nearby_hydrogens(Pose, UpIndex, ResAtmRecordLists, ResidueAtomHydrogens)
            DownstreamHydrogens = _cst_nearby_hydrogens(Pose, DownIndex, ResAtmRecordLists, ResidueAtomHydrogens)

            # check there is at least one hydrogen in system before adding constraint
            if not (len(UpstreamHydrogens) or len(DownstreamHydrogens) or NeedHydrogen == False):
                continue

            if ResidueAtomSasa is None:
                raise ImportError(SasaImportError)

            AverageSasa, BBBB, BBSC, SCSC = _cst_classify_contact(
                ResidueAtomSasa, SasaScale, UpRes, UpName, DownRes, DownName )
            # use instance of sasa_scale to calculate weight
            SasaBasedWeight = SasaScale.weigh(AverageSasa)

            DownXyzCoords = np.array( list(Pose.residue(DownRes).atom(DownAtm).xyz()) )

            ## Get neighbors for angles and torsions to use with AtomPairs
            SelectUpNeighbors = _cst_reference_neighbors(
                Pose, UpXyzCoords, (UpRes, UpName), (DownRes, DownName),
                NeighborsOfUpstream, ResAtmRecordLists, ResAtmCoordLists )
            UpNeighbor1Tuple = SelectUpNeighbors[0]
            UpNeighbor2Tuple = SelectUpNeighbors[1]

            SelectDownNeighbors = _cst_reference_neighbors(
                Pose, DownXyzCoords, (DownRes, DownName), (UpRes, UpName),
                ResidueAtomNeighbors[DownIndex], ResAtmRecordLists, ResAtmCoordLists )
            DownNeighbor1Tuple = SelectDownNeighbors[0]
            DownNeighbor2Tuple = SelectDownNeighbors[1]

            Distance = solenoid_tools.vector_magnitude(DownXyzCoords - UpXyzCoords)
            DistanceCst = 'AtomPair %s %d %s %d SCALARWEIGHTEDFUNC %f HARMONIC %.2f 1.0' %( UpName, UpRes, DownName, DownRes, SasaBasedWeight, Distance )

            # Use Biopython for angle and dihedral calculations
            # here 'Vec' means PDB.Vector of atom's xyz coord
            UpstreamVec = PDB.Vector(UpXyzCoords)
            DownstreamVec = PDB.Vector(DownXyzCoords)
            UpNeighbor1Vec = PDB.Vector(UpNeighbor1Tuple[3])
            UpNeighbor2Vec = PDB.Vector(UpNeighbor2Tuple[3])
            DownNeighbor1Vec = PDB.Vector(DownNeighbor1Tuple[3])
            DownNeighbor2Vec = PDB.Vector(DownNeighbor2Tuple[3])

            Angle1 = PDB.calc_angle(UpNeighbor1Vec, UpstreamVec, DownstreamVec)
            AngleCst1 = AngleFormat %( UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, SasaBasedWeight, Angle1 )
            Angle2 = PDB.calc_angle(UpstreamVec, DownstreamVec, DownNeighbor1Vec)
            AngleCst2 = AngleFormat %( UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], SasaBasedWeight, Angle2 )

            Torsion1 = PDB.calc_dihedral(UpNeighbor2Vec, UpNeighbor1Vec, UpstreamVec, DownstreamVec)
            TorsionCst1 = TorsionFormat %( UpNeighbor2Tuple[1], UpNeighbor2Tuple[2], UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, SasaBasedWeight, Torsion1 )
            Torsion2 = PDB.calc_dihedral(UpNeighbor1Vec, UpstreamVec, DownstreamVec, DownNeighbor1Vec)
            TorsionCst2 = TorsionFormat %( UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], SasaBasedWeight, Torsion2 )
            Torsion3 = PDB.calc_dihedral(UpstreamVec, DownstreamVec, DownNeighbor1Vec, DownNeighbor2Vec)
            TorsionCst3 = TorsionFormat %( UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], DownNeighbor2Tuple[1], DownNeighbor2Tuple[2], SasaBasedWeight, Torsion3 )

            # adds constraints to running lists of constraints
            ContactCsts = [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3]
            AllConstraints.extend(ContactCsts)
            if BBBB:
                AllBackboneBackboneCst.extend(ContactCsts)
            if BBSC:
                AllBackboneSidechainCst.extend(ContactCsts)
            if SCSC:
                AllSidechainSidechainCst.extend(ContactCsts)

    SortedConstraints = (AllBackboneBackboneCst, AllBackboneSidechainCst, AllSidechainSidechainCst)
    return AllConstraints, SortedConstraints
def get_nr_pdb_list(TMPDIR):
    """
    The main program logic to get the nonredundant list of pdb identifiers,
    selecting the highest resolution as representative.
    See module docstring at top of file for description

    Clusters are read from stdin with yield_cluster_from_file().  Clusters
    whose first sequence is not a protein are skipped.  For the others the
    short sequences are discarded and, while more than one sequence remains,
    the cluster is narrowed down by discarding non X-ray structures, then
    lower resolution, then lower similarity; if several sequences are still
    left the first one is used.  Experimental method and resolution are read
    from the header of the compressed PDB file below PDBDIV_ROOT, which is
    decompressed to a temporary file in TMPDIR; results are cached per pdb id.

    Parameters:
       TMPDIR - name of temp directory to use

    Return value:
       None. Output is to stdout: one line per cluster, of the form
       "representative: other other ...", i.e. the chosen representative
       followed by the remaining identifiers of the original cluster.
    """
    import subprocess

    pdb_dict = {} # dict of {pdbid : MethodResolution} to cache info from PDB

    for cluster in yield_cluster_from_file(sys.stdin):
        if not cluster[0].is_protein(): # since clustered, if one not, all not
            continue  # discard non-protein sequences
        orig_seqlist = list(cluster.seqlist) # keep copy before deleting some
        cluster.discard_short_seqs()

        if len(cluster) > 1:
            # now we need to look in PDB files to find highest res X-ray struct
            for seq in cluster.seqlist:
                pdbid = seq.descr[:4].lower()
                if pdbid in pdb_dict:
                    methres = pdb_dict[pdbid]
                    seq.is_xray = methres.is_xray
                    seq.resolution = methres.resolution
                    continue

                name = "pdb" + pdbid
                pdbfile = os.path.join(PDBDIV_ROOT, pdbid[1:3], name + ".ent.gz")
                tmp_pdbfilename = os.path.join(TMPDIR, name)
                try:
                    # run gzip without a shell so that the id taken from the
                    # input can never be interpreted as shell syntax; as
                    # before a failing gzip just leaves an empty file behind
                    with open(tmp_pdbfilename, "wb") as tmp_fh:
                        subprocess.call(["gzip", "-d", "-c", pdbfile],
                                        stdout=tmp_fh)
                    pdbheader = PDB.parse_pdb_header(tmp_pdbfilename)
                finally:
                    # remove the temp file even when parsing fails
                    if os.path.exists(tmp_pdbfilename):
                        os.unlink(tmp_pdbfilename)

                if 'x-ray' in pdbheader['structure_method'].lower():
                    seq.is_xray = True
                    seq.resolution = float(pdbheader['resolution'])
                # for other methods the values already present on seq are
                # cached unchanged
                methres = MethodResolution()
                methres.is_xray = seq.is_xray
                methres.resolution = seq.resolution
                pdb_dict[pdbid] = methres

            cluster.discard_non_xray()
        if len(cluster) > 1:
            cluster.discard_lower_resolution()
        if len(cluster) > 1:
            cluster.discard_lower_similarity()
        if len(cluster) > 1:
            cluster.seqlist = [cluster.seqlist[0]] # arbitrary: use first seq

        repr_id = cluster.seqlist[0].descr[:6].lower()
        sys.stdout.write(repr_id + ": ")
        for seq in orig_seqlist:
            other_id = seq.descr[:6].lower()
            if other_id != repr_id:
                sys.stdout.write(other_id + " ")
        sys.stdout.write("\n")
def is_no_aa_chain(chain):
    """
    Return True when none of the residues in ``chain`` is an amino acid
    according to ``PDB.is_aa`` (an empty chain therefore yields True).
    """
    aa_flags = [PDB.is_aa(residue) for residue in chain]
    return not any(aa_flags)