def test_1_warnings(self):
    """Check warnings: parse a flawed PDB file in permissive mode.

    Every warning raised while parsing is recorded and the leading ones are
    compared, in order, against the expected message fragments.
    """
    # A blank het flag / insertion code is rendered by Residue.__repr__ as a
    # single space, hence "het=" followed by two spaces in the first message.
    blank = ' '
    expected = [
        'Atom N defined twice in residue <Residue ARG het=%s resseq=2 icode=%s> at line 19.'
        % (blank, blank),
        'disordered atom found with blank altloc before line 31.',
        "Residue (' ', 4, ' ') redefined at line 41.",
        "Blank altlocs in duplicate residue SER (' ', 4, ' ') at line 41.",
        "Residue (' ', 10, ' ') redefined at line 73.",
        "Residue (' ', 14, ' ') redefined at line 104.",
        "Residue (' ', 16, ' ') redefined at line 133.",
        "Residue (' ', 80, ' ') redefined at line 631.",
        "Residue (' ', 81, ' ') redefined at line 644.",
        'Atom O defined twice in residue <Residue HOH het=W resseq=67 icode= > at line 820.',
    ]

    # catch_warnings restores both the filter list and warnings.showwarning
    # on exit, so the 'always' filter no longer leaks into other tests.
    with warnings.catch_warnings(record=True) as all_warns:
        warnings.simplefilter('always', PDBConstructionWarning)
        # Trigger warnings
        p = PDBParser(PERMISSIVE=True)
        p.get_structure("example", "PDB/a_structure.pdb")

    # zip() stops at the shorter sequence; without this check the loop below
    # would pass vacuously if the parser emitted too few warnings.
    self.assertTrue(
        len(all_warns) >= len(expected),
        "expected at least %d warnings, got %d" % (len(expected), len(all_warns)))
    for wrn, msg in zip(all_warns, expected):
        self.assertTrue(msg in str(wrn.message))
def experimental_method(pdb_path):
    """
    Return the experimental method recorded in a PDB file's header.

    The file is parsed with header extraction enabled and the
    ``'structure_method'`` header entry is returned.

    :param pdb_path: Path to PDB file
    :return: value stored under ``'structure_method'`` in the parsed header
    """
    header_parser = PDBParser(get_header=True)
    # The structure itself is not needed; parsing populates the header.
    header_parser.get_structure('', pdb_path)
    header = header_parser.get_header()
    return header['structure_method']
def compare_structure(reference, alternate):
    """Superimpose chain A of ``alternate`` onto chain A of ``reference``.

    C-alpha atoms of residues whose name is a key of ``resdict`` are paired
    in chain order. Three fits are run in sequence (all pairs, the first 50,
    the first 150); after each fit the transformed alternate structure is
    written with ``make_pdb_file`` under the prefixes ``Aligned_``,
    ``Aligned_50_`` and ``Aligned_150_``.

    Returns ``(rms_first_50, rms_first_150, rms_all, number_of_reference_CA)``.
    """
    parser = PDBParser()
    ref_struct = parser.get_structure('Reference',
                                      path.join(PDBdir, reference))
    alt_struct = parser.get_structure("Alternate",
                                      path.join(PDBdir, alternate))
    ref_chain = ref_struct[0]['A']
    alt_model = alt_struct[0]
    alt_chain = alt_model['A']

    ref_atoms = [res['CA'] for res in ref_chain
                 if res.get_resname() in resdict.keys()]
    alt_atoms = [res['CA'] for res in alt_chain
                 if res.get_resname() in resdict.keys()]

    def _fit_and_save(limit, prefix):
        # limit=None slices the full lists; the transform is applied to the
        # whole alternate model, which is then written out.
        imposer = Superimposer()
        imposer.set_atoms(ref_atoms[:limit], alt_atoms[:limit])
        imposer.apply(alt_model.get_atoms())
        make_pdb_file(alt_struct, prefix + alternate)
        return imposer.rms

    # Order matters: each fit moves the alternate model before the next one.
    full = _fit_and_save(None, "Aligned_")
    f_50 = _fit_and_save(50, "Aligned_50_")
    f_150 = _fit_and_save(150, "Aligned_150_")
    return f_50, f_150, full, len(ref_atoms)
def read_structure(pdb_path, structure_id, quiet=True):
    """Reads in a PDB structure.

    Will read gzip compressed PDB structures.

    Parameters
    ----------
    pdb_path : str
        path to pdb file to read
    structure_id : str
        structure id of pdb file
    quiet : bool
        passed to ``PDBParser(QUIET=...)``

    Returns
    -------
    structure : Bio.PDB structure object | None
        returns PDB structure if possible else none
    """
    import sys  # local import: only needed to choose the gzip mode

    # skip if there is no pdb for it
    if not pdb_path:
        logger.debug('Skipping pdb {0}'.format(structure_id))
        return None

    pdb_parser = PDBParser(QUIET=quiet)  # parser for pdb files

    # read in pdb file
    try:
        # handle gziped or uncompressed reading
        if pdb_path.endswith('.gz'):
            # The parser works on text lines; on Python 3 a handle opened in
            # 'rb' yields bytes, so open in text mode there.
            gz_mode = 'rt' if sys.version_info[0] >= 3 else 'rb'
            with gzip.open(pdb_path, gz_mode) as handle:
                structure = pdb_parser.get_structure(structure_id, handle)
        else:
            structure = pdb_parser.get_structure(structure_id, pdb_path)

        # fix homology model chain letters to be "A" instead of " "
        for model in structure:
            for chain in model:
                if chain.id == " ":
                    chain.id = "A"
                    # Depending on the Biopython version, assigning ``id``
                    # may already have re-keyed the parent's child_dict, so
                    # the old key can be absent: drop it only if present.
                    model.child_dict.pop(' ', None)
                    model.child_dict['A'] = chain
        return structure
    except Exception:
        # KeyboardInterrupt / SystemExit are not Exception subclasses and
        # therefore still propagate, so the user can stop the program.
        logger.info('Failed reading in structure {0}'.format(structure_id))
        return None
def test_get_sequence_from_pdb_structure(self):
    """Chain A of model 0 in ./test.pdb must yield the sequence VNIKTNPFK."""
    pdb_file = "./test.pdb"
    structure = PDBParser().get_structure('test', pdb_file)
    chain_a = structure[0]['A']
    self.assertEqual(
        "VNIKTNPFK",
        construct_protein_graph.get_sequence_from_pdb_structure(chain_a))
def selectChain(ifn, ofn, chainID='A'):
    """Write only the chain whose id equals ``chainID`` from ``ifn`` to ``ofn``.

    The input is parsed with ``PDBParser`` and saved through ``PDBIO`` using
    a selector that accepts every model, residue and atom but only the
    requested chain.
    """
    structure = PDBParser().get_structure('x', ifn)

    class ChainSelector():
        """Selector passed to ``PDBIO.save``; returns 1 to keep, 0 to drop."""

        def __init__(self, chainID=chainID):
            self.chainID = chainID

        def accept_model(self, model):
            return 1

        def accept_chain(self, chain):
            return 1 if chain.get_id() == self.chainID else 0

        def accept_residue(self, residue):
            return 1

        def accept_atom(self, atom):
            return 1

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(ofn, ChainSelector(chainID))
def load_PDB_to_system(self, filename = None):
    """Populate ``self.residues`` from the PDB file ``filename``.

    Every chain of every model is walked. Within a chain, residues are
    numbered from 1 and atoms are numbered from 1 across the whole chain;
    both counters restart for each chain. ``self.id`` is set to 1 for each
    chain visited.
    """
    structure = PDBParser(QUIET=True).get_structure('X', filename)
    self.residues = []

    for model in structure:
        for chain in model:
            self.id = 1
            atom_serial = 1
            for residue_index, pdb_residue in enumerate(chain, 1):
                residue = Residue(id=residue_index, name=pdb_residue.resname)
                for pdb_atom in pdb_residue:
                    atom = Atom(id=atom_serial,
                                name=pdb_atom.name,
                                pos=pdb_atom.coord)
                    atom_serial += 1
                    residue.atoms.append(atom)
                self.residues.append(residue)
def test_conversion(self):
    """Parse 1LCD.cif, write it out as PDB, parse again and compare."""
    import os  # local import so the cleanup below does not rely on a module-level one

    cif_parser = MMCIFParser(QUIET=1)
    cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

    pdb_writer = PDBIO()
    pdb_writer.set_structure(cif_struct)

    filenumber, filename = tempfile.mkstemp()
    # mkstemp returns an open descriptor; close it so it does not leak, and
    # always remove the temporary file once it has been read back.
    os.close(filenumber)
    try:
        pdb_writer.save(filename)
        pdb_parser = PDBParser(QUIET=1)
        pdb_struct = pdb_parser.get_structure('example_pdb', filename)
    finally:
        os.remove(filename)

    # comparisons
    self.assertEqual(len(pdb_struct), len(cif_struct))

    pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
    cif_atom_names = [a.name for a in cif_struct.get_atoms()]
    self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
    self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

    pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
    cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
    self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
def test_to_string(self):
    """Write structure as string"""

    def _count(iterable):
        return sum(1 for _ in iterable)

    def _count_disordered(structure):
        return sum(1 for a in structure.get_atoms() if a.is_disordered())

    def _count_hetero(structure):
        return sum(1 for r in structure.get_residues() if r.id[0] != ' ')

    source = StringIO()
    source.write(dummy_1)
    source.seek(0)
    mol = MolProcesser(source)
    original = mol.structure

    n_models = _count(original.get_models())  # 1
    n_chains = _count(original.get_chains())  # 2 (computed, not compared below)
    n_resids = _count(original.get_residues())  # 2
    n_atoms = _count(original.get_atoms())  # 15
    has_docc = _count_disordered(original)
    has_hatm = _count_hetero(original)

    # Round trip: serialise to a string, then parse it back.
    round_trip = StringIO()
    round_trip.write(mol.tostring)
    round_trip.seek(0)
    mol_2 = PDBParser(QUIET=1).get_structure('xyz', round_trip)

    n_models_2 = _count(mol_2.get_models())  # 1
    n_resids_2 = _count(mol_2.get_residues())  # 2
    n_atoms_2 = _count(mol_2.get_atoms())  # 15
    has_docc_2 = _count_disordered(mol_2)
    has_hatm_2 = _count_hetero(mol_2)

    self.assertEqual(n_models, n_models_2)
    self.assertEqual(n_resids, n_resids_2)
    self.assertEqual(n_atoms, n_atoms_2)
    self.assertEqual(has_docc, has_docc_2)
    self.assertEqual(has_hatm, has_hatm_2)
def chain2pos_scan_str(chain, pdb, mutation_set='a'):
    """
    Takes a chain ID and a model.PDBFile object, returns a string suitable as
    the PositionScan line for FoldX.

    For every residue of the requested chain (first model only) whose
    three-letter name ``three_to_one`` can translate, an entry
    ``<one-letter aa><chain><residue number><mutation_set>`` is produced;
    entries are joined with commas. Residues that cannot be translated
    (non-native amino acids, water) are skipped. Returns ``''`` when nothing
    matches.
    """
    parser = PDBParser(PERMISSIVE=1)
    pdbfn = pdb.fullpath()
    struct = parser.get_structure(pdb.uuid, pdbfn)[0]

    positions = []
    for c in Selection.unfold_entities(struct, 'C'):
        if c.id != chain:
            continue
        for r in c:
            try:
                aa = three_to_one(r.get_resname())
            except KeyError:
                # non-native amino acid or water
                # NOTE(review): assumes three_to_one signals an unknown
                # residue name with KeyError - confirm for the version in use.
                continue
            positions.append('%s%s%i%s' % (aa, chain, r.id[1], mutation_set))
    return ','.join(positions)
def test_NACCESS(self):
    """Test NACCESS generation from PDB"""
    pdbfile = "PDB/1A8O.pdb"
    first_model = PDBParser().get_structure("1A8O", pdbfile)[0]
    result = NACCESS(first_model, pdbfile)
    self.assertEqual(len(result), 66)
def run(self):
    """Write, as JSON, ligand-atom distances for every protein residue.

    The ligand is the first molecule read from ``self.getPath().sdf`` (with
    hydrogens removed); the protein is parsed from ``self.getPath().pdb``.
    One record per residue is written to ``self.output()`` holding the
    result of ``residueDistances2LigandAtoms``, the ligand atom types
    translated from the ``INT`` to the ``SYB`` type table, and the residue
    name.
    """
    mypath = self.getPath()
    lig_ifn = mypath.sdf
    prt_ifn = mypath.pdb

    lig_ext = os.path.basename(lig_ifn).split('.')[-1]
    # Built-in next() works on Python 2.6+ and 3; the .next() method is
    # Python-2-only.
    lig = next(pybel.readfile(lig_ext, lig_ifn))
    lig.removeh()

    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('prt', prt_ifn)

    typetable = OBTypeTable()
    typetable.SetFromType('INT')
    typetable.SetToType('SYB')

    atom_types = [typetable.Translate(atom.type) for atom in lig.atoms]
    # NOTE(review): this relies on `shuffle` returning the shuffled sequence.
    # If it is random.shuffle (in-place, returns None), atom_types becomes
    # None in every record - confirm which shuffle is imported.
    atom_types = shuffle(atom_types)

    dat = []
    for residue in structure.get_residues():
        dists = residueDistances2LigandAtoms(residue, lig)
        dat.append({"dists": dists,
                    "atom_types": atom_types,
                    "residue": residue.get_resname()})

    to_write = json.dumps(dat, indent=4, separators=(',', ':'))
    with self.output().open('w') as ofs:
        ofs.write(to_write)
def RemoveLigandsOneBioUnit(biounit, ligandlist):
    """Strip listed ligands (and waters) from a biounit file and save it.

    ``ligandlist`` is a residue list with residue chain id, name and residue
    number; each entry is read through the keys ``"ChainID"``, ``"LigName"``
    and ``"ResNum"``. The cleaned structure is written to
    ``os.path.join(BIOSTRDIR, <structure id>)``.

    Returns None in all cases; when the file cannot be parsed nothing is
    written.
    """
    p = PDBParser(PERMISSIVE = 1)
    pdbname = biounit.split("/")[-1]
    try:
        models = p.get_structure(pdbname, biounit)
    except Exception:
        # Best effort: an unparsable biounit is skipped. KeyboardInterrupt
        # and SystemExit are deliberately not trapped.
        return None

    for rligand in ligandlist:
        for model in models:
            for chain in model:
                # Iterate over a copy: residues are detached during the walk.
                for residue in list(chain):
                    if chain.id == rligand["ChainID"] and int(rligand["ResNum"]) == residue.id[1]:
                        chain.detach_child(residue.id)
                    elif residue.id[0] == "W":
                        # Waters are dropped too (only reached while at
                        # least one ligand entry is being processed).
                        chain.detach_child(residue.id)
                    elif len(rligand["LigName"].split()) > 1 and int(rligand["ResNum"]) <= residue.id[1]:
                        # Multi-word ligand name: delegate to LongLigand.
                        LongLigand(chain, residue, rligand)

    io = PDBIO()
    io.set_structure(models)
    filepath = os.path.join(BIOSTRDIR, models.id)
    io.save(filepath)
def parse_freesasa_output(fpath):
    """
    Read per-atom accessibility values from a freesasa-annotated PDB file.

    The file at ``fpath.name`` is parsed and each atom's B-factor field is
    taken as its accessible surface area.

    Returns a pair ``(asa_data, rsa_data)``:
      * ``asa_data`` maps ``(chain id, residue name, residue number, atom
        name)`` to the atom's value;
      * ``rsa_data`` maps ``(chain id, residue name, residue number)`` to the
        residue's summed value divided by ``rel_asa['total'][residue name]``.
    """
    asa_data, rsa_data = {}, {}

    structure = PDBParser(QUIET=1).get_structure('bogus', fpath.name)
    for res in structure.get_residues():
        chain_id, resname, resnum = res.parent.id, res.resname, res.id[1]
        residue_total = 0
        for atom in res:
            atom_asa = atom.bfactor
            residue_total += atom_asa
            asa_data[(chain_id, resname, resnum, atom.name)] = atom_asa
        rsa_data[(chain_id, resname, resnum)] = (
            residue_total / rel_asa['total'][resname])

    return asa_data, rsa_data
def parse_structure(path):
    """
    Parses a structure using Biopython's PDB/mmCIF Parser
    Verifies the integrity of the structure (gaps) and its
    suitability for the calculation (is it a complex?).

    The parser is chosen from the file extension: ``pdb``/``ent`` use
    ``PDBParser``, ``cif`` uses ``MMCIFParser``.

    Returns a tuple ``(validate_structure(s), number of distinct chain ids,
    number of residues)``.

    Raises ``IOError`` for an unsupported extension and ``Exception``
    (wrapping the original error) when parsing fails.
    """
    # setup logging
    logger = logging.getLogger('Prodigy')
    logger.info('[+] Reading structure file: {0}'.format(path))

    fname = os.path.basename(path)
    sname = '.'.join(fname.split('.')[:-1])
    s_ext = fname.split('.')[-1]

    _ext = {'pdb', 'ent', 'cif'}
    if s_ext not in _ext:
        raise IOError('[!] Structure format \'{0}\' is not supported. Use \'.pdb\' or \'.cif\'.'.format(s_ext))

    sparser = PDBParser(QUIET=1) if s_ext in {'pdb', 'ent'} else MMCIFParser()

    try:
        s = sparser.get_structure(sname, path)
    except Exception as exeption:
        # Logger methods take no ``file`` keyword; passing one raised a
        # TypeError here and hid the actual parsing error.
        logger.error('[!] Structure \'{0}\' could not be parsed'.format(sname))
        raise Exception(exeption)

    return (validate_structure(s),
            len(set(c.id for c in s.get_chains())),
            len(list(s.get_residues())))
def test_dssp(self):
    """Test DSSP generation from PDB."""
    pdbfile = "PDB/2BEG.pdb"
    first_model = PDBParser().get_structure("2BEG", pdbfile)[0]
    result = DSSP(first_model, pdbfile)
    self.assertEqual(len(result), 130)
def main():
    """Parse pdb10gs.ent, print model information and run TestDSSP on model 0."""
    p = PDBParser()
    filename = "pdb10gs.ent"
    models = p.get_structure("10GS", filename)
    for model in models:
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        # NOTE(review): the first print shows models[0] on every iteration,
        # not the loop variable - confirm that is intended.
        print(models[0])
        print(model.get_full_id())
    # NOTE(review): the source's indentation was lost; this call is assumed
    # to follow the loop rather than run once per model - confirm.
    TestDSSP(models[0], filename)
def test_3_bad_xyz(self):
    """Check error: Parse an entry with bad x,y,z value."""

    def atom_record(x_field):
        # PDB ATOM records are fixed-width; the fields are padded explicitly
        # so the columns cannot be lost to whitespace normalisation.
        return "".join([
            "ATOM".ljust(6),     # columns 1-6: record name
            "9".rjust(5),        # columns 7-11: atom serial number
            " ",
            " N".ljust(4),       # columns 13-16: atom name
            " ",                 # column 17: alternate location
            "ASP",               # columns 18-20: residue name
            " ",
            "A",                 # column 22: chain id
            "152".rjust(4),      # columns 23-26: residue number
            " ",                 # column 27: insertion code
            " " * 3,
            x_field.rjust(8),    # columns 31-38: x
            "34.953".rjust(8),   # columns 39-46: y
            "27.691".rjust(8),   # columns 47-54: z
            "1.00".rjust(6),     # columns 55-60: occupancy
            "19.26".rjust(6),    # columns 61-66: B-factor
            " " * 10,
            "N".rjust(2),        # columns 77-78: element
            "\n",
        ])

    parser = PDBParser(PERMISSIVE=False)

    # A well-formed record must parse without error in strict mode ...
    parser.get_structure("example", StringIO(atom_record("21.554")))

    # ... while a non-numeric x coordinate must be rejected.
    self.assertRaises(PDBConstructionException,
                      parser.get_structure,
                      "example",
                      StringIO(atom_record("21.ish")))
def main():
    """Parse test/10gs.bio1, print model information and run TestNACCESS on model 0."""
    p = PDBParser()
    filename = "test/10gs.bio1"
    models = p.get_structure("10gs", filename)
    for model in models:
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        # NOTE(review): the first print shows models[0] on every iteration,
        # not the loop variable - confirm that is intended.
        print(models[0])
        print(model.get_full_id())
    # NOTE(review): the source's indentation was lost; this call is assumed
    # to follow the loop rather than run once per model - confirm.
    TestNACCESS(models[0], filename)
def build_all_angles_model(pdb_filename):
    """Build one geometry object per recognised residue of chain A.

    Residues whose name is not a key of ``resdict`` are ignored. The first
    recognised residue keeps the defaults from ``Geometry.geometry``; every
    later one gets its backbone angles and dihedrals (in degrees) measured
    against the previous recognised residue.

    Returns the list of geometry objects in chain order.
    """
    parser = PDBParser()
    structure = parser.get_structure('sample',
                                     path.join(PDBdir, pdb_filename))
    chain = structure[0]['A']

    to_degrees = 180.0 / math.pi
    geometries = []
    previous = None  # (N, CA, C) atoms of the previous recognised residue

    for res in chain:
        if res.get_resname() not in resdict.keys():
            continue
        geo = Geometry.geometry(resdict[res.get_resname()])

        if previous is None:
            previous = (res['N'], res['CA'], res['C'])
        else:
            n1, ca1, c1 = [atom.get_vector() for atom in previous]

            c_atom = res['C']
            n_atom = res['N']
            ca_atom = res['CA']
            c = c_atom.get_vector()
            n = n_atom.get_vector()
            ca = ca_atom.get_vector()

            geo.CA_C_N_angle = calc_angle(ca1, c1, n) * to_degrees
            geo.C_N_CA_angle = calc_angle(c1, n, ca) * to_degrees

            # All three dihedrals are stored on the current residue.
            psi = calc_dihedral(n1, ca1, c1, n)
            omega = calc_dihedral(ca1, c1, n, ca)
            phi = calc_dihedral(c1, n, ca, c)
            geo.psi_im1 = psi * to_degrees
            geo.omega = omega * to_degrees
            geo.phi = phi * to_degrees

            geo.N_CA_C_angle = calc_angle(n, ca, c) * to_degrees

            previous = (n_atom, ca_atom, c_atom)

        geometries.append(geo)

    return geometries
def getPdbAtomsBySerialNum(pdb_fn, serial_nums):
    """Return the atoms of ``pdb_fn`` in the order given by ``serial_nums``.

    Atoms are looked up by ``serial_number``; a number that does not occur
    in the structure raises ``KeyError``.
    """
    structure = PDBParser(QUIET=True).get_structure('x', pdb_fn)

    by_serial = {}
    for atom in structure.get_atoms():
        by_serial[atom.serial_number] = atom

    return [by_serial[num] for num in serial_nums]
def pdb2dfromactivesite(pdb_fh,active_sites=[]):
    """
    This calculates distances between each ligand atom or optionally provided
    amino acids (sources) and each residue in the protein.

    Only chain "A" of the first model is used. Sources are the residues that
    are neither listed as junk nor amino acids (ligands), plus amino-acid
    residues whose number is in ``active_sites``. Distances are measured from
    every source atom to the "CA" atom of every amino-acid residue.

    :param pdb_fh: path to .pdb file.
    :param active_sites: optional list of residue numbers as sources.
    :returns dfromligands: pandas table with distances from ligand
    """
    junk_residues = ["HOH"," MG","CA"," NA","SO4","IOD","NA","CL","GOL","PO4"]
    structure = PDBParser().get_structure("pdb_name", pdb_fh)
    chain_a = structure[0]["A"]  # only chain A

    sources = []
    for residue in chain_a:
        name = residue.get_resname()
        if name in junk_residues:
            continue
        if name not in aas_21_3letter or residue.id[1] in active_sites:
            sources.append(residue)

    dfromligands = pd.DataFrame()
    for source in sources:
        for source_atom in source:
            for residue in chain_a:
                if residue.get_resname() not in aas_21_3letter:  # only aas
                    continue
                row = residue.id[1]
                dfromligands.loc[row, "ref_pdb"] = residue.get_resname()
                if source.get_resname() not in aas_21_3letter:
                    column = "Distance from Ligand: %s (ATOM: %s)" % (
                        source.get_resname(), source_atom.get_name())
                else:
                    column = "Distance from active site residue: %s %d (ATOM: %s)" % (
                        source.get_resname(), source.get_id()[1],
                        source_atom.get_name())
                dfromligands.loc[row, column] = (
                    source[source_atom.get_name()] - residue["CA"])

    dfromligands.index.name = "aasi"
    if "ref_pdb" in dfromligands:
        del dfromligands["ref_pdb"]

    # average and minimum distances
    cols_all = dfromligands.columns.tolist()
    for moltype in ['Distance from Ligand:', 'Distance from active site residue:']:
        cols_moltype = [c for c in cols_all if moltype in c]
        # NOTE(review): the guard tests cols_all, not cols_moltype - confirm.
        if len(cols_all) > 0:
            subset = dfromligands.loc[:, cols_moltype]
            dfromligands.loc[:, '%s average' % moltype] = subset.T.mean()
            dfromligands.loc[:, '%s minimum' % moltype] = dfromligands.loc[:, cols_moltype].T.min()
            mols = np.unique([c[c.find(moltype):c.find(' (ATOM')] for c in cols_moltype])
            if len(mols) > 1:
                for mol in mols:
                    cols_mol = [c for c in cols_moltype if mol in c]
                    dfromligands.loc[:, '%s: average' % mol] = dfromligands.loc[:, cols_mol].T.mean()
                    dfromligands.loc[:, '%s: minimum' % mol] = dfromligands.loc[:, cols_mol].T.min()
    return dfromligands
class PDBAtomAtomDistanceReader(object):
    """Compute residue-residue distances over all models of a PDB file.

    Each model of the parsed structure is wrapped in a ``PDBModel``, which
    performs the per-model distance calculations; this class combines the
    per-model results.
    """

    def __init__(self, pdbname, label):
        self.pdbname = pdbname
        self.label = label
        # Label with all decimal digits removed. Written as a join so it
        # works on both Python 2 and 3 (str.translate(None, ...) is
        # Python-2-only).
        self.region = ''.join(ch for ch in label if ch not in '0123456789')
        self.parser = PDBParser(QUIET=True)
        self.structure = self._load_structure()
        self.models = [PDBModel(model) for model in self.structure.get_list()]

    def _load_structure(self):
        """Parse ``self.pdbname`` with ``self.label`` as the structure id."""
        return self.parser.get_structure(self.label, self.pdbname)

    def get_single_distance(self, p1, p2, exclude_backbone=False, CA_only=False):
        """Summarise the p1-p2 distance over all models.

        Each model contributes one dict (with at least the keys ``'dist'``,
        ``'r1'`` and ``'r2'``). The returned dict holds the whole entries
        with the smallest and largest ``'dist'`` under ``'min_dist'`` and
        ``'max_dist'``, the mean distance under ``'avg_dist'``, and ``'r1'``
        / ``'r2'`` taken from the smallest-distance entry.
        """
        distances = [
            model.get_single_distance(p1, p2, exclude_backbone, CA_only)
            for model in self.models
        ]
        distances.sort(key=lambda x: x['dist'])
        min_dist = distances[0]
        max_dist = distances[-1]
        avg_dist = np.array([d['dist'] for d in distances]).mean()
        return {'protein': self.region,
                'p1': p1,
                'p2': p2,
                'label': self.label,
                'r1': min_dist['r1'],
                'r2': min_dist['r2'],
                'min_dist': min_dist,
                'avg_dist': avg_dist,
                'max_dist': max_dist}

    def get_pair_distances(self):
        """Return one ``pair + value`` tuple per residue pair.

        With a single model its result is returned directly; otherwise the
        minimum value over all models is kept for every pair of residues of
        the first model.
        """
        print("Calculating distances...")
        distances = [model.get_pair_distances() for model in self.models]

        if len(self.models) == 1:
            return [k + v for k, v in distances[0].items()]

        final_distances = {}
        residues = self.models[0].residues
        for r1, r2 in combinations(residues, 2):
            pair = (r1.get_id()[1], r2.get_id()[1])
            final_distances[pair] = min(d[pair] for d in distances)
        return [k + v for k, v in final_distances.items()]
def _get_resmapping(self):
    """Map selected residues of chain A to ``(code, residue number)`` pairs.

    A residue is included when the string form of its number is in
    ``self.resnums``; its name is translated through ``self.codes``.
    """
    filepath = self._get_filepath('', pdb_file=True)
    structure = PDBParser(QUIET=True).get_structure('protein', filepath)
    chain_a = structure[0]['A']
    return [
        (self.codes[residue.resname], residue.id[1])
        for residue in chain_a.get_residues()
        if str(residue.id[1]) in self.resnums
    ]
def _get_ligand_name(self):
    """Append to ``self.ligands`` the residue names found in chain A.

    The structure is read from ``self.out_filename``; residue names listed
    in ``self.ignore`` are skipped. The collected list is printed.
    """
    p = PDBParser(QUIET=True)
    ligand = p.get_structure('ligand', self.out_filename)
    chain = ligand[0]['A']
    for residue in chain.get_residues():
        if residue.resname not in self.ignore:
            self.ligands.append(residue.resname)
    # Single-argument print works on Python 2 and 3. The two spaces match
    # what the Python 2 statement `print "Ligands found: ", x` emitted.
    print("Ligands found:  %s" % (self.ligands,))
def test_fragment_mapper(self):
    """Self test for FragmentMapper module."""
    pdb1 = "PDB/1A8O.pdb"
    model = PDBParser().get_structure("X", pdb1)[0]
    mapper = FragmentMapper(model, 10, 5, "PDB")
    for residue in Selection.unfold_entities(model, "R"):
        if residue not in mapper:
            continue
        description = str(mapper[residue])
        self.assertTrue(description.startswith("<Fragment length=5 id="))
def test_empty(self):
    """Parse an empty file."""
    parser = PDBParser()
    descriptor, empty_path = tempfile.mkstemp()
    os.close(descriptor)
    try:
        structure = parser.get_structure('MT', empty_path)
        # An empty file must give a structure without any models.
        self.assertFalse(len(structure))
    finally:
        os.remove(empty_path)
def check_msms(self, prot_file, first_100_residues):
    """Compare the first 100 ResidueDepth residue names with the expected string.

    ``prot_file`` is parsed with construction warnings silenced; the residue
    names of the first 100 ``property_list`` entries of ``ResidueDepth`` for
    model 0 are concatenated and compared with ``first_100_residues``.
    """
    parser = PDBParser()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        structure = parser.get_structure("X", prot_file)
    depth = ResidueDepth(structure[0])
    res_chain = ''.join(
        entry[0].get_resname() for entry in depth.property_list[:100])
    self.assertEqual(res_chain, first_100_residues)
def test_model_numbering(self):
    """Preserve model serial numbers during I/O."""

    def confirm_numbering(struct):
        self.assertEqual(len(struct), 20)
        for idx, model in enumerate(struct):
            # assertTrue(a, b) treats b as the failure message and never
            # compares the two values; assertEqual performs the check.
            self.assertEqual(model.serial_num, idx + 1)
            self.assertEqual(model.serial_num, model.id + 1)

    parser = PDBParser()
    struct1 = parser.get_structure("1mot", "PDB/1MOT.pdb")
    confirm_numbering(struct1)

    # Round trip: serialize and parse again
    io = PDBIO()
    io.set_structure(struct1)
    filenumber, filename = tempfile.mkstemp()
    os.close(filenumber)
    try:
        io.save(filename)
        struct2 = parser.get_structure("1mot", filename)
        confirm_numbering(struct2)
    finally:
        os.remove(filename)
def open_pdb(pdbfn):
    """Open pdb with Biopython.

    Args:
       pdbfn (str): a path to a pdb structure

    Returns:
       PDB Biopython object: the structure parsed with an empty structure id
    """
    return PDBParser().get_structure('', pdbfn)
def ligand_com(refinement_input, ligand_chain):
    """
    Calculate ligand's center of mass.

    Parameters
    ------------
    refinement_input : str
        Glob pattern (or plain path) of the PDB file(s) to process.
    ligand_chain : str
        Value compared against each residue's ``resname`` to select the
        ligand residues.

    Returns
    --------
    output : list[list[float]]
        One mass-weighted centre (x, y, z) per matched file.
    """
    parser = PDBParser()
    centers = []

    for pdb_file in glob.glob(refinement_input):
        structure = parser.get_structure("inp", pdb_file)
        total_mass = 0.0
        weighted_sum = np.zeros(3)

        for res in structure.get_residues():
            if res.resname != ligand_chain:
                continue
            for atom in res.get_atoms():
                position = np.array(list(atom.get_vector()))
                weighted_sum = weighted_sum + position * atom.mass
                total_mass += atom.mass

        center = weighted_sum / total_mass
        centers.append(center.tolist())

    return centers
##0:PDB ID ##1:Chain (default A)r ##2: Residue ##3: Distance select = csvlist[0][0] achn = csvlist[1][0] mk = csvlist[3][0] for i in range(0, len(csvlist[2])): ares.append(csvlist[2][i]) #Opening the file pdbl = PDBList() pdbl.retrieve_pdb_file(select, pdir='pdb') file_path = filebase + '/pdb/pdb' + select + '.ent' #Read the file parser = PDBParser(QUIET=1) structure = parser.get_structure('test', file_path) #Residue Info rf = open(os.path.join(outfilebase, "residue_list" + '.csv'), 'wt') reswriter = csv.writer(rf, lineterminator='\n') reswriter.writerow( ["-------------------------------------------------------------------"]) reswriter.writerow( ["-------------------------------------------------------------------"]) model = structure[0] chain = model[achn] for res in chain.get_residues(): tags = res.get_full_id() if res.get_resname() != 'HOH' and tags[3][0] == " ": resname.append(res.get_resname()) resid = res.get_id() rescode.append(resid[1])
# List every file in the remote directory, then download and process them
# one at a time.
ftp.cwd("/pub/pdb/data/structures/all/pdb")
filenames = []
ftp.retrlines('NLST', callback=lambda line: filenames.append(line))
print("files: %s" % len(filenames))
for filename in filenames:
    print("downloading %s ..." % filename)
    # The file is saved under its remote name in the current directory.
    with open(filename, 'wb') as fp:
        ftp.retrbinary("RETR %s" % filename, callback=fp.write)
    print("processing: %s" % filename)
    p = PDBParser()
    # The download is read through gzip in text mode.
    with gzip.open(filename, 'rt') as f:
        structure = p.get_structure("", f)
    pdb_id = structure.header["idcode"]
    assert pdb_id, "no PDB ID for %s" % filename
    model = structure[0]
    try:
        dssp = DSSP(model, filename, dssp="/Users/luis/dssp-2.3.0/mkdssp")
    except Exception as e:
        # DSSP failed: report it, delete the download and move on.
        print(e)
        print()
        os.remove(filename)
        continue
def extract_beads(pdb_path,
                  amino_acids_csv='/home/hyang/bio/erf/data/amino_acids.csv'):
    """Write a per-residue bead table for ``pdb_path`` to ``<pdb_path>_bead.csv``.

    For every residue whose name appears in the ``AA3C`` column of
    ``amino_acids_csv`` the CA coordinates and the CB coordinates (CA for
    GLY) are recorded together with chain id, residue number and the
    matching ``AA`` column value. Residues missing CA (or CB for non-GLY)
    are reported and skipped.

    A ``chain`` column numbers contiguous segments: the counter increases
    whenever the chain id changes or the residue number is not exactly one
    more than that of the previous row.

    :param pdb_path: path of the PDB file to read
    :param amino_acids_csv: CSV file with ``AA3C`` and ``AA`` columns
    """
    amino_acids = pd.read_csv(amino_acids_csv)
    vocab_dict = {
        x.upper(): y
        for x, y in zip(amino_acids.AA3C, amino_acids.AA)
    }

    p = PDBParser()
    structure = p.get_structure('X', pdb_path)
    residue_list = Selection.unfold_entities(structure, 'R')

    ca_center_list = []
    cb_center_list = []
    res_name_list = []
    res_num_list = []
    chain_list = []

    for res in residue_list:
        res_name = res.get_resname()
        if res_name not in vocab_dict:
            continue
        is_gly = res_name == 'GLY'
        try:
            ca_coord = res['CA'].get_coord()
            # Glycine has no CB; its CA stands in for the side-chain bead.
            cb_coord = ca_coord if is_gly else res['CB'].get_coord()
        except KeyError:
            print(f'{pdb_path}, {res} missing CA / CB atoms')
            continue
        chain_list.append(res.parent.id)
        res_name_list.append(vocab_dict[res_name])
        res_num_list.append(res.id[1])
        ca_center_list.append(ca_coord)
        cb_center_list.append(cb_coord)

    ca_center = np.vstack(ca_center_list)
    cb_center = np.vstack(cb_center_list)

    df = pd.DataFrame({
        'chain_id': chain_list,
        'group_num': res_num_list,
        'group_name': res_name_list,
        'x': ca_center[:, 0],
        'y': ca_center[:, 1],
        'z': ca_center[:, 2],
        'xcb': cb_center[:, 0],
        'ycb': cb_center[:, 1],
        'zcb': cb_center[:, 2]
    })

    # assign "chain" number for the energy calculation
    # (np.int was removed from NumPy; the builtin int is the same dtype)
    chain = np.zeros(df.shape[0], dtype=int)
    chain_id = df['chain_id'].values
    group_num = df['group_num'].values
    count = 0
    chain_0 = chain_id[0]
    group_0 = group_num[0]

    for i in range(1, df.shape[0]):
        chain_i = chain_id[i]
        group_i = group_num[i]
        if chain_i == chain_0 and group_i == group_0 + 1:
            group_0 += 1
        else:
            count += 1
            chain_0 = chain_i
            group_0 = group_i
        chain[i] = count
    df['chain'] = chain

    df.to_csv(f'{pdb_path}_bead.csv', index=False)
def handle(self, *args, **options):
    """Build Protein, ProteinConformation and Residue rows for alpha subunits.

    For every ``SignprotComplex`` an ``<pdb code>_a`` protein and its
    conformation are fetched or created, the alpha chain of the stored PDB
    text is parsed, and each residue carrying a CA atom is paired with a
    residue of the parent protein. One ``Residue`` row per pair is then
    bulk-created. With the ``purge`` option the existing ``_a`` rows of the
    'Alpha' family are deleted first. Any failure for one complex is
    printed and logged, and processing continues with the next complex.
    """
    self.options = options
    if self.options['purge']:
        Residue.objects.filter(
            protein_conformation__protein__entry_name__endswith='_a',
            protein_conformation__protein__family__parent__parent__name=
            'Alpha').delete()
        ProteinConformation.objects.filter(
            protein__entry_name__endswith='_a',
            protein__family__parent__parent__name='Alpha').delete()
        Protein.objects.filter(
            entry_name__endswith='_a',
            family__parent__parent__name='Alpha').delete()

    # Building protein and protconf objects for g protein structure in complex
    scs = SignprotComplex.objects.all()
    for sc in scs:
        self.logger.info(
            'Protein, ProteinConformation and Residue build for alpha subunit of {} is building'
            .format(sc))
        try:
            # Alpha subunit: reuse the existing protein or create it.
            try:
                alpha_protein = Protein.objects.get(
                    entry_name=sc.structure.pdb_code.index.lower() + '_a')
            except:
                alpha_protein = Protein()
                alpha_protein.entry_name = sc.structure.pdb_code.index.lower(
                ) + '_a'
                alpha_protein.accession = None
                alpha_protein.name = sc.structure.pdb_code.index.lower(
                ) + '_a'
                alpha_protein.sequence = sc.protein.sequence
                alpha_protein.family = sc.protein.family
                alpha_protein.parent = sc.protein
                alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                    slug='mod')
                alpha_protein.source = ProteinSource.objects.get(
                    name='OTHER')
                alpha_protein.species = sc.protein.species
                alpha_protein.save()
            # Same get-or-create pattern for the conformation.
            try:
                alpha_protconf = ProteinConformation.objects.get(
                    protein__entry_name=sc.structure.pdb_code.index.lower(
                    ) + '_a')
            except:
                alpha_protconf = ProteinConformation()
                alpha_protconf.protein = alpha_protein
                alpha_protconf.state = ProteinState.objects.get(
                    slug='active')
                alpha_protconf.save()

            # Parse the stored PDB text and collect the numbers of all
            # residues of the alpha chain that have a CA atom.
            pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
            s = pdbp.get_structure('struct',
                                   StringIO(sc.structure.pdb_data.pdb))
            chain = s[0][sc.alpha]
            nums = []
            for res in chain:
                try:
                    res['CA']
                    nums.append(res.get_id()[1])
                except:
                    pass

            resis = Residue.objects.filter(
                protein_conformation__protein=sc.protein)
            num_i = 0
            temp_seq2 = ''
            pdb_num_dict = OrderedDict()
            # Create first alignment based on sequence numbers
            # (for 6OIJ, residues numbered below 30 are looked up 6 higher).
            for n in nums:
                if sc.structure.pdb_code.index == '6OIJ' and n < 30:
                    nr = n + 6
                else:
                    nr = n
                pdb_num_dict[n] = [chain[n], resis.get(sequence_number=nr)]
            # Find mismatches between PDB residue and paired protein residue
            mismatches = []
            for n, res in pdb_num_dict.items():
                if AA[res[0].get_resname()] != res[1].amino_acid:
                    mismatches.append(res)

            pdb_lines = sc.structure.pdb_data.pdb.split('\n')
            seqadv = []
            for l in pdb_lines:
                if l.startswith('SEQADV'):
                    seqadv.append(l)
            mutations, shifted_mutations = OrderedDict(), OrderedDict()
            # Search for annotated engineered mutations in pdb SEQADV
            # NOTE(review): the loop variable below rebinds `s`, which held
            # the parsed structure until here.
            for s in seqadv:
                line_search = re.search(
                    'SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)',
                    s)
                if line_search != None:
                    if line_search.group(2) == sc.alpha:
                        if line_search.group(
                                4).strip() == sc.protein.accession:
                            # Same number on both sides: plain mutation;
                            # otherwise remember the shifted target number.
                            if line_search.group(3) == line_search.group(
                                    6):
                                mutations[int(line_search.group(3))] = [
                                    line_search.group(1),
                                    line_search.group(5)
                                ]
                            else:
                                shifted_mutations[int(
                                    line_search.group(3))] = [
                                        line_search.group(1),
                                        line_search.group(5),
                                        int(line_search.group(6))
                                    ]
                        else:
                            # NOTE(review): the source's indentation was
                            # lost; this `else` is assumed to pair with the
                            # accession check above - confirm.
                            # Exception for 6G79
                            if line_search.group(3) != line_search.group(
                                    6) and 'CONFLICT' in line_search.group(
                                        7):
                                mutations[int(line_search.group(3))] = [
                                    line_search.group(1),
                                    line_search.group(5)
                                ]
                            # Exception for 5G53
                            if line_search.group(
                                    4).strip() != sc.protein.accession:
                                mutations[int(line_search.group(3))] = [
                                    line_search.group(1),
                                    line_search.group(5)
                                ]
            remaining_mismatches = []
            # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
            for m in mismatches:
                num = m[0].get_id()[1]
                if num in mutations:
                    if m[0].get_resname() != mutations[num][0] and m[
                            1].amino_acid != AA[mutations[num][1]]:
                        remaining_mismatches.append(m)
                elif num in shifted_mutations:
                    remaining_mismatches.append(m)
                else:
                    remaining_mismatches.append(m)

            ### sanity check
            # print(mutations)
            # print(shifted_mutations)
            # print(mismatches)
            # print(remaining_mismatches)
            # pprint.pprint(pdb_num_dict)

            # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
            if len(remaining_mismatches
                   ) > 0 and sc.structure.pdb_code.index not in [
                       '6OIJ', '6OY9', '6OYA'
                   ]:
                ppb = PPBuilder()
                seq = ''
                for pp in ppb.build_peptides(chain, aa_only=False):
                    seq += str(pp.get_sequence())
                pw2 = pairwise2.align.localms(sc.protein.sequence, seq, 2,
                                              -1, -.5, -.1)
                ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])
                wt_pdb_dict = OrderedDict()
                pdb_wt_dict = OrderedDict()
                # j walks the protein residues, k the PDB residue numbers;
                # gap positions are keyed by the alignment column index.
                j, k = 0, 0
                for i, ref, temp in zip(range(0, len(ref_seq)), ref_seq,
                                        temp_seq):
                    if ref != '-' and temp != '-':
                        wt_pdb_dict[resis[j]] = pdb_num_dict[nums[k]]
                        pdb_wt_dict[pdb_num_dict[nums[k]][0]] = resis[j]
                        j += 1
                        k += 1
                    elif ref == '-':
                        wt_pdb_dict[i] = pdb_num_dict[nums[k]]
                        pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                        k += 1
                    elif temp == '-':
                        wt_pdb_dict[resis[j]] = i
                        pdb_wt_dict[i] = resis[j]
                        j += 1
                for i, r in enumerate(remaining_mismatches):
                    # Adjust for shifted residue when residue is a match
                    # NOTE(review): for i == 0, index i - 1 is -1, i.e. the
                    # last mismatch - confirm this is intended.
                    if r[0].get_id()[1] - remaining_mismatches[
                            i - 1][0].get_id()[1] > 1:
                        pdb_num_dict[r[0].get_id()[1] -
                                     1][1] = pdb_wt_dict[chain[
                                         r[0].get_id()[1] - 1]]
                    # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                    if r[0].get_id()[1] in shifted_mutations:
                        pdb_num_dict[r[0].get_id()[1]][1] = resis.get(
                            sequence_number=shifted_mutations[
                                r[0].get_id()[1]][2])
                    else:
                        # Adjust for shift
                        pdb_num_dict[r[0].get_id()[1]][1] = pdb_wt_dict[
                            r[0]]

            # One new Residue per PDB residue; generic numbers and segment
            # are copied from the paired protein residue.
            bulked_residues = []
            for key, val in pdb_num_dict.items():
                # print(key, val) # sanity check
                res_obj = Residue()
                res_obj.sequence_number = val[0].get_id()[1]
                res_obj.amino_acid = AA[val[0].get_resname()]
                res_obj.display_generic_number = val[
                    1].display_generic_number
                res_obj.generic_number = val[1].generic_number
                res_obj.protein_conformation = alpha_protconf
                res_obj.protein_segment = val[1].protein_segment
                bulked_residues.append(res_obj)
            Residue.objects.bulk_create(bulked_residues)
            self.logger.info(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} is finished'
                .format(sc))
        except Exception as msg:
            # Report the failure and continue with the next complex.
            print(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                .format(sc))
            print(msg)
            self.logger.info(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                .format(sc))
offset=atom_nr-1 print('Offset=', offset,'\nResidue Offset=',residue_offset) for line in open(ligand_name): pdblist = line.split() pdblist[1]=atom_nr pdblist[5]=str(int(residue_offset)+int(pdblist[5])) print('\t'.join(map(str, pdblist)), file=output) atom_nr+=1 # create parser to find distance between C-alpha atoms of ligand and all C-alpha atoms of receptor parser = PDBParser() alphabet_str= string.ascii_uppercase alphabet_list=list(alphabet_str) # read structure of ligand from file structure = parser.get_structure('LIGAND',ligand_name) model_ligand = structure[0] for i in list(string.ascii_uppercase): try: chain_ligand = model_ligand[i] except KeyError: continue # read structure of receptor from file structure = parser.get_structure('RECEPTOR', receptor_name) model_receptor = structure[0] for i in list(string.ascii_uppercase): try: chain_receptor = model_receptor[i] except KeyError: continue
from Bio.PDB import PDBParser from numpy import std, average import seaborn as sns import matplotlib.pyplot as plt import pandas as pd OmpA_beta_sheet_pairs = [(8, 42), (10, 40), (12, 38), (14, 36), (16, 34), (52, 80), (54, 78), (75, 103), (77, 101), (79, 99), (81, 97), (83, 95), (85, 93)] parser = PDBParser() #structure = parser.get_structure('OmpA', '/homes/retel/qj/pdb1qjp.ent') structure = parser.get_structure('OmpA', 'pdb2ge4.ent') #nuclei = ['CA', 'CB', 'C', 'HA', 'H'] nuclei = ['H'] def calculate_distances(): #nuclei = ['CA', 'CB', 'C'] #nuclei = ['CA', 'CB', 'C', 'N', 'H', 'HA', 'HB'] #nuclei = ['H', 'HA', 'HB'] intra = {} sequential = {} longrange1 = {} longrange2 = {} for chain in structure.get_chains():
if not (L == len(ss_seq)): raise ValueError("Length mismatch %i %i" % (L, len(ss_seq))) for i in range(0, L): residues[i].xtra["SS_PSEA"] = ss_seq[i] # os.system("rm "+fname) class PSEA(object): def __init__(self, model, filename): ss_seq = psea(filename) ss_seq = psea2HEC(ss_seq) annotate(model, ss_seq) self.ss_seq = ss_seq def get_seq(self): """Return secondary structure string.""" return self.ss_seq if __name__ == "__main__": import sys from Bio.PDB import PDBParser # Parse PDB file p = PDBParser() s = p.get_structure('X', sys.argv[1]) # Annotate structure with PSEA sceondary structure info PSEA(s[0], sys.argv[1])
#!/usr/bin/env python # coding: utf-8 # In[1]: import sys prot_id = "5AGY.pdb" prot_file = sys.argv[1] #On va utiliser le parser de Biopython qui nous permet d'accéder aux éléments d'un fichier PDB. from Bio.PDB import PDBParser parser = PDBParser(PERMISSIVE=1) structure = parser.get_structure(prot_id, prot_file) model = structure[0] if "-h" in sys.argv or "--help" in sys.argv: print( "Ce programme identifie les liaisons cations-pi à partir d'un fichier Protein Data Bank (PDB). Les critères pris en compte proviennent du Protein Interaction Calculator que l'on peut retrouver en suivant le lien : http://pic.mbu.iisc.ernet.in/PIC_Criteria.pdf. Le parser de Biopython est strucutré de la manière suivante : Structure/model/chain/residu/atome." ) print("Fonctions utilisées :") print('parser.get_structure --> ', 'Creation of a structure object from a PDB file') print( 'objet.get_name --> ', 'Renvoie le nom correspondant à l objet : Structure/model/chain/residu/atome' ) print('parser.get_structur -->e ', 'Renvoie le numéro rattaché au résidue dans le fichier PDB') print('')
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP

# Read in and parse the PDB file, then run DSSP on its first model to obtain
# the per-residue secondary-structure assignment.
# Follows the basic outline of the tutorial on biopython.org.
parse = PDBParser()
struc = parse.get_structure('6hrc', "6hrc.pdb")
model = struc[0]
dssp = DSSP(model, '6hrc.pdb')

# Tallies of the secondary-structure codes; every residue lands in exactly
# one bucket.
a_helix = 0
b_sheet = 0
other = 0
none = 0

codes = []
# Walk the keys once instead of rebuilding list(dssp.keys()) per residue.
for c, key in enumerate(dssp.keys()):
    code = dssp[key][2]  # third field of the DSSP tuple: the structure code
    codes.append(code)
    if code in ("H", "G", "I"):
        a_helix += 1
    elif code in ("E", "B"):
        b_sheet += 1
    elif code == "-":
        none += 1
    else:
        # Previously a bare `else` on the "-" test, which also counted every
        # helix and sheet residue as "other".
        other += 1

# Full secondary-structure string, one character per residue.
sec_struc = "".join(codes)
def __init__(self, xyz=None, r=None, xyzr=None, xyzrg=None, g=None, pdb=None, bv=None, mesh=None, name=None,
             spheres_file=None):
    """
    A Spheres object contains a list of xyz centers with r radii and g groups.
    It can be defined using xyzrg, xyzr (and optionally g), xyz (and optionally
    r or g), a pdb file (and optionally r or g), a list of vertices with
    normals bounded by the spheres (requires r and optionally includes g), or
    a spheres file on disk.

    The sources are tried in that order and only the first one provided is
    used. After loading, duplicate rows of ``xyzrg`` are dropped, keeping the
    first occurrence of each row in its original order.

    Args:
      xyz (float nx3): Array containing centers (Default value = None)
      r (float nx1): Array containing radii (Default value = None)
      xyzr (float nx4): Array containing centers and radii (Default value = None)
      xyzrg (float nx5): Array containing centers, radii, and groups (Default value = None)
      g (float nx1): Array containing groups (Default value = None)
      pdb (str): filename of a pdb to be processed into spheres (Default value = None)
      bv (float nx6): Array containing vertices and normals (Default value = None)
      mesh (Trimesh): mesh object describing the surface (Default value = None)
      name (str): descriptive identifier (Default value = None)
      spheres_file (str): filename of a Spheres file to be read from disk (Default value = None)

    Raises:
      ValueError: if ``spheres_file`` has an unsupported extension or the
        loaded table does not have 4 or 5 columns.
      IOError: if an ``.obj`` file is given but its ``.xyzrg`` companion
        does not exist.
    """
    if xyzrg is not None:
        self.xyzrg = xyzrg
    elif xyzr is not None:
        self.xyzr = xyzr
        if g is not None:
            self.g = g
    elif xyz is not None:
        self.xyz = xyz
        if r is not None:
            self.r = r
        if g is not None:
            self.g = g
    elif pdb is not None:
        # Silence warnings unless the user asked for them on the command line.
        if not sys.warnoptions:
            import warnings
            warnings.simplefilter("ignore")

        p = PDBParser(PERMISSIVE=1, QUIET=True)
        structure = p.get_structure("prot", pdb)

        # Only the first model contributes atoms.
        self.xyz = np.array(
            [atom.get_coord() for atom in structure[0].get_atoms()])
        if r is not None:
            self.r = r
        else:
            self.r = [
                _get_atom_radius(atom, rtype='united')
                for atom in structure[0].get_atoms()
            ]
        if g is not None:
            self.g = g
    elif bv is not None and r is not None:
        # Offset each vertex along its normal by r to get the sphere center.
        self.xyz = bv[:, 0:3] + r * bv[:, 3:6]
        self.r = r
        self.remove_duplicates()
        if g is not None:
            self.g = g
    elif spheres_file is not None:
        base, ext = os.path.splitext(spheres_file)
        if ext == ".xyzrg":
            xyzrg_file = spheres_file
            obj_file = "{0}.obj".format(base)
        elif ext == ".obj":
            xyzrg_file = "{0}.xyzrg".format(base)
            if not os.path.isfile(xyzrg_file):
                logger.error(
                    "No spheres file found with the name: {0}.xyzr or {0}.xyzrg"
                    .format(base))
                # Fail here with a clear message instead of falling through
                # to np.loadtxt, which would raise a less specific I/O error.
                raise IOError(
                    "No spheres file found with the name: {0}.xyzr or {0}.xyzrg"
                    .format(base))
            obj_file = spheres_file
        else:
            logger.error(
                "Invalid filename given to read in spheres object: {0}".
                format(spheres_file))
            raise ValueError(
                "Spheres objects must be .xyzrg or .obj ({0} provided)".
                format(spheres_file))

        spheres_data = np.loadtxt(xyzrg_file, delimiter=' ')
        if spheres_data.shape[1] == 5:
            self.xyzrg = spheres_data
        elif spheres_data.shape[1] == 4:
            self.xyzr = spheres_data
        else:
            logger.error(
                "Spheres csv file contains the wrong number of columns")
            raise ValueError(
                "{0} columns found in file {1}; must contain 4 or 5".
                format(spheres_data.shape[1], spheres_file))
        mesh = trimesh.load_mesh(obj_file)

        # Default the name to the file's base name.
        if name is None:
            name = os.path.basename(base)

    # Both attributes are always set, to None when nothing was supplied.
    self.mesh = mesh
    self.name = name

    # Drop duplicate spheres while preserving first-seen order.
    unique_ind = np.unique(self.xyzrg, axis=0, return_index=True)[1]
    self.xyzrg = self.xyzrg[sorted(unique_ind), :]
import heapq
import numpy as np
from collections import deque
from Bio.PDB import PDBParser, DSSP

# DSSP codes grouped into the three-state alphabet written by this script.
HELIX_CODES = ['H', 'G', 'I']
BETA_CODES = ['B', 'E']
SHORT_CODES = ['T', 'S', 'C', '-', '', ' ']

if __name__ == '__main__':
    # Run DSSP on one structure and write its three-state (H/E/C) secondary
    # structure string to "<protname>.ss_sa" in FASTA-like form.
    res = []
    path = 'D:/work/bioproteins_dl/deep/alpha-fold/train_dataset/'
    protname = '6ryj'
    pdb_filename = path + '' + protname + '.pdb'
    p = PDBParser()
    structure = p.get_structure(protname, pdb_filename)
    model = structure[0]
    dssp = DSSP(model, pdb_filename)

    # `with` guarantees the file is flushed and closed even if the loop
    # raises; the original only flushed and never closed the handle.
    with open(path + protname + '.ss_sa', 'w') as f:
        f.write('>' + protname + '\n')
        for residue in dssp:
            print(residue[2])
            if residue[2] in HELIX_CODES:
                res.append('H')
            elif residue[2] in BETA_CODES:
                res.append('E')
            else:
                # Everything that is neither helix nor strand becomes coil.
                res.append('C')
        r = ''.join(res)
        f.write(r)
def atom_information(pdbdata, mode):
    """Parse *pdbdata* and publish per-atom drawing data as module globals.

    The structure is parsed and DSSP is run on its first model. Depending on
    *mode* the function then fills the globals ``coordinates`` (centred on
    the centroid of the selected atoms), ``color`` and ``radius``:

    * ``'cpk'``       -- every atom, coloured by atom id.
    * ``'aminoacid'`` -- every atom outside ``HOH`` residues, coloured by
      residue type.
    * ``'backbone'``  -- ``CA``/``N`` atoms, one random colour per chain.
    * ``'dssp'``      -- ``CA``/``N`` atoms, coloured by the DSSP code of
      their residue.

    The last two modes also fill ``chains``, ``chain_coords`` and
    ``chain_colors``. Any other *mode* leaves the globals untouched.
    """
    global coordinates
    global color
    global radius
    global chains
    global chain_coords
    global chain_colors

    def _is_backbone(atom):
        # Backbone trace is restricted to alpha carbons and amide nitrogens.
        return atom.get_name() in ('CA', 'N')

    # Parse the file and run DSSP on the first model.
    structure = PDBParser(QUIET=True, PERMISSIVE=True).get_structure('model', pdbdata)
    dssp = DSSP(structure[0], pdbdata)

    # Atom selection per mode.
    if mode == 'cpk':
        atoms = list(structure.get_atoms())
    elif mode == 'aminoacid':
        atoms = [a for a in structure.get_atoms() if a.get_parent().resname != 'HOH']
    elif mode in ('backbone', 'dssp'):
        atoms = [a for a in structure.get_atoms() if _is_backbone(a)]
    else:
        return

    # Coordinates of the selection, shifted so their centroid is the origin.
    coordinates = np.array([a.coord for a in atoms])
    center = centroid(coordinates)
    coordinates -= center

    # Colours per mode.
    if mode == 'cpk':
        color = [colorrgba(a.get_id()) for a in atoms]
    elif mode == 'aminoacid':
        color = [colorrgba(restype(a.get_parent().resname)) for a in atoms]
    elif mode == 'backbone':
        color = []
        chains = []
        chain_colors = []
        chain_coords = []
        for chain in structure.get_chains():
            chains.append(chain)
            trace = np.array([a.coord for a in chain.get_atoms() if _is_backbone(a)])
            trace -= center
            chain_coords.append(trace)
            # Random RGB with alpha fixed to 1.0, repeated for each atom.
            rgba = np.append(np.random.rand(1, 3), [1.0])
            chain_colors.append(rgba)
            color.append(np.tile(rgba, (len(trace), 1)))
        # With a single chain `color` stays a one-element list.
        if len(chains) > 1:
            color = np.concatenate(color)
    else:
        # mode == 'dssp'
        struct3 = [dssp[key][2] for key in list(dssp.keys())]
        residues = [
            res for res in structure.get_residues()
            if res.get_resname() in resdict.keys()
        ]
        color = []
        for idx, ss_code in enumerate(struct3):
            dsspcolor = crgbaDSSP(ss_code)
            # Pairs the idx-th DSSP entry with the idx-th selected residue.
            n_backbone = sum(1 for a in residues[idx] if _is_backbone(a))
            color.append(np.tile(dsspcolor, (n_backbone, 1)))
        if len(struct3) > 1:
            color = np.concatenate(color)

        chains = []
        chain_colors = []
        chain_coords = []
        for chain in structure.get_chains():
            chains.append(chain)
            rgba = np.append(np.random.rand(1, 3), [1.0])
            chain_colors.append(rgba)
            trace = np.array([a.coord for a in chain.get_atoms() if _is_backbone(a)])
            trace -= center
            chain_coords.append(trace)

    # Radii are looked up from the atom id in every mode.
    radius = np.array([vrad(a.get_id()) for a in atoms])
#@TODO: helper function for testing purposes only; to be removed!!
def save_pdb(struct, name):
    """Print the residues of *struct* and save it as a PDB file.

    Prints the residue count followed by one "id resname" line per residue,
    then writes the structure to "<name>volume_simulator.pdb".
    """
    # Single pre-formatted strings print the same text on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print("test %s" % len(list(struct.get_residues())))
    for resi in struct.get_residues():
        print("%s %s" % (resi.id, resi.resname))
    out = PDBIO()
    out.set_structure(struct)
    out.save(str(name) + 'volume_simulator.pdb')


if __name__ == '__main__':
    # Manual smoke test: simulate the volume of a disordered fragment for the
    # structure in Zfull.pdb and add the resulting pseudoatoms to it.
    p = PDBParser(PERMISSIVE=False, QUIET=True)
    st = p.get_structure("Zfull.pdb", "Zfull.pdb")
    component = Component()
    component.pyrystruct = st
    dd_frag = DisorderedFragment()
    dd_frag.set_modeling_disordered_fragment(component.pyrystruct)
    mass_centre = [0, 0, 0]
    g = Grapes()
    g.set_volume_simulation_parameters(dd_frag, component.pyrystruct,
                                       mass_centre)
    res = g.generate()
    dd_frag.add_pseudoatoms_to_structure(res, component.pyrystruct.moltype)
for residue in residue_list: if not is_aa(residue): continue rd = residue_depth(residue, surface) ca_rd = ca_depth(residue, surface) # Get the key res_id = residue.get_id() chain_id = residue.get_parent().get_id() depth_dict[(chain_id, res_id)] = (rd, ca_rd) depth_list.append((residue, (rd, ca_rd))) depth_keys.append((chain_id, res_id)) # Update xtra information residue.xtra['EXP_RD'] = rd residue.xtra['EXP_RD_CA'] = ca_rd AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list) if __name__ == "__main__": import sys from Bio.PDB import PDBParser p = PDBParser() s = p.get_structure("X", sys.argv[1]) model = s[0] rd = ResidueDepth(model, sys.argv[1]) for item in rd: print(item)
def randomize_starting_position(ligand_file, complex_file, outputfolder=".", nposes=200, test=False, user_center=None,
                                logger=None):
    """
    Randomize initial ligand position around the receptor.
    Default number of poses = 200.

    The ligand is moved to the sampling centre, then repeatedly translated to
    a random point of a sphere around that centre and rotated. A pose is kept
    (written to ``ligand<j>.pdb`` in *outputfolder*) when protein atoms lie
    within the "far" threshold of the ligand centre of mass but none within
    the "near" threshold. If no pose is accepted for 60 s the sphere radius
    grows by 1 A; after 20 A of growth sampling is aborted.

    :param ligand_file: path to the ligand PDB file
    :param complex_file: path to the receptor/complex PDB file
    :param outputfolder: directory where accepted poses are written
    :param nposes: number of poses to generate
    :param test: seed NumPy's global RNG with 42 for reproducible runs
    :param user_center: optional "chain:resnumber:atomname" string selecting
        the atom used as sampling centre (fixed 10 A sphere)
    :param logger: logger for progress messages; a module logger is used
        when omitted
    :return: tuple (list of written file names, final sphere radius,
        sphere centre as a list)
    """
    if logger is None:
        # The default used to crash on the first logger.info call.
        import logging
        logger = logging.getLogger(__name__)

    if test:
        np.random.seed(42)

    # read in files
    parser = PDBParser()
    output = []
    structure = parser.get_structure('protein', complex_file)
    ligand = parser.get_structure('ligand', ligand_file)
    COI = np.zeros(3)

    # get center of interface (if PPI)
    if user_center:
        try:
            chain_id, res_number, atom_name = user_center.split(":")
        except ValueError:
            # Implicit concatenation keeps the message free of the indentation
            # whitespace the old backslash continuation embedded in it.
            raise cs.WrongAtomStringFormat(
                f"The specified atom is wrong '{user_center}'. "
                "Should be 'chain:resnumber:atomname'")
        for chain in structure.get_chains():
            if chain.id == chain_id:
                for residue in chain.get_residues():
                    if residue.id[1] == int(res_number):
                        for atom in residue.get_atoms():
                            if atom.name == atom_name:
                                COI = np.array(list(atom.get_vector()))

    # calculate protein and ligand COM
    com_protein = calculate_com(structure)
    com_ligand = calculate_com(ligand)

    # calculating the maximum d of the ligand
    coor_ligand = np.array([list(atom.get_vector() - com_ligand) for atom in ligand.get_atoms()])
    coor_ligand_max = np.amax(coor_ligand, axis=0)
    d_ligand = np.sqrt(np.sum(coor_ligand_max ** 2))

    # set threshold for near and far contacts based on ligand d
    d5_ligand = 5.0 if d_ligand / 2 < 5.0 else d_ligand / 2 + 1
    d8_ligand = d_ligand / 2 + 4 if d_ligand > 8.0 else 8.0

    # calculate vector to move the ligand
    if user_center:
        move_vector = com_ligand - COI
    else:
        move_vector = com_ligand - com_protein

    # translate the ligand to the protein COM (COI for PPI)
    original_coords = []
    for atom in ligand.get_atoms():
        ligand_origin = np.array(list(atom.get_vector())) - move_vector
        original_coords.append(ligand_origin)
        atom.set_coord(ligand_origin)

    # calculating the maximum radius of the protein from the origin
    coor = np.array([list(atom.get_vector() - com_protein) for atom in structure.get_atoms()])
    coor_max = np.amax(coor, axis=0)
    d = np.sqrt(np.sum(coor_max ** 2))

    # radius of the sphere from the origin
    D = 10.0 if user_center else np.ceil(6.0 + d)
    D_initial = D
    logger.info("Sampling {}A spherical box around the centre of the receptor/interface.".format(D))

    sphere_cent = COI if user_center else com_protein

    # The protein never moves, so its neighbour-search tree is built once
    # instead of twice per sampling iteration.
    protein_search = NeighborSearch(Selection.unfold_entities(structure, "A"))

    j = 0
    logger.info("Generating {} poses...".format(nposes))
    start_time = time.time()
    while j < nposes:
        # generate random coordinates (uniform inside a sphere of radius D)
        phi = np.random.uniform(0, 2 * np.pi)
        costheta = np.random.uniform(-1, 1)
        u = np.random.uniform(0, 1)
        theta = np.arccos(costheta)
        r = D * np.cbrt(u)
        x = r * np.sin(theta) * np.cos(phi)
        y = r * np.sin(theta) * np.sin(phi)
        z = r * np.cos(theta)

        # move ligand to the starting point (protein COM)
        for atom, coord in zip(ligand.get_atoms(), original_coords):
            atom.set_coord(coord)

        # translate ligand to a random position
        translation = (x, y, z)
        for atom in ligand.get_atoms():
            new_pos_lig_trans = np.array(list(atom.get_vector())) - translation
            atom.set_coord(new_pos_lig_trans)

        # calculate ligand COM in the new position
        new_ligand_COM = calculate_com(ligand)

        # rotate ligand
        # NOTE(review): randint yields whole-number angles only (and is given
        # a float upper bound); np.random.uniform(0, 2 * np.pi) looks like the
        # intent. Left unchanged so seeded runs keep producing the same poses.
        vector = Vector(new_ligand_COM)
        rotation_matrix = rotaxis(np.random.randint(0, 2 * np.pi), vector)
        for atom in ligand.get_atoms():
            coords_after = atom.get_vector().left_multiply(rotation_matrix)
            atom.set_coord(coords_after)

        # check if it's inside the sampling sphere
        dist = np.sqrt((new_ligand_COM[0] - sphere_cent[0]) ** 2 +
                       (new_ligand_COM[1] - sphere_cent[1]) ** 2 +
                       (new_ligand_COM[2] - sphere_cent[2]) ** 2)

        if dist < D:
            # check contacts at: 5A (no contacts) and 8A (needs contacts)
            contacts5 = protein_search.search(new_ligand_COM, d5_ligand, "S")
            contacts8 = protein_search.search(new_ligand_COM, d8_ligand, "S")
            if contacts8 and not contacts5:
                j += 1
                io = PDBIO()
                io.set_structure(ligand)
                output_name = os.path.join(outputfolder, 'ligand{}.pdb'.format(j))
                io.save(output_name)
                output.append(output_name)
                # An accepted pose resets the "no progress" timer.
                start_time = time.time()

        # Grow the sphere when no pose has been accepted for a minute.
        end_time = time.time()
        total_time = end_time - start_time
        if total_time > 60:
            D += 1
            if D - D_initial >= 20:
                logger.info("Original box increased by 20A. Aborting...")
                break
            start_time = end_time
            logger.info("Increasing sampling box by 1A.")

    logger.info("{} poses created successfully.".format(j))
    return output, D, list(sphere_cent)
f.write("\nNumero: %s\n" % ref.number) f.write("\nPosicao: %s\n" % ref.positions) f.write("\nComentarios: %s\n" % ref.comments) f.write("\nReferencias: %s\n" % ref.references) f.write("\nAutores: %s\n" % ref.authors) f.write("\nTitulo: %s\n" % ref.title) f.write("\nLocalizacao: %s\n\n" % ref.location) break except Exception: break f.close() #análise da estrutura das proteÃnas relevantes com base nos ficheiros PDB encontrados (código baseado no desenvolvido pelo grupo 10) parser = PDBParser() ficheiro = open("analise_pdb.txt", "w") structure = parser.get_structure('4F67', '4F67.pdb') pdbl = PDBList() pdbl.retrieve_pdb_file('4F67') ficheiro.write("****Analise do ficheiro 4F67.pdb****\n") ficheiro.write("\nPalavras Chave: %s\n" % structure.header['keywords']) ficheiro.write("\nNome do Organismo: %s\n" % structure.header['name']) ficheiro.write("\nCabecalho: %s" % structure.header['head']) ficheiro.write("\nData da deposicao: %s\n" % structure.header['deposition_date']) ficheiro.write("\nData da publicacaos: %s\n" % structure.header['release_date']) ficheiro.write("\nMetodo usado: %s\n" % structure.header['structure_method']) ficheiro.write("\nResolucao: %s\n" % structure.header['resolution']) ficheiro.write("\nReferencia da estrutura: %s\n" % structure.header['structure_reference']) ficheiro.write("\nReferencia de artigo: %s\n" %
def pdb_buildstructure(pdbfile):
    """Parse *pdbfile* and return the resulting structure.

    The parser is created with PERMISSIVE=1 so that PDB files presenting
    errors can still be read; the structure is given the id "name".
    """
    return PDBParser(PERMISSIVE=1).get_structure("name", pdbfile)
for file in os.listdir(directory): filename = os.fsdecode(file) if filename.endswith(".pdb"): # dataset_dict[filename] = idx dataset_filenames.append(filename) idx += 1 pdb_to_seq = {} parser = PDBParser() ppb = PPBuilder() i = 0 for filename in dataset_filenames: with warnings.catch_warnings(record=True): with open(os.path.join(Constants.PDB_PATH, filename)) as f: structure = parser.get_structure(os.path.splitext(filename)[0], f) model = structure[0] for pp in ppb.build_peptides(model): #print(pp.get_sequence()) pdb_to_seq[filename] = str(pp.get_sequence()) break file_to_ds = {} with open(Constants.TRAIN_VAL_TEST_SPLIT_FILE_PATH) as file: split_d = json.load(file) for tr_val_or_test, filenames in split_d.items(): for fn in filenames: file_to_ds[fn] = tr_val_or_test seq_to_pdbs = {}
def generate_seq_file(score_file, save_file):
    """Write the mutated sequence for every mutation listed in a score file.

    The first column of ``./dataFile/<score_file>`` (tab separated) holds
    names of the form ``<pdb>_<chain>_<WT3><pos><MU3>``. For each distinct
    name the chain sequence is built from the PDB entry (downloaded into
    ``./dataFile/PDB_dl`` when missing), the residue at ``pos`` (1-based
    index into the built sequence) is replaced, and a ``name<TAB>sequence``
    line is written to *save_file*.

    :param score_file: file name inside ``./dataFile``
    :param save_file: path of the output file
    :raises AssertionError: if the residue found at the mutation position is
        not the expected wild-type amino acid
    """
    score_file = './dataFile/' + score_file
    sf = pd.read_csv(score_file, sep='\t')
    mut_chains = sf.iloc[:, 0]

    # Group the distinct mutations by PDB id, keeping first-seen order.
    mut_dict = dict()
    seen_names = set()
    for chain in mut_chains:
        info = chain.split('_')
        pdb_id = info[0]
        chain_id = info[1]
        wt_aa = info[2][0:3]
        mu_aa = info[2][-3:]
        mu_pos = int(''.join(ch for ch in info[2] if ch.isdigit()))
        if chain in seen_names:
            continue
        seen_names.add(chain)
        mut_dict.setdefault(pdb_id, []).append({
            'chain_id': chain_id,
            'wt_aa': wt_aa,
            'mu_aa': mu_aa,
            'mu_pos': mu_pos,
            'name': chain
        })

    parser = PDBParser()
    seq_builder = PPBuilder()
    pdb_dl_handle = PDBList()
    PDB_DIR = './dataFile/PDB_dl'

    mut_collect = dict()
    for pdb_id, mutations in mut_dict.items():
        pdb_file = PDB_DIR + '/pdb' + pdb_id.lower() + '.ent'
        # Download the entry only when it is not on disk yet.
        if not os.path.exists(pdb_file):
            pdb_dl_handle.retrieve_pdb_file(pdb_code=pdb_id,
                                            file_format='pdb',
                                            overwrite=False,
                                            pdir=PDB_DIR)
        model = parser.get_structure(pdb_id, pdb_file)[0]

        # Several mutations usually share a chain; build each sequence once.
        chain_sequences = {}
        for mutation in mutations:
            chain_id = mutation['chain_id']
            if chain_id not in chain_sequences:
                built = "".join(
                    str(pp.get_sequence())
                    for pp in seq_builder.build_peptides(model[chain_id]))
                chain_sequences[chain_id] = built.replace('\n', '').replace(' ', '')
            sequence = chain_sequences[chain_id]

            index = mutation['mu_pos'] - 1
            # Explicit raise instead of `assert`, so the check still runs
            # under `python -O`; the exception type is unchanged.
            if sequence[index] != three_to_one(mutation['wt_aa']):
                raise AssertionError('Wt amino acid failed to match')

            mut_Seq_list = list(sequence)
            mut_Seq_list[index] = three_to_one(mutation['mu_aa'])
            mut_collect[mutation['name']] = ''.join(mut_Seq_list)

    with open(save_file, 'w') as output_hl:
        for k, v in mut_collect.items():
            output_hl.write(k + '\t' + v + '\n')
def test_show_biopython():
    """Smoke test: a Biopython structure can be passed to nv.show_biopython."""
    from Bio.PDB import PDBParser

    # Parse the PDB data file referenced by nv.datafiles.
    structure = PDBParser().get_structure('protein', nv.datafiles.PDB)
    nv.show_biopython(structure)
def getStructure(self):
    """Return the structure parsed from ``self.retrievePDB()``.

    The structure id is ``self.ident`` and the parser runs in permissive
    mode.
    """
    pdb_parser = PDBParser(PERMISSIVE=1)
    # Read the id before calling retrievePDB(), as the original call did.
    structure_id = self.ident
    pdb_source = self.retrievePDB()
    return pdb_parser.get_structure(structure_id, pdb_source)
from Bio.PDB import PDBParser

# Usage: <script> alignment.fasta structure1.pdb structure2.pdb
# Single-string print calls produce the same output on Python 2 and 3.
if len(sys.argv) != 4:
    print("Expects three arguments,")
    print(" - FASTA alignment filename (expect two sequences)")
    print(" - PDB file one")
    print(" - PDB file two")
    sys.exit()

# The alignment (the handle is closed as soon as it has been read)
with open(sys.argv[1]) as handle:
    fa = AlignIO.read(handle, "fasta", generic_protein)

pdb_file1 = sys.argv[2]
pdb_file2 = sys.argv[3]

# The structures
p = PDBParser()
s1 = p.get_structure('1', pdb_file1)

p = PDBParser()
s2 = p.get_structure('2', pdb_file2)

# Get the models
m1 = s1[0]
m2 = s2[0]

al = StructureAlignment(fa, m1, m2)

# Print aligned pairs (r is None if gap)
for (r1, r2) in al.get_iterator():
    print("%s %s" % (r1, r2))
def parse_structure(path):
    """
    Read a PDB/mmCIF file with Biopython and prepare it for the calculation.

    The parser is chosen from the file extension ('pdb'/'ent' or 'cif').
    Only the first model is kept, disordered atoms are replaced by their
    selected alternate location, waters and hetero residues are removed,
    hydrogens are stripped, and a warning is logged when the number of
    peptide fragments differs from the number of chains (gaps).

    Returns a tuple (structure, number of chains, number of residues).
    Raises IOError for an unsupported extension, ValueError for a
    non-standard amino acid, and Exception when the file cannot be parsed.
    """
    log = logging.getLogger("Prodigy")
    log.info("[+] Reading structure file: {0}".format(path))

    # Split "<name>.<ext>"; everything before the last dot is the name.
    fname = os.path.basename(path)
    name_parts = fname.split(".")
    sname = ".".join(name_parts[:-1])
    s_ext = name_parts[-1]

    if s_ext not in ("pdb", "ent", "cif"):
        raise IOError(
            "[!] Structure format '{0}' is not supported. Use '.pdb' or '.cif'."
            .format(s_ext))

    if s_ext == "cif":
        sparser = MMCIFParser()
    else:
        sparser = PDBParser(QUIET=1)

    try:
        s = sparser.get_structure(sname, path)
    except Exception as e:
        log.error("[!] Structure '{0}' could not be parsed".format(sname))
        raise Exception(e)

    # Keep first model only
    if len(s) > 1:
        log.warning(
            "[!] Structure contains more than one model. Only the first one will be kept"
        )
        first_id = s[0].id
        surplus_ids = [m.id for m in s.child_list if m.id != first_id]
        for model_id in surplus_ids:
            s.detach_child(model_id)

    # Double occupancy check: keep only the selected alternate location
    for atom in list(s.get_atoms()):
        if not atom.is_disordered():
            continue
        residue = atom.parent
        sel_at = atom.selected_child
        sel_at.altloc = " "
        sel_at.disordered_flag = 0
        residue.detach_child(atom.id)
        residue.add(sel_at)

    # Remove HETATMs and solvent; anything left must be a standard residue
    for res in list(s.get_residues()):
        if res.id[0][0] in ("W", "H"):
            res.parent.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError(
                "Unsupported non-standard amino acid found: {0}".format(
                    res.resname))
    n_res = len(list(s.get_residues()))

    # Remove Hydrogens
    for atom in list(s.get_atoms()):
        if atom.element == "H":
            atom.parent.detach_child(atom.name)

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_chains = len({c.id for c in s.get_chains()})

    if len(peptides) != n_chains:
        log.warning("[!] Structure contains gaps:")
        for i_pp, pp in enumerate(peptides):
            log.warning(
                "\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}"
                .format(i_pp, pp[0], pp[-1]))
        # raise Exception('Calculation cannot proceed')

    return (s, n_chains, n_res)
class PolypeptideTests(unittest.TestCase):
    """Test Polypeptide module."""

    @classmethod
    def setUpClass(cls):
        # Parse the reference structure once for all tests.
        cls.parser = PDBParser(PERMISSIVE=True)
        cls.structure = cls.parser.get_structure("scr", "PDB/1A8O.pdb")

    def test_ppbuilder_real(self):
        """Test PPBuilder on real PDB file."""
        peptides = PPBuilder().build_peptides(self.structure)
        self.assertEqual(len(peptides), 3)

        # Check termini (first and last residue number of each fragment)
        expected_termini = [(152, 184), (186, 213), (216, 220)]
        for peptide, (first, last) in zip(peptides, expected_termini):
            self.assertEqual(peptide[0].get_id()[1], first)
            self.assertEqual(peptide[-1].get_id()[1], last)

        # Now check sequences
        sequences = [peptide.get_sequence() for peptide in peptides]
        self.assertIsInstance(sequences[0], Seq)
        self.assertEqual(sequences[0], "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW")
        self.assertEqual(sequences[1], "TETLLVQNANPDCKTILKALGPGATLEE")
        self.assertEqual(sequences[2], "TACQG")

    def test_ppbuilder_real_nonstd(self):
        """Test PPBuilder on real PDB file allowing non-standard amino acids."""
        peptides = PPBuilder().build_peptides(self.structure, False)
        self.assertEqual(len(peptides), 1)

        # Check the start and end positions
        only = peptides[0]
        self.assertEqual(only[0].get_id()[1], 151)
        self.assertEqual(only[-1].get_id()[1], 220)

        # Check the sequence
        s = only.get_sequence()
        self.assertIsInstance(s, Seq)
        # Here non-standard MSE are shown as M
        self.assertEqual(
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
            s)

    def test_ppbuilder_torsion(self):
        """Test phi/psi angles calculated with PPBuilder."""
        peptides = PPBuilder().build_peptides(self.structure)

        # Per fragment: psi of the first residue (its phi must be None),
        # then (phi, psi) of the second and third residues.
        expected = [
            (-0.46297171497725553,
             [(-1.0873937604007962, 2.1337707832637109),
              (-2.4052232743651878, 2.3807316946081554)]),
            (-0.6810077089092923,
             [(-1.2654003477656888, -0.58689987042756309),
              (-1.7467679151684763, -1.5655066256698336)]),
            (-0.73222884210889716,
             [(-1.1044740234566259, -0.69681334592782884),
              (-1.8497413300164958, 0.34762889834809058)]),
        ]
        for peptide, (first_psi, following) in zip(peptides, expected):
            phi_psi = peptide.get_phi_psi_list()
            self.assertIsNone(phi_psi[0][0])
            self.assertAlmostEqual(phi_psi[0][1], first_psi, places=3)
            for position, (phi, psi) in enumerate(following, start=1):
                self.assertAlmostEqual(phi_psi[position][0], phi, places=3)
                self.assertAlmostEqual(phi_psi[position][1], psi, places=3)

    def test_cappbuilder_real(self):
        """Test CaPPBuilder on real PDB file."""
        peptides = CaPPBuilder().build_peptides(self.structure)

        sequences = [peptides[i].get_sequence() for i in range(3)]
        self.assertEqual(sequences[0], "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW")
        self.assertEqual(sequences[1], "TETLLVQNANPDCKTILKALGPGATLEE")
        self.assertEqual(sequences[2], "TACQG")

        # Serial numbers of the CA atoms of the first fragment
        ca_serials = [ca.serial_number for ca in peptides[0].get_ca_list()]
        self.assertEqual(
            ca_serials,
            [
                10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93,
                104, 112, 124, 131, 139, 150, 161, 173, 182, 189, 197,
                208, 213, 222, 231, 236, 242, 251, 260, 267, 276, 284,
            ],
        )

    def test_cappbuilder_real_nonstd(self):
        """Test CaPPBuilder on real PDB file allowing non-standard amino acids."""
        peptides = CaPPBuilder().build_peptides(self.structure, False)
        self.assertEqual(len(peptides), 1)

        # Check the start and end positions
        only = peptides[0]
        self.assertEqual(only[0].get_id()[1], 151)
        self.assertEqual(only[-1].get_id()[1], 220)

        # Check the sequence
        s = only.get_sequence()
        self.assertIsInstance(s, Seq)
        # Here non-standard MSE are shown as M
        self.assertEqual(
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
            s)

    def test_cappbuilder_tau(self):
        """Test tau angles calculated with CaPPBuilder."""
        peptides = CaPPBuilder().build_peptides(self.structure)

        # First three tau values of the second fragment
        taus = peptides[1].get_tau_list()
        expected_taus = (0.3597907225123525, 0.43239284636769254,
                         0.99820157492712114)
        for position, value in enumerate(expected_taus):
            self.assertAlmostEqual(taus[position], value, places=3)

        # First three theta values of the third fragment
        thetas = peptides[2].get_theta_list()
        expected_thetas = (1.6610069445335354, 1.7491703334817772,
                           2.0702447422720143)
        for position, value in enumerate(expected_thetas):
            self.assertAlmostEqual(thetas[position], value, places=3)
def create_structure(filename, quiet=True):
    """Parse *filename* into a Biopython structure.

    The structure id is *filename* with its last ".suffix" removed; *quiet*
    is forwarded to PDBParser as QUIET. A progress line is printed first.
    """
    print("creating biopython molecule structure...")
    # Strip only the final extension, e.g. "a.b.pdb" -> "a.b".
    structure_id = filename.rsplit(".", 1)[0]
    pdb_parser = PDBParser(QUIET=quiet)
    return pdb_parser.get_structure(structure_id, filename)
def main():
    """The main routine for conkit-validate functionality

    Reads the sequence, distance prediction and PDB model named on the
    command line, runs DSSP on the first model of the PDB file, saves the
    model validation figure to the requested output file and logs a
    per-residue table of scores and suggested registers.

    Raises FileExistsError if the output exists and overwrite was not
    requested, and ValueError for sequences shorter than 5 residues.
    """
    global logger

    args = create_argument_parser().parse_args()
    logger = conkit.command_line.setup_logging(level="info")

    # Refuse to clobber an existing output unless asked to.
    output_exists = os.path.isfile(args.output)
    if output_exists and not args.overwrite:
        raise FileExistsError('The output file {} already exists!'.format(
            args.output))

    logger.info(os.linesep + "Working directory:                           %s", os.getcwd())

    logger.info("Reading input sequence:                      %s", args.seqfile)
    sequence = conkit.io.read(args.seqfile, args.seqformat).top
    if len(sequence) < 5:
        raise ValueError('Cannot validate model with less than 5 residues')
    logger.info("Length of the sequence:                      %d", len(sequence))

    logger.info("Reading input distance prediction:           %s", args.distfile)
    prediction = conkit.io.read(args.distfile, args.distformat).top

    logger.info("Reading input PDB model:                     %s", args.pdbfile)
    model = conkit.io.read(args.pdbfile, args.pdbformat).top

    # DSSP is computed on the first model of the PDB file.
    first_model = PDBParser().get_structure('structure', args.pdbfile)[0]
    dssp = DSSP(first_model, args.pdbfile, dssp=args.dssp, acc_array='Wilke')

    logger.info(os.linesep + "Validating model.")
    if len(sequence) > 500:
        logger.info(
            "Input model has more than 500 residues, this might take a while..."
        )

    figure = conkit.plot.ModelValidationFigure(
        model, prediction, sequence, dssp, map_align_exe=args.map_align_exe)
    figure.savefig(args.output, overwrite=args.overwrite)
    logger.info(os.linesep + "Validation plot written to %s", args.output)

    # Build the per-residue report table.
    residue_info = figure.data.loc[:, ['RESNUM', 'SCORE', 'MISALIGNED']]
    table = PrettyTable()
    table.field_names = ["Residue", "Predicted score", "Suggested register"]

    _resnum_template = '{} ({})'
    _error_score_template = '*** {0:.2f} ***'
    _correct_score_template = ' {0:.2f} '
    _register_template = '*** {} ({}) ***'
    _empty_register = ' '

    for resnum, score, misalignment in residue_info.values:
        current_residue = _resnum_template.format(sequence.seq[resnum - 1],
                                                  resnum)

        # Scores above 0.5 are highlighted as predicted errors.
        if score > 0.5:
            score = _error_score_template.format(score)
        else:
            score = _correct_score_template.format(score)

        # Suggest a register only for misaligned residues with an alignment.
        if misalignment and resnum in figure.alignment.keys():
            target = figure.alignment[resnum]
            register = _register_template.format(sequence.seq[target - 1],
                                                 target)
        else:
            register = _empty_register

        table.add_row([current_residue, score, register])

    logger.info(os.linesep)
    logger.info(table)
class WriteTest(unittest.TestCase):
    """Round-trip tests for PDBIO: write entities to disk and parse them back.

    The 1A8O structure is parsed once in ``setUpClass`` and shared by all
    tests. Tests that need to modify it work on a copy so that the outcome of
    the suite does not depend on test execution order.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared PDBIO/PDBParser objects and parse 1A8O once."""
        cls.io = PDBIO()
        cls.parser = PDBParser(PERMISSIVE=1)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            cls.structure = cls.parser.get_structure("example", "PDB/1A8O.pdb")

    def test_pdbio_write_structure(self):
        """Write a full structure using PDBIO."""
        struct1 = self.structure
        # Ensure that set_structure doesn't alter parent
        parent = struct1.parent

        # Write full model to temp file
        self.io.set_structure(struct1)
        self.assertIs(parent, struct1.parent)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)

            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))

            self.assertEqual(len(struct2), 1)
            self.assertEqual(nresidues, 158)
        finally:
            os.remove(filename)

    def test_pdbio_write_preserve_numbering(self):
        """Test writing PDB and preserve atom numbering."""
        self.io.set_structure(self.structure)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename, preserve_atom_numbering=True)

            struct = self.parser.get_structure("1a8o", filename)
            serials = [a.serial_number for a in struct.get_atoms()]
            # Serial numbers must be exactly those of the input structure.
            og_serials = [a.serial_number for a in self.structure.get_atoms()]

            self.assertEqual(og_serials, serials)
        finally:
            os.remove(filename)

    def test_pdbio_write_auto_numbering(self):
        """Test writing PDB and do not preserve atom numbering."""
        self.io.set_structure(self.structure)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)  # default preserve_atom_numbering=False

            struct = self.parser.get_structure("1a8o", filename)
            serials = [a.serial_number for a in struct.get_atoms()]
            # Atoms are expected to be renumbered consecutively from 1.
            og_serials = list(range(1, len(serials) + 1))

            self.assertEqual(og_serials, serials)
        finally:
            os.remove(filename)

    def test_pdbio_write_residue(self):
        """Write a single residue using PDBIO."""
        struct1 = self.structure
        residue1 = list(struct1.get_residues())[0]
        # Ensure that set_structure doesn't alter parent
        parent = residue1.parent

        # Write the residue to temp file
        self.io.set_structure(residue1)
        self.assertIs(parent, residue1.parent)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)

            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))

            self.assertEqual(nresidues, 1)
        finally:
            os.remove(filename)

    def test_pdbio_write_residue_w_chain(self):
        """Write a single residue (chain id == X) using PDBIO."""
        struct1 = self.structure.copy()  # make copy so we can change it
        residue1 = list(struct1.get_residues())[0]

        # Modify parent id
        parent = residue1.parent
        parent.id = "X"

        # Write the residue to temp file
        self.io.set_structure(residue1)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)

            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))

            self.assertEqual(nresidues, 1)

            # Assert chain remained the same
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "X")
        finally:
            os.remove(filename)

    def test_pdbio_write_residue_wout_chain(self):
        """Write a single orphan residue using PDBIO."""
        # Work on a copy: detaching a residue must not leak into the
        # structure shared with the other tests.
        struct1 = self.structure.copy()
        residue1 = list(struct1.get_residues())[0]

        residue1.parent = None  # detach residue

        # Write the residue to temp file
        self.io.set_structure(residue1)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)

            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))

            self.assertEqual(nresidues, 1)

            # Assert chain is default: "A"
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "A")
        finally:
            os.remove(filename)

    def test_pdbio_write_custom_residue(self):
        """Write a chainless residue using PDBIO."""
        res = Residue.Residue((" ", 1, " "), "DUM", "")
        atm = Atom.Atom("CA", [0.1, 0.1, 0.1], 1.0, 1.0, " ", "CA", 1, "C")
        res.add(atm)

        # Ensure that set_structure doesn't alter parent
        parent = res.parent

        # Write the residue to temp file
        self.io.set_structure(res)
        self.assertIs(parent, res.parent)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)

            struct2 = self.parser.get_structure("res", filename)
            latoms = list(struct2.get_atoms())

            self.assertEqual(len(latoms), 1)
            self.assertEqual(latoms[0].name, "CA")
            self.assertEqual(latoms[0].parent.resname, "DUM")
            self.assertEqual(latoms[0].parent.parent.id, "A")
        finally:
            os.remove(filename)

    def test_pdbio_select(self):
        """Write a selection of the structure using a Select subclass."""

        # Selection class to filter all alpha carbons
        class CAonly(Select):
            """Accepts only CA atoms whose element is carbon."""

            def accept_atom(self, atom):
                return atom.name == "CA" and atom.element == "C"

        struct1 = self.structure
        # Ensure that set_structure doesn't alter parent
        parent = struct1.parent

        # Write to temp file
        self.io.set_structure(struct1)
        self.assertIs(parent, struct1.parent)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename, CAonly())

            struct2 = self.parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))

            self.assertEqual(nresidues, 70)
        finally:
            os.remove(filename)

    def test_pdbio_missing_occupancy(self):
        """Write PDB file with missing occupancy."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = self.parser.get_structure("test", "PDB/occupancy.pdb")

        self.io.set_structure(structure)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            # Saving is expected to emit exactly one BiopythonWarning.
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always", BiopythonWarning)
                self.io.save(filename)
                self.assertEqual(len(w), 1, w)

            with warnings.catch_warnings():
                warnings.simplefilter("ignore", PDBConstructionWarning)
                struct2 = self.parser.get_structure("test", filename)

            atoms = struct2[0]["A"][(" ", 152, " ")]
            self.assertIsNone(atoms["N"].get_occupancy())
        finally:
            os.remove(filename)

    def test_pdbio_write_truncated(self):
        """Test parsing of truncated lines."""
        struct = self.structure

        # Write to temp file
        self.io.set_structure(struct)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)

            # Check if there are lines besides 'ATOM', 'TER' and 'END'
            with open(filename) as handle:
                record_set = {line[0:6] for line in handle}

            record_set -= {
                "ATOM  ",
                "HETATM",
                "MODEL ",
                "ENDMDL",
                "TER\n",
                "TER   ",
                "END\n",
                "END   ",
            }
            self.assertEqual(len(record_set), 0)
        finally:
            os.remove(filename)

    def test_model_numbering(self):
        """Preserve model serial numbers during I/O."""

        def confirm_numbering(struct):
            self.assertEqual(len(struct), 3)
            for idx, model in enumerate(struct):
                self.assertEqual(model.serial_num, idx + 1)
                self.assertEqual(model.serial_num, model.id + 1)

        def confirm_single_end(fname):
            """Ensure there is only one END statement in multi-model files."""
            with open(fname) as handle:
                lines = handle.readlines()

            end_indices = [
                idx for idx, line in enumerate(lines) if line.strip() == "END"
            ]
            self.assertEqual(len(end_indices), 1)  # Only one?
            self.assertEqual(end_indices[0], len(lines) - 1)  # Last line?

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            struct1 = self.parser.get_structure("1lcd", "PDB/1LCD.pdb")

        confirm_numbering(struct1)

        # Round trip: serialize and parse again
        self.io.set_structure(struct1)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)

            struct2 = self.parser.get_structure("1lcd", filename)
            confirm_numbering(struct2)
            confirm_single_end(filename)
        finally:
            os.remove(filename)

    def test_pdbio_write_x_element(self):
        """Write a structure with atomic element X with PDBIO."""
        # Work on a copy so the element change does not affect other tests.
        struct1 = self.structure.copy()

        # Change element of one atom
        atom = next(struct1.get_atoms())
        atom.element = "X"  # X is assigned in Atom.py as last resort

        self.io.set_structure(struct1)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            self.io.save(filename)
        finally:
            os.remove(filename)

    def test_pdbio_write_unk_element(self):
        """PDBIO raises ValueError when writing unrecognised atomic elements."""
        # Work on a copy so the bogus element does not affect other tests.
        struct1 = self.structure.copy()

        atom = next(struct1.get_atoms())
        atom.element = "1"

        self.io.set_structure(struct1)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)

        try:
            with self.assertRaises(ValueError):
                self.io.save(filename)
        finally:
            # Clean up even when the expected exception is not raised.
            os.remove(filename)
from Bio.PDB import PDBParser

# Parse the local 3mxw.pdb file; QUIET=True is passed to the parser.
p = PDBParser(QUIET=True)
s = p.get_structure('3mxw.pdb', '3mxw.pdb')

# Number of chains yielded by the structure.
n_c = sum(1 for _chain in s.get_chains())

# Number of residues in chain H of the first model, skipping waters (HOH).
n_h_r = sum(1 for r in s[0]['H'] if r.resname != "HOH")

print(f"This protein has {n_c} chains and {n_h_r} residues in chain H")
def LoadPDB(PDBID, FOLDER):
    """Parse the PDB file ``<PDBID>.pdb`` located in ``FOLDER``.

    The path is built with ``os.path.join``, so ``FOLDER`` may be given with
    or without a trailing path separator (the previous plain string
    concatenation silently produced a wrong path such as ``dir1abc.pdb`` when
    the separator was missing).

    :param PDBID: identifier of the entry; used both as the structure id and
        as the file name stem.
    :param FOLDER: directory containing the ``.pdb`` file.
    :return: the structure returned by ``PDBParser.get_structure``.
    """
    import os

    from Bio.PDB import PDBParser

    pdbfile = os.path.join(FOLDER, PDBID + '.pdb')
    parser = PDBParser()
    return parser.get_structure(PDBID, pdbfile)