def split_chains(pdbid):
    """Split an mmCIF file into one gzip-compressed mmCIF file per chain.

    Each chain of each model is saved as ``<structure id><chain id>.cif`` and
    then compressed in place with ``gzip -f``.  The structure id is the input
    path minus its ``.cif`` / ``.cif.gz`` suffix, so output files are written
    next to the input.  The output name does not include the model id, so a
    chain id that occurs in several models is overwritten by the last model.

    :param pdbid: path to a ``.cif`` file or a gzip-compressed ``.cif.gz`` file
    :return: None. Prints an error message and returns early when the file
        does not exist.
    """
    if not os.path.isfile(pdbid):
        print("Error: Function split_chains: file does not exist:" + pdbid + "\n")
        return

    # Only a trailing ".gz" marks a compressed file; a substring test would
    # misfire on names such as "x.gz.cif".
    if pdbid.endswith('.gz'):
        opener, suffix_len = gzip.open, 7  # strip ".cif.gz"
    else:
        opener, suffix_len = open, 4  # strip ".cif"

    parser = PDB.MMCIFParser(QUIET=True)
    # The structure is built while get_structure runs, so the handle can be
    # released as soon as parsing is done.
    with opener(pdbid, "rt") as handle:
        structure = parser.get_structure(pdbid[:-suffix_len], handle)

    for model in structure:
        for chain in model:
            io = PDB.MMCIFIO()
            io.set_structure(chain)
            chain_filename = structure.id + chain.id + ".cif"  # Output in MMCIF format
            io.save(chain_filename)
            # Argument list instead of a shell string: the filename is never
            # interpreted by a shell, and "--" guards names starting with "-".
            subprocess.call(['gzip', '-f', '--', chain_filename])
def align_structs(id1, chain1, id2, chain2):
    """
    Superimpose a chain of one structure onto a chain of another, print the
    RMSD of the superposition and hand both structures to saving_file.
    :param id1: the first file id
    :param chain1: the first protein's chain
    :param id2: the second file id
    :param chain2: the second protein's chain
    """
    # fetch both entries and parse them as mmCIF
    downloader = pdb.PDBList()
    path1 = downloader.retrieve_pdb_file(id1)
    path2 = downloader.retrieve_pdb_file(id2)
    cif_parser = pdb.MMCIFParser()
    struct1 = cif_parser.get_structure("p1", path1)
    struct2 = cif_parser.get_structure("p2", path2)

    # atom lists to align (CA atoms, per the original comment; the helper is
    # defined elsewhere)
    atoms1 = create_atoms_list(struct1, chain1)
    atoms2 = create_atoms_list(struct2, chain2)
    if len(atoms1) != len(atoms2):
        # lists of different length are replaced by the pair bonus_9_2 returns
        atoms1, atoms2 = bonus_9_2(chain1, chain2, struct1, struct2)

    # fit on the two lists, then move every atom of the second structure's
    # first model
    aligner = pdb.Superimposer()
    aligner.set_atoms(atoms1, atoms2)
    aligner.apply(struct2[0].get_atoms())
    print(aligner.rms)

    # save both structures
    for file_id, structure in ((id1, struct1), (id2, struct2)):
        saving_file(file_id, structure)
def test_from_mmcif(self):
    """Loading 1Y26 from mmCIF must yield the same defines and coords as loading it from PDB."""
    import Bio.PDB as bpdb

    from_cif = ftmc.from_pdb('test/forgi/threedee/data/1Y26.cif',
                             parser=bpdb.MMCIFParser())
    from_pdb = ftmc.from_pdb('test/forgi/threedee/data/1y26.pdb')

    self.assertEqual(from_cif.defines, from_pdb.defines)
    # guard against a trivially empty comparison
    self.assertGreater(len(from_cif.defines), 3)
    for element in from_cif.defines:
        nptest.assert_almost_equal(from_cif.coords[element],
                                   from_pdb.coords[element])
def _read_structure(path, pdb_id='pdb', cif_id='cif' ): file_name = os.path.basename(path).split('.')[0] file_sufix = os.path.basename(path).split('.')[1] dir_path = os.path.dirname(path) if file_sufix == 'pdb': parser = struct.PDBParser(QUIET=True) structure = parser.get_structure(pdb_id, path) elif file_sufix == 'cif': parser = struct.MMCIFParser() structure = parser.get_structure(cif_id, path) else: print("ERROR: Unreognized file type " + file_sufix + " in " + file_name) sys.exit(1) return structure, dir_path, file_name
def read(pdb_file):
    """
    parse a structure file; names ending in '.cif' (any case) go through the
    mmCIF parser, everything else through the PDB parser
    :param pdb_file: path of the file, also used as the structure id
    :return: structure
    """
    logging.info(f'reading pdb file: {pdb_file}')
    if pdb_file.lower().endswith('.cif'):
        logging.info(f'switched to cif modus for file: {pdb_file}')
        parser = PDB.MMCIFParser()
    else:
        parser = PDB.PDBParser()
    return parser.get_structure(pdb_file, pdb_file)
def mmcif_to_graph():
    """Convert the uploaded ``pdb_file`` (mmCIF content) to JSON.

    Reads the upload from ``request.files['pdb_file']``, sanitises its
    filename and passes content and name to ``forna.pdb_to_json`` with an
    mmCIF parser.

    :return: tuple ``(json_string, 201)`` on success
    :raises: whatever ``abort(400, ...)`` raises when the conversion fails;
        the failure is logged first
    """
    # The top-level ``from werkzeug import secure_filename`` shortcut was
    # removed in Werkzeug 1.0; werkzeug.utils works on old and new versions.
    from werkzeug.utils import secure_filename

    upload = request.files['pdb_file']
    name = secure_filename(upload.filename)

    try:
        result = forna.pdb_to_json(upload.read(), name,
                                   parser=bpdb.MMCIFParser())
    except Exception as ex:
        app.logger.exception(ex)
        abort(400, "PDB file parsing error: {}".format(str(ex)))

    return json.dumps(result), 201
def get_all_chains(in_filename, parser=None):
    '''
    Parse the structure file at in_filename and return the chains of its
    first model for which contains_rna() is true.

    When no parser is given, it is chosen from the file extension (".pdb" or
    ".cif"); for any other extension the first 20 characters of the file
    decide.

    :param in_filename: The location of the original file.
    :param parser: Optional parser object providing get_structure().
    :return: A list of Bio.PDB chain structures corresponding to all
             RNA structures stored in in_filename
    '''
    if parser is None:
        if in_filename.endswith(".pdb"):
            parser = bpdb.PDBParser()
        elif in_filename.endswith(".cif"):
            parser = bpdb.MMCIFParser()
        else:
            # Extension is inconclusive, so peek at the start of the file.
            # According to page 10 of
            # ftp://ftp.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_A4.pdf
            # a HEADER entry is mandatory. Biopython sometime starts directly
            # with ATOM.
            with open(in_filename) as pdbfile:
                first_chars = pdbfile.readline(20)
            looks_like_pdb = first_chars.startswith(("HEADER", "ATOM"))
            parser = bpdb.PDBParser() if looks_like_pdb else bpdb.MMCIFParser()

    # Parser warnings are silenced; the multi-model warning below is not.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        structure = parser.get_structure('temp', in_filename)

    if len(structure) > 1:
        warnings.warn("Multiple models in file. Using only the first model")

    return [chain for chain in structure[0] if contains_rna(chain)]
def __init__(self, path, pdb_id='pdb', cif_id='cif'):
    """Load a PDB or mmCIF file and initialise the per-structure state.

    :param path: path to a ``.pdb`` or ``.cif`` file (extension is matched
        case-insensitively)
    :param pdb_id: structure id passed to the parser for PDB files
    :param cif_id: structure id passed to the parser for mmCIF files
    :raises SystemExit: with code 1 when the extension is neither pdb nor cif
        (including when the name has no extension at all)
    """
    # copied from input_output.py _read_structure
    base_name = os.path.basename(path)
    self.file_name = base_name.split('.')[0]
    # Text after the LAST dot; split('.')[1] raised IndexError for names
    # without a dot and picked the wrong part for names with several dots.
    self.file_sufix = os.path.splitext(base_name)[1][1:]
    self.dir_path = os.path.dirname(path)
    self.params = CP.read_charmm_FF()
    self.chains = []
    self.models = {}

    suffix = self.file_sufix.lower()
    if suffix == 'pdb':
        self.header = struct.parse_pdb_header(path)
        self.structure = struct.PDBParser(QUIET=True).get_structure(pdb_id, path)
        self.has_sequence = False
    elif suffix == 'cif':
        # NOTE(review): called without arguments. If `struct` is Bio.PDB,
        # MMCIF2Dict needs the file path (and may be the module rather than
        # the class) - confirm against the import of `struct`.
        self.header = struct.MMCIF2Dict()
        self.structure = struct.MMCIFParser().get_structure(cif_id, path)
        self.has_sequence = True
    else:
        print("ERROR: Unreognized file type " + self.file_sufix + " in " + self.file_name)
        sys.exit(1)
def compute_distance(input_dir, filename, res1, atm1, res2, atm2):
    """Return the distance between two atoms of ``<input_dir>/<filename>.cif.gz``.

    A residue is eligible when its sequence number matches and it is either a
    standard residue (blank hetero flag) or its hetero name (hetero flag
    without the leading two characters) is listed in
    ``List_modified_aminoacid.txt`` in the current working directory.

    :param input_dir: directory containing the gzip-compressed mmCIF file
    :param filename: file name without the ``.cif.gz`` suffix
    :param res1: sequence number of the first residue (anything ``int()`` accepts)
    :param atm1: atom name looked up in the first residue
    :param res2: sequence number of the second residue
    :param atm2: atom name looked up in the second residue
    :return: the distance rounded to 2 decimals, or the sentinel ``999`` when
        the number of matching residue/atom hits over all models and chains
        is not exactly 2
    """
    parser = PDB.MMCIFParser(QUIET=True)
    with gzip.open(input_dir + '/' + filename + '.cif.gz', 'rt') as handle:
        structure = parser.get_structure("PDB", handle)

    # Read the list once into a set instead of re-reading the file twice per
    # residue. Stripping the newline also lets the last entry match when the
    # file has no trailing newline.
    with open('List_modified_aminoacid.txt', 'r') as listing:
        modified = {line.rstrip('\n') for line in listing}

    res1_num = int(res1)
    res2_num = int(res2)

    def _is_target(residue, number):
        # Same sequence number AND (standard residue OR listed modified residue).
        if int(residue.id[1]) != number:
            return False
        return residue.get_id()[0] == ' ' or residue.id[0][2:] in modified

    atom_present = 0
    res1_object = None
    res2_object = None
    for model in structure:
        for chain in model:
            for residue in chain:
                if _is_target(residue, res1_num) and residue.has_id(atm1):
                    res1_object = residue
                    atom_present += 1
                if _is_target(residue, res2_num) and residue.has_id(atm2):
                    res2_object = residue
                    atom_present += 1

    # NOTE(review): hits are counted over every model and chain, so a residue
    # number present in several chains yields a count above 2 and the
    # sentinel is returned - confirm this is intended.
    if atom_present == 2:
        # round works only on type float, so first convert the number to float
        return round(float(res1_object[atm1] - res2_object[atm2]), 2)
    return 999
def get_all_chains(in_filename, parser=None, no_annotation=False, assembly_nr=None):
    '''
    Load the structure file located at in_filename, strip water, normalise
    some residue names and return the RNA chains of the first model together
    with missing-residue and interaction information.

    :param in_filename: The location of the original file.
    :param parser: Optional parser object. If None, it is chosen from the
                   file extension (".pdb"/".cif") or, failing that, from the
                   first 20 characters of the file.
    :param no_annotation: If True, the interaction search is skipped (an
                          empty set is returned instead) and, for mmCIF
                          input, no missing residues are collected.
    :param assembly_nr: Only triggers a warning when a PDB-format header is
                        available; it is not used otherwise in this function.
    :return: a tuple chains, missing_residues, interacting_residues

             * chains: A list of Bio.PDB chain structures corresponding to
               all RNA structures stored in in_filename
             * missing_residues: A list of dictionaries, describing the
               missing residues.
             * interacting residues: The result of
               enumerate_interactions_kdtree on the first model, or an empty
               set when no_annotation is True.
    '''
    if parser is None:
        if in_filename.endswith(".pdb"):
            parser = bpdb.PDBParser()
        elif in_filename.endswith(".cif"):
            parser = bpdb.MMCIFParser()
        else:  # Cannot determine filetype by extension. Try to read first line.
            with open(in_filename) as pdbfile:
                line = pdbfile.readline(20)
                # According to
                # page 10 of ftp://ftp.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_A4.pdf
                # a HEADER entry is mandatory. Biopython sometime starts directly with ATOM
                if line.startswith("HEADER") or line.startswith("ATOM"):
                    parser = bpdb.PDBParser()
                else:
                    parser = bpdb.MMCIFParser()

    # Warnings raised while parsing are suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        s = parser.get_structure('temp', in_filename)

    if len(s) > 1:
        warnings.warn("Multiple models in file. Using only the first model")

    # Let's detach all H2O, to speed up processing.
    for chain in s[0]:
        log.debug("Before detaching water from %s: chain has %s residues", chain.id, len(chain))
        for r in chain.child_list[:]:  # We need a copy here, because we are modifying it during iteration
            if r.resname.strip() == "HOH":
                chain.detach_child(r.id)
        log.debug("After detaching water from %s: chain has %s residues", chain.id, len(s[0][chain.id]))

    # Rename residues from other programs
    for chain in s[0]:
        for r in chain:
            # rename rosetta-generated structures
            if r.resname == ' rA':
                r.resname = ' A'
            elif r.resname == ' rC':
                r.resname = ' C'
            elif r.resname == ' rG':
                r.resname = ' G'
            elif r.resname == ' rU':
                r.resname = ' U'
            # rename iFoldRNA-generated structures
            if r.resname == 'ADE':
                r.resname = ' A'
            elif r.resname == 'CYT':
                r.resname = ' C'
            elif r.resname == 'GUA':
                r.resname = ' G'
            elif r.resname == 'URI':
                r.resname = ' U'

    # Now search for protein interactions.
    if not no_annotation:
        interacting_residues = enumerate_interactions_kdtree(s[0])
    else:
        interacting_residues = set()

    # The chains containing RNA
    chains = list(chain for chain in s[0] if contains_rna(chain))

    # Missing residues. The exception raised here selects the code path:
    # AttributeError (parser has no `header`) is treated as "mmCIF parser",
    # KeyError (header lacks "missing_residues") falls back to scanning the
    # file for REMARK 465 lines, and the final `else` runs only when the
    # header lookup succeeded.
    try:
        log.debug("PDB header %s", parser.header)
        mr = parser.header["missing_residues"]
        if assembly_nr is not None:
            warnings.warn(
                "Getting an assembly is not supported for the old PDB format.")
    except AttributeError:  # A mmCIF parser
        cifdict = parser._mmcif_dict  # We read a private attribute here, because parsing the mmcif dictionary a second time would cause a performance penalty.
        # Generate an assembly
        try:
            operation_mat, operation_vec = _extract_symmetrymatrices_from_cif_dict(
                cifdict)
        except KeyError:
            pass
        else:
            if False:  # Still experimental and not working correctly.
                chains = _get_assemblies(chains, cifdict)
        mr = []
        try:
            # Rows of _pdbx_poly_seq_scheme whose pdb_mon_id is "?" are
            # taken as the missing residues.
            mask = np.array(cifdict["_pdbx_poly_seq_scheme.pdb_mon_id"], dtype=str) == "?"
            int_seq_ids = np.array(
                cifdict["_pdbx_poly_seq_scheme.pdb_seq_num"], dtype=int)[mask]
            cs = np.array(cifdict["_pdbx_poly_seq_scheme.pdb_strand_id"], dtype=str)[mask]
            insertions = np.array(
                cifdict["_pdbx_poly_seq_scheme.pdb_ins_code"], dtype=str)[mask]
            # "." insertion codes are replaced by a blank.
            insertions[insertions == "."] = " "
            symbol = np.array(cifdict["_pdbx_poly_seq_scheme.mon_id"], dtype=str)[mask]
        except KeyError:
            pass
        else:
            if not no_annotation:
                for i, sseq in enumerate(int_seq_ids):
                    mr.append({
                        "model": None,
                        "res_name": symbol[i],
                        "chain": cs[i],
                        "ssseq": sseq,
                        "insertion": insertions[i]
                    })
    except KeyError:
        mr = []
        with open(in_filename) as f:
            for wholeline in f:
                if wholeline.startswith("REMARK 465"):
                    # Drop the 10-character record prefix before parsing.
                    line = wholeline[10:].strip()
                    mr_info = _parse_remark_465(line)
                    if mr_info is not None:
                        mr.append(mr_info)
                else:
                    continue
    else:
        if mr:
            log.info("This PDB has missing residues")
        elif not no_annotation:
            log.info("This PDB has no missing residues")
    # The bare string below is a disabled brute-force interaction search kept
    # from the original; it is a no-op expression statement.
    '''for res1, res2 in itertools.combinations(s[0].get_residues(), 2): rna_res=None other_res=None if res1.resname.strip() in RNA_RESIDUES: rna_res=res1 else: other_res=res1 if res2.resname.strip() in RNA_RESIDUES: rna_res=res2 else: other_res=res2 if rna_res is None or other_res is None: continue if other_res.resname.strip()=="HOH": continue if residues_interact(rna_res, other_res): log.error("%s and %s interact", rna_res, other_res) interacting_residues.add(rna_res)'''
    log.debug("LOADING DONE: chains %s, mr %s, ir: %s", chains, mr, interacting_residues)
    return chains, mr, interacting_residues
def get_pdb_STR(pdbPath):
    """Parse the mmCIF file at pdbPath with a quiet parser and return the structure (id "pdb")."""
    cif_parser = PDB.MMCIFParser(QUIET=True)
    return cif_parser.get_structure("pdb", pdbPath)
def _exp_file_to_data(self, file_path, params):
    """
        _exp_file_to_data: Do the PDB conversion--parse the experiment pdb file for creating a pdb data object

        :param file_path: path of the mmCIF file to parse
        :param params: dict of parameters; passed through ``_match_features``,
            whose returned version is what gets returned
        :return: tuple ``(mmcif_data, pp_no, params)``. ``mmcif_data`` is the
            populated data dict when ``params['pdb_info']`` has a truthy
            ``sequence_identities`` entry, ``{}`` when it has not, and
            ``None`` when an exception interrupted processing. ``pp_no`` is
            the number of peptides ``PPBuilder`` built.

        NOTE: the ``return`` inside ``finally`` discards any in-flight
        exception (including the re-raise after a parser error), so failures
        inside the ``try`` never reach the caller. This is kept unchanged
        because callers only ever see the returned tuple.
    """
    logging.info(
        f'Parsing pdb file {file_path} to a pdb structure with params: {params}'
    )

    parser = PDB.MMCIFParser()
    cif = file_path
    pp_no = 0
    mmcif_data = None

    try:
        structure = parser.get_structure("PHA-L", cif)
    except (RuntimeError, TypeError, KeyError, ValueError) as e:
        # Python 3 exceptions have no ``.message`` attribute; formatting it
        # raised AttributeError here and this log line was never emitted.
        logging.info(f'MMCIFParser errored with message: {e}')
        raise
    else:
        ppb = PPBuilder()
        pp_no = sum(1 for _ in ppb.build_peptides(structure))

        header = structure.header
        struc_name = header.get('name', '')
        hd = self._upload_to_shock(file_path)

        (cpd, src) = self._get_compound_source(structure)
        (num_models, model_ids) = self._get_models_from_structure(structure)
        (num_chains, chain_ids) = self._get_chains_from_structure(structure)
        (num_residues, residue_ids) = self._get_residues_from_structure(structure)
        (num_atoms, atom_ids) = self._get_atoms_from_structure(structure)
        protein_data = self._get_proteins_by_structure(
            structure, model_ids[0], file_path)
        (protein_data, params) = self._match_features(params, protein_data)

        pdb_info = params.get('pdb_info', None)
        if pdb_info and pdb_info.get('sequence_identities', None):
            mmcif_data = {
                'name': struc_name,
                'head': header.get('head', ''),
                'rcsb_id': header.get('rcsb_id', ''),
                'deposition_date': header.get('deposition_date', ''),
                'release_date': header.get('release_date', ''),
                'structure_method': header.get('structure_method', ''),
                'resolution': header.get('resolution', 0.0),
                'structure_reference': header.get('structure_reference', []),
                'keywords': header.get('keywords', ''),
                'author': header.get('author', ''),
                'compound': cpd,
                'source': src,
                'num_models': num_models,
                'num_chains': num_chains,
                'num_residues': num_residues,
                'num_atoms': num_atoms,
                'num_het_atoms': header.get('num_het_atoms', 0),
                'num_water_atoms': header.get('num_water_atoms', 0),
                'num_disordered_atoms': header.get('num_disordered_atoms', 0),
                'num_disordered_residues': header.get('num_disordered_residues', 0),
                # the same upload handle is stored under all three keys
                'pdb_handle': hd,
                'mmcif_handle': hd,
                'xml_handle': hd,
                'proteins': protein_data
            }
        else:
            mmcif_data = {}
            logging.info(
                f'Parsing pdb file {file_path} failed to match KBase genome/features!'
            )
    finally:
        return mmcif_data, pp_no, params
def test_from_mmcif(self):
    """Smoke test: building a graph from the 1Y26 mmCIF file must not raise."""
    import Bio.PDB as bpdb

    cg = ftmc.from_pdb(
        'test/forgi/threedee/data/1Y26.cif',
        parser=bpdb.MMCIFParser(),
    )
def generate_pairwise_subunits_from_pdb(pdb_file_path, templates_path, file_type, verbose):
    """Take an existing complex and fragment it into each of the pairwise interactions between subunits.

    Keyword arguments:
    pdb_file_path -- path where the complex PDB is
    templates_path -- prefix (normally a folder path ending in a separator)
                      under which the pair files are written; everything
                      matching ``templates_path*`` is deleted first
    file_type -- 'PDB' selects the PDB parser, anything else the mmCIF parser
    verbose -- if True, print a line for every pair file written

    Two chains count as interacting when any pair of their representative
    atoms (CA, else P) is closer than 7 distance units.

    Considerations: Does not consider nucleic acid sequences, it is only for
    testing the program on different complexes"""
    import glob
    import shutil

    num_file = 0

    if file_type == 'PDB':
        parser = pdb.PDBParser(PERMISSIVE=1)
    else:
        parser = pdb.MMCIFParser()
    structure = parser.get_structure('pdb_name', pdb_file_path)

    # give unique chain identifiers to a structure, it has to be similar to
    # the ids of the chains used in build_complex, to be able to use further
    # the structure_in_created_structures() function
    for id_nch, chain in enumerate(structure.get_chains()):
        chain.id = (complete_chain_alphabet[id_nch] + '_', chain.id)

    # free the ./templates_path/ -- same targets as `rm -rf templates_path*`,
    # but without handing a caller-supplied string to a shell. Kept
    # best-effort like the original, whose exit status was ignored.
    for stale in glob.glob(glob.escape(templates_path) + '*'):
        if os.path.isdir(stale) and not os.path.islink(stale):
            shutil.rmtree(stale, ignore_errors=True)
        else:
            try:
                os.remove(stale)
            except OSError:
                pass

    # initialize the saved pairs and structures
    saved_pairs = set()
    saved_structures = []

    # loop through all possible pairwise files
    for chain1 in structure.get_chains():
        for chain2 in structure.get_chains():
            # the following tuples define the pairs already saved
            comb = tuple(list(chain1.id) + list(chain2.id))
            comb_rev = tuple(list(chain2.id) + list(chain1.id))
            if chain1 is chain2 or comb in saved_pairs:
                continue

            # save the combination in both orders
            saved_pairs.add(comb)
            saved_pairs.add(comb_rev)

            if not _chains_interact(chain1, chain2):
                continue

            # create a structure object; the counter advances even when the
            # pair turns out to be a duplicate and nothing is written
            ID = str(num_file)
            num_file += 1
            new_structure = pdb_struct.Structure(ID)
            new_model = pdb_model.Model(0)
            new_model.add(chain1.copy())
            new_model.add(chain2.copy())
            new_structure.add(new_model)

            # move the coordinates of the structure to simulate what would
            # happen if they were coming from different files
            rotation = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
            translation = np.array((0, 0, 1), 'f')
            for atom in new_structure.get_atoms():
                atom.transform(rotation, translation)

            # write to new pdb:
            if structure_in_created_structures(new_structure, saved_structures) is False:
                # record as a saved structure:
                saved_structures.append(new_structure.copy())

                # give unique chains to a structure (A and B)
                for id_nch, chain in enumerate(new_structure.get_chains()):
                    chain.id = chain_alphabet[id_nch]

                if verbose:
                    print(
                        'writing PDB file with the interaction of %s and %s into %s.pdb'
                        % (chain1.id[1], chain2.id[1], ID))

                # write using our customized writer
                io = pdb.PDBIO()
                io.set_structure(new_structure)
                io.save(templates_path + ID + '.pdb')


def _representative_atom(residue):
    """Return the residue's CA atom, else its P atom, else None."""
    atom_ids = [atom.id for atom in residue.get_atoms()]
    if 'CA' in atom_ids:
        return residue['CA']
    if 'P' in atom_ids:
        return residue['P']
    return None


def _chains_interact(chain1, chain2):
    """Return True when any representative-atom pair of the two chains is closer than 7."""
    # Look up chain2's representative atoms once rather than once per residue1.
    candidates = [(residue2, _representative_atom(residue2)) for residue2 in chain2]
    for residue1 in chain1:
        atom1 = _representative_atom(residue1)
        if atom1 is None:
            continue
        for residue2, atom2 in candidates:
            if residue1 != residue2 and atom2 is not None:
                if atom1 - atom2 < 7:
                    return True
    return False
def get_distance_matrix(mmcif_file, chain_id):
    """
    Collect, for the chain chain_id of model 0 of an mmcif file, the residue
    names, one coordinate per residue and the residue numbering, then compute
    every pairwise euclidean distance between those coordinates.

    Returns a dictionary with the keys "residue", "coordinates", "resseq",
    "icode", "distance_matrix", "sequence", "pdb_id" and "chain_id".
    Exactly one chain of model 0 must match chain_id (checked by an assert).
    """
    structure = PDB.MMCIFParser().get_structure("_", mmcif_file)

    residue_names = []  # residue identity (which aminoacid it is)
    coords = []         # xyz of the beta carbon (alpha for GLY)
    resseqs = []        # numerical part of PDB_BEG / PDB_END in the sifts
                        # mapping table (es. 1 for residue 1A)
    icodes = []         # letteral part of PDB_BEG / PDB_END in the sifts
                        # mapping table (es. A for residue 1A)

    matching_chains = 0
    for model in structure:
        if model.id != 0:
            print("Skipping model:", model.id)
            continue
        print("Processing model:", model.id)
        for chain in model:
            if chain.id != chain_id:
                continue
            matching_chains += 1
            for residue in chain.get_residues():
                het_field, resseq, icode = residue.id
                # discard HETATM records
                if het_field != " ":
                    continue
                residue_names.append(residue.resname)
                # GLY does not have a beta carbon
                anchor = "CA" if residue.resname == "GLY" else "CB"
                coords.append(residue[anchor].get_coord())
                resseqs.append(resseq)
                icodes.append(icode)

    assert matching_chains == 1

    coordinates = np.array(coords, dtype=float)
    icode_array = np.array(icodes, dtype=str)
    out = {
        "residue": residue_names,
        "coordinates": coordinates,
        "resseq": np.array(resseqs, dtype=int),
        # NaN is not defined in a str array; use the string pandas shows for
        # a missing value instead of the blank insertion code
        "icode": np.where(icode_array == " ", "nan", icode_array),
    }
    # the Minkowski 2-norm is the euclidean distance
    out["distance_matrix"] = spatial.distance_matrix(coordinates, coordinates, p=2)
    out["sequence"] = "".join(
        SeqUtils.IUPACData.protein_letters_3to1[name.capitalize()]
        for name in residue_names
    )
    out["pdb_id"] = mmcif_file.split(".")[0]
    out["chain_id"] = chain_id
    return out
def parse(*, file_id: str, mmcif_string: str, catch_all_errors: bool = True) -> ParsingResult:
    """Parses the contents of an mmCIF file into a ParsingResult.

    Args:
      file_id: Identifier for this file; used as part of the key under which
        errors are reported. Should be unique among the files processed.
      mmcif_string: Contents of an mmCIF file.
      catch_all_errors: If True, any exception is stored in the returned
        ParsingResult's errors instead of being raised. If False the
        exception is re-raised after being recorded.

    Returns:
      A ParsingResult. Its mmcif_object is None when no protein chains were
      found or when an exception was caught.
    """
    errors = {}
    try:
        parser = PDB.MMCIFParser(QUIET=True)
        full_structure = parser.get_structure('', io.StringIO(mmcif_string))
        first_model_structure = _get_first_model(full_structure)

        # The parser's raw dictionary holds fields the Biopython structure
        # does not expose.
        parsed_info = parser._mmcif_dict  # pylint:disable=protected-access

        # Wrap singleton values so that every value is a list.
        for key, value in parsed_info.items():
            if not isinstance(value, list):
                parsed_info[key] = [value]

        header = _get_header(parsed_info)

        # Protein chains, keyed by internal mmCIF chain id.
        valid_chains = _get_protein_chains(parsed_info=parsed_info)
        if not valid_chains:
            return ParsingResult(
                None, {(file_id, ''): 'No protein chains found in this file.'})

        # First residue number of each chain in the internal mmCIF numbering
        # (likely but not guaranteed to be 1).
        seq_start_num = {}
        for chain_id, seq in valid_chains.items():
            seq_start_num[chain_id] = min(monomer.num for monomer in seq)

        # Walk the atoms that have coordinates and fill two mappings:
        # - mmcif_to_author_chain_id: internal mmCIF chain id -> chain id used
        #   by the authors / Biopython.
        # - seq_to_structure_mappings: author chain id -> {index into the
        #   sequence -> ResidueAtPosition}.
        mmcif_to_author_chain_id = {}
        seq_to_structure_mappings = {}
        for atom in _get_atom_site_list(parsed_info):
            if atom.model_num != '1':
                # Only the first model is processed at the moment.
                continue

            mmcif_to_author_chain_id[atom.mmcif_chain_id] = atom.author_chain_id
            if atom.mmcif_chain_id not in valid_chains:
                continue

            if atom.hetatm_atom != 'HETATM':
                hetflag = ' '
            elif atom.residue_name in ('HOH', 'WAT'):
                # Biopython gives water the special hetflag W; mirror that so
                # the flag can be used to fetch the residue from the
                # Biopython structure by id.
                hetflag = 'W'
            else:
                hetflag = 'H_' + atom.residue_name

            if _is_set(atom.insertion_code):
                insertion_code = atom.insertion_code
            else:
                insertion_code = ' '

            position = ResiduePosition(
                chain_id=atom.author_chain_id,
                residue_number=int(atom.author_seq_num),
                insertion_code=insertion_code)
            seq_idx = int(atom.mmcif_seq_num) - seq_start_num[atom.mmcif_chain_id]
            residue_at_position = ResidueAtPosition(
                position=position,
                name=atom.residue_name,
                is_missing=False,
                hetflag=hetflag)
            chain_mapping = seq_to_structure_mappings.setdefault(
                atom.author_chain_id, {})
            chain_mapping[seq_idx] = residue_at_position

        # Sequence positions without coordinates are recorded as missing.
        for chain_id, seq_info in valid_chains.items():
            current_mapping = seq_to_structure_mappings[
                mmcif_to_author_chain_id[chain_id]]
            for idx, monomer in enumerate(seq_info):
                if idx in current_mapping:
                    continue
                current_mapping[idx] = ResidueAtPosition(
                    position=None,
                    name=monomer.id,
                    is_missing=True,
                    hetflag=' ')

        # One-letter sequence per author chain; anything that does not map
        # to a single letter becomes 'X'.
        author_chain_to_sequence = {}
        for chain_id, seq_info in valid_chains.items():
            letters = []
            for monomer in seq_info:
                code = SCOPData.protein_letters_3to1.get(monomer.id, 'X')
                letters.append('X' if len(code) != 1 else code)
            author_chain_to_sequence[
                mmcif_to_author_chain_id[chain_id]] = ''.join(letters)

        mmcif_object = MmcifObject(
            file_id=file_id,
            header=header,
            structure=first_model_structure,
            chain_to_seqres=author_chain_to_sequence,
            seqres_to_structure=seq_to_structure_mappings,
            raw_string=parsed_info)

        return ParsingResult(mmcif_object=mmcif_object, errors=errors)
    except Exception as e:  # pylint:disable=broad-except
        errors[(file_id, '')] = e
        if not catch_all_errors:
            raise
        return ParsingResult(mmcif_object=None, errors=errors)
ligDF = pd.read_csv(ligFile) feLig = ligDF[ligDF['Ligand_Formula'].str.contains("Fe") == True] import csv #loop through feLig and access mmcif file with open("dataOut.txt","w") as outFile: outWriter = csv.writer(outFile) for row in feLig.itertuples(): with gzip.open(os.path.join(d,"mmcif",row.PDB_ID.lower()+".cif.gz"),"rt") as handle: # print(row.PDB_ID,row.Chain_ID) STR = PDB.MMCIFParser(QUIET=True).get_structure(row.PDB_ID,handle) chain = STR[0][row.Chain_ID] atom_list = PDB.Selection.unfold_entities(STR,"A") # print(type(row.Ligand_ID)) # gather ligands that have the intended name ligands = [res for res in chain.get_list() if res.get_resname() == row.Ligand_ID] #loop through lig atoms, get fe coords, get nearest neighbors of fe's for lig in ligands: for atom in lig.get_atoms(): # print(atom.get_name()) if "FE" in atom.get_name():
# Print the distance between the CA atoms of residues 2 and 3 of chain A
# (first model) of the local 2DN1 mmCIF file.
from Bio import PDB

structure = PDB.MMCIFParser().get_structure("2DN1", "dn\\2dn1.cif")
chain = structure[0]['A']

# look up both residues before touching their atoms
first_residue, second_residue = chain[2], chain[3]
first_ca = first_residue['CA']
second_ca = second_residue['CA']

# subtracting two Bio.PDB atoms yields their distance
dist = first_ca - second_ca
print(dist)
def compute_dihedrals(pdbfilename):
    """Print backbone and side-chain dihedrals for every standard residue.

    For each model and chain, residues with a blank hetero flag are walked in
    order and one row per residue is printed: file label, model id, chain id,
    residue number, residue name, phi, psi, omega, chi1-chi4, each
    right-justified to width 8. Values that need a neighbour the residue does
    not have (phi/omega of the first residue, psi of the last) are printed as
    999.0. A chain with a single standard residue prints nothing.

    :param pdbfilename: path to a ``.pdb`` or ``.cif`` file, optionally
        gzip-compressed (``.gz``); the format is detected from the name,
        case-insensitively, and ``.cif`` wins if both substrings occur
    :raises ValueError: when the name contains neither ``.pdb`` nor ``.cif``
        (previously this surfaced as an unbound-variable error)
    :return: None
    """
    ignoremodified = (
        'PTR', 'TPO', 'SEP', 'MSE', 'BWB', 'CAS', 'CME', 'CSO', 'CSS', 'CSX',
        'MK8', 'MLY', 'NEP', 'NMM', 'PHD', 'CAF', 'CSD', 'CYO', 'OCS', 'OCY',
        'SCS', 'ALY', 'KCX', ',LGY', 'CXM', 'MHO', 'T8L', 'ACE', 'AME', 'CY0',
        'UNK', 'T8L', 'MHO', 'COM',
    )

    compressed = '.gz' in pdbfilename.lower()
    # Name without the 3-character ".gz" tail; used for format detection and
    # for the label printed in every row.
    plain_name = pdbfilename[0:-3] if compressed else pdbfilename

    lowered = plain_name.lower()
    if '.cif' in lowered:
        parser = PDB.MMCIFParser(QUIET=True)
    elif '.pdb' in lowered:
        parser = PDB.PDBParser(QUIET=True)
    else:
        raise ValueError(
            "compute_dihedrals: cannot tell PDB from mmCIF by name: " + pdbfilename)

    # Parsing is complete when get_structure returns, so the handle is
    # closed right away instead of being leaked.
    opened = gzip.open(pdbfilename, 'rt') if compressed else open(pdbfilename, 'r')
    with opened as handle:
        structure = parser.get_structure("PDB", handle)

    label = plain_name[0:-4]  # drop the 4-character extension
    for model in structure:
        for chain in model:
            first = 1  # 1: no residue yet, 2: one residue seen, 3: two or more
            for residue in chain:
                # NOTE(review): the second operand is only evaluated when the
                # hetero flag is ' ', where id[0][2:] is '' and never in the
                # tuple, so this skips every hetero residue and the
                # ignoremodified list has no effect. Kept as-is; confirm
                # whether modified residues were meant to be included.
                if residue.id[0] != ' ' or residue.id[0][2:] in ignoremodified:
                    continue

                if first == 1:
                    prev_residue = residue
                    first = 2
                    continue

                if first == 2:
                    # First residue of the chain: psi and chi angles only.
                    curr_residue = residue
                    psi = compute_psi(structure, model, chain, prev_residue, curr_residue)
                    chi1 = compute_chi1(structure, model, chain, prev_residue)
                    chi2 = compute_chi2(structure, model, chain, prev_residue)
                    chi3 = compute_chi3(structure, model, chain, prev_residue)
                    chi4 = compute_chi4(structure, model, chain, prev_residue)
                    first = 3
                    _print_dihedral_row(
                        label, model.id, chain.id, prev_residue.id[1],
                        prev_residue.resname, 999.00, psi, 999.00,
                        chi1, chi2, chi3, chi4)
                    continue

                # first == 3: prev, curr and next are all known, so the
                # middle residue gets the full set of angles.
                next_residue = residue
                phi = compute_phi(structure, model, chain, prev_residue, curr_residue)
                psi = compute_psi(structure, model, chain, curr_residue, next_residue)
                omega = compute_omega(structure, model, chain, prev_residue, curr_residue)
                chi1 = compute_chi1(structure, model, chain, curr_residue)
                chi2 = compute_chi2(structure, model, chain, curr_residue)
                chi3 = compute_chi3(structure, model, chain, curr_residue)
                chi4 = compute_chi4(structure, model, chain, curr_residue)
                _print_dihedral_row(
                    label, model.id, chain.id, curr_residue.id[1],
                    curr_residue.resname, phi, psi, omega,
                    chi1, chi2, chi3, chi4)
                # slide the window
                prev_residue = curr_residue
                curr_residue = next_residue

            if first == 3:
                # Last residue of the chain: no psi available.
                phi = compute_phi(structure, model, chain, prev_residue, curr_residue)
                omega = compute_omega(structure, model, chain, prev_residue, curr_residue)
                chi1 = compute_chi1(structure, model, chain, curr_residue)
                chi2 = compute_chi2(structure, model, chain, curr_residue)
                chi3 = compute_chi3(structure, model, chain, curr_residue)
                chi4 = compute_chi4(structure, model, chain, curr_residue)
                _print_dihedral_row(
                    label, model.id, chain.id, curr_residue.id[1],
                    curr_residue.resname, phi, 999.00, omega,
                    chi1, chi2, chi3, chi4)
    return


def _print_dihedral_row(*fields):
    """Print the fields as one line, each converted with str() and right-justified to width 8."""
    print(''.join(str(field).rjust(8) for field in fields))