def sec_str_rev_filter(PDBfile, model, chain, seq_index, sec_str):
    """
    Helper function.
    A filter for secondary structures; reverse filter of sec_str_filter.

    Each entry of seq_index is used as a position in chain.child_list.
    Entries whose residue has a DSSP record with first element equal to
    sec_str are removed; residues without a DSSP record (KeyError, i.e.
    not an amino acid) are kept.

    seq_index is filtered in place and the same list is returned.
    """
    from Bio.PDB import DSSP

    dssp = DSSP(model, PDBfile)  # TODO: the output printed here still needs hiding

    pos = 0
    while pos < len(seq_index):
        residue = chain.child_list[seq_index[pos]]
        try:
            assigned = dssp[residue][0]
        except KeyError:
            # not an amino acid: keep the entry and move on
            pos += 1
            continue
        if assigned == sec_str:
            # After removal `pos` already points at the next candidate.
            seq_index.remove(seq_index[pos])
        else:
            pos += 1
    return seq_index
def make_dssp(pdb_id, chain, chain_delimiter=':'):
    """
    Retrieve dssp string from PDB database

    The PDB entry is downloaded to a file named ``pdb_id`` in the current
    directory, run through ``mkdssp`` and deleted again. The file is
    removed even when parsing or DSSP fails.

    :param pdb_id: pdb id
    :param chain: pdb chain id
    :param chain_delimiter: delimiter between id and chain id
    :return: id, aa sequence, ss sequence
    """
    url = PDB_URL + pdb_id + '.pdb'
    urllib.request.urlretrieve(url, pdb_id)
    try:
        structure = PDBParser().get_structure(pdb_id, pdb_id)
        dssp = DSSP(structure[0], pdb_id, dssp='mkdssp')
        aa_parts = []
        ss_parts = []
        for key in dssp.keys():
            # key[0] is the chain id of the DSSP record
            if key[0] == chain:
                entry = dssp[key]
                aa_parts.append(entry[1])
                ss_parts.append(SS_MAP[entry[2]])
    finally:
        # Clean up the download no matter how the block above ended.
        os.remove(pdb_id)
    return pdb_id + chain_delimiter + chain, ''.join(aa_parts), ''.join(ss_parts)
def run_DSSP(self, corresponding_gene_call, pdb_filepath):
    """
    DSSP is ran using the API developed in Biopython. That means we don't work directly from the
    text output of DSSP, but rather a Biopython object.

    The structure at `pdb_filepath` is parsed under the id `corresponding_gene_call`, DSSP is run
    on its first model with `self.DSSP_executable` and the "Wilke" accessibility table, and the
    result is handed to `self.convert_DSSP_output_from_biopython_to_dataframe`.

    Raises ConfigError when DSSP returns no residue annotations.
    """
    # Determine the model name by loading the structure file
    structure = PDBParser().get_structure(corresponding_gene_call, pdb_filepath)
    model = structure[0]  # pdb files can have multiple models. DSSP assumes the first.

    # run DSSP
    residue_annotation = DSSP(model, pdb_filepath, dssp=self.DSSP_executable, acc_array="Wilke")

    if not residue_annotation.keys():
        # Adjacent literals instead of backslash continuations, so source indentation no
        # longer ends up inside the message.
        raise ConfigError(
            "Your executable of DSSP, `{}`, exists but didn't return any meaningful output. This "
            "is a known issue with certain distributions of DSSP. For information on how to test "
            "that your version is working correctly, please visit "
            "http://merenlab.org/2016/06/18/installing-third-party-software/#dssp"
            .format(self.DSSP_executable))

    # convert to a digestible format
    return self.convert_DSSP_output_from_biopython_to_dataframe(residue_annotation)
def run_dssp(biopdb, pdb_path, work_dir=None, job=None):
    """Run DSSP for a structure, through Docker when possible.

    When `apiDockerCall` is available and a `job` is given, the PDB file is
    copied into `work_dir` if it is not already there, the
    ``edraizen/dssp:latest`` image is run on it, and the resulting ``.dssp``
    file is loaded for the first model of `biopdb`. Otherwise a local
    ``dssp`` executable is run directly.

    :param biopdb: parsed structure; the Docker branch uses ``biopdb[0]``
    :param pdb_path: path to the PDB file the structure was read from
    :param work_dir: working directory, defaults to the current directory
    :param job: job object forwarded to `apiDockerCall`
    :return: the DSSP object, or None when the local run hits a NameError
    :raises InvalidPDB: when the local DSSP run fails for any other reason
    """
    work_dir = work_dir or os.getcwd()

    if apiDockerCall is not None and job is not None:
        if os.path.abspath(os.path.dirname(pdb_path)) != os.path.abspath(work_dir):
            # Was `pdb_file`, a name that does not exist in this function.
            shutil.copy(pdb_path, work_dir)

        pdb_name = os.path.basename(pdb_path)
        # presumably work_dir is mounted at /data inside the container -- confirm
        parameters = [
            '-i', os.path.join("/data", pdb_name),
            '-o', os.path.join("/data", pdb_name + ".dssp"),
        ]
        apiDockerCall(job,
                      image='edraizen/dssp:latest',
                      working_dir=work_dir,
                      parameters=parameters)
        return DSSP(biopdb[0],
                    os.path.join(work_dir, pdb_name + ".dssp"),
                    file_type='DSSP')

    # NOTE(review): this branch passes `biopdb` itself while the Docker branch
    # passes `biopdb[0]` -- confirm which one the local run should receive.
    try:
        return DSSP(biopdb, pdb_path, dssp='dssp')
    except NameError:
        return None
    except Exception:
        # KeyboardInterrupt is not an Exception subclass, so it still
        # propagates without an explicit re-raise clause.
        raise InvalidPDB("Cannot run dssp for {}".format(pdb_path))
def run(self):
    """Run DSSP on self.struct and report the outcome through self.finished.

    The structure is saved to a temporary file and ``mkdssp`` is run on its
    first model. For every DSSP key that shares the chain id of the first
    key, ``result[entry[0] + self.offset] = entry[2]`` is recorded.

    Emits ``[result, self.seq, self.node]`` on success and
    ``[None, None, None]`` on any failure.
    """
    tmp = QTemporaryFile()
    if not tmp.open():
        # No file to hand to DSSP: report failure rather than emit nothing.
        self.finished.emit([None, None, None])
        return

    io = PDBIO()
    io.set_structure(self.struct)
    io.save(tmp.fileName())
    try:
        dssp = DSSP(self.struct[0], tmp.fileName(), dssp='mkdssp')
        first_chain = next(iter(dssp.keys()))[0]
        result = {}
        for key in dssp.keys():
            if key[0] == first_chain:
                # NOTE(review): the original author flagged this indexing as
                # possibly wrong -- confirm that entry[0] + offset is the
                # intended key.
                entry = dssp[key]
                result[entry[0] + self.offset] = entry[2]
        self.finished.emit([result, self.seq, self.node])
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C and SystemExit
        # are not reported as a DSSP failure.
        traceback.print_exc()
        print("SORRY, DSSP WAS NOT FOUND")
        self.finished.emit([None, None, None])
def dssp(self):
    '''Get DSSP object

    The result is cached in self._dssp. Only the model with id 0 is run
    through DSSP; any other model gets an empty dict. When the parent's
    pdb_file() is not a string, its contents are first copied to a
    temporary file with a '.cif' or '.pdb' suffix.
    '''
    if self._dssp is not None:
        return self._dssp

    def _run_with_fallback(path):
        # Try the default executable first, then "mkdssp" on OSError.
        try:
            return DSSP(self.model, path)
        except OSError:
            return DSSP(self.model, path, dssp="mkdssp")

    source_is_path = isinstance(self._parent.pdb_file(), str)

    #DSSP only works on the first model in the PDB file
    if self._id != 0:
        self._dssp = {}
    elif source_is_path:
        self._dssp = _run_with_fallback(self._parent.pdb_file())
    else:
        suffix = '.cif' if self._parent._mmcif else '.pdb'
        with NamedTemporaryFile(mode='w', suffix=suffix) as temp_pdb_file:
            temp_pdb_file.write(self._parent.pdb_file().read())
            temp_pdb_file.flush()
            self._dssp = _run_with_fallback(temp_pdb_file.name)
    return self._dssp
def test_dssp_with_mmcif_file_and_different_chain_ids(self):
    """Test DSSP generation from MMCIF which has different label and author chain IDs."""
    if self.dssp_version < StrictVersion("2.2.0"):
        self.skipTest("Test requires DSSP version 2.2.0 or greater")
    cif_path = "PDB/1A7G.cif"
    structure = self.cifparser.get_structure("1A7G", cif_path)
    annotations = DSSP(structure[0], cif_path)
    self.assertEqual(len(annotations), 82)
    # The chain id of the first DSSP key must be "E".
    first_key = annotations.keys()[0]
    self.assertEqual(first_key[0], "E")
def ss_map_creator(self, struc_to_aln_index_mapping):
    '''
    Connects the alignment mapping index and the secondary structural
    assignments from DSSP.

    Returns a dict whose keys come from the inverse map returned by
    self.structure_loader (looked up with element [1][1] of each DSSP key)
    and whose values are the DSSP codes (index 2 of each DSSP entry)
    translated through self.DSSP_code_mycode.
    '''
    inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
    dssp = DSSP(model, self.struc_path)
    # NOTE(review): a DSSP residue that is absent from inv_map raises
    # KeyError here -- confirm whether such residues should be skipped.
    return {
        inv_map[res_key[1][1]]: self.DSSP_code_mycode[dssp[res_key][2]]
        for res_key in list(dssp.keys())
    }
def depth_map_creator(self, struc_to_aln_index_mapping):
    '''Connects the alignment mapping index and the residue depth

    Every DSSP entry is labelled 'E' when its value at index 3 is greater
    than 0.2 and 'B' otherwise; the label is stored under the alignment
    index found in the inverse map returned by self.structure_loader.

    NOTE(review): no residue depth is computed here; index 3 of the DSSP
    entry is presumably the relative accessibility -- confirm.
    '''
    inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
    dssp = DSSP(model, self.struc_path)
    res_depth_aln_index_map = {}
    for res_key in list(dssp.keys()):
        label = 'E' if dssp[res_key][3] > 0.2 else 'B'
        res_depth_aln_index_map[inv_map[res_key[1][1]]] = label
    return res_depth_aln_index_map
def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
    """Test DSSP generation from MMCIF with non-standard residues."""
    cif_path = "PDB/1AS5.cif"
    structure = MMCIFParser().get_structure("1AS5", cif_path)
    annotations = DSSP(structure[0], cif_path)
    # 24 DSSP records are expected for this entry.
    self.assertEqual(len(annotations), 24)
def test_dssp(self):
    """Test DSSP generation from PDB."""
    pdb_path = "PDB/2BEG.pdb"
    structure = PDBParser().get_structure("2BEG", pdb_path)
    annotations = DSSP(structure[0], pdb_path)
    # 130 DSSP records are expected for this entry.
    self.assertEqual(len(annotations), 130)
def CalculateHydrogenBonds(ProteinModel, Filename, EnergyCutoff, PathToDSSP):
    """Count low-energy backbone hydrogen bonds per standard amino acid.

    DSSP is run on `ProteinModel` / `Filename` with the executable at
    `PathToDSSP`. For every standard amino-acid residue, the DSSP values at
    indices 7 and 11 are compared with `EnergyCutoff`; each value below the
    cutoff counts as one bond. (In Biopython's DSSP tuple those indices are
    the two N-H-->O energies.)

    Residues without a DSSP entry are reported on stderr and keep a count of
    zero. The total is written to stdout.

    Returns a dict mapping ``(Chain, Residue)`` to its bond count.
    """
    # Run DSSP algorithm
    DSSPOutput = DSSP(ProteinModel, Filename, dssp=PathToDSSP)

    # Assign structure
    HydrogenBonds = {}
    TotalNumberOfHydrogenBonds = 0
    for Chain in ProteinModel:
        ChainID = Chain.get_id()
        for Residue in Chain:
            if not is_aa(Residue.get_resname(), standard=True):
                continue
            ResidueID = Residue.get_id()
            HydrogenBonds[Chain, Residue] = 0
            try:
                DSSPEntry = DSSPOutput[(ChainID, ResidueID)]
            except KeyError:
                # Only a missing entry is expected here; a bare except would
                # also hide Ctrl-C and unrelated errors.
                sys.stderr.write("No DSSP entry generated for amino acid residue " + str(ResidueID[1]) + ". Ignoring the residue. \n")
                continue
            for EnergyIndex in (7, 11):
                if float(DSSPEntry[EnergyIndex]) < EnergyCutoff:
                    HydrogenBonds[Chain, Residue] += 1
                    TotalNumberOfHydrogenBonds += 1

    sys.stdout.write(str(TotalNumberOfHydrogenBonds) + " backbone N-O hydrogen bonds.\n")
    return HydrogenBonds
def test_DSSP_in_model_obj(self):
    """All elements correctly added to xtra attribute of input model object."""
    structure = PDBParser().get_structure("example", "PDB/2BEG.pdb")
    m = structure[0]
    # Read the DSSP data into the pdb object:
    _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Sander", "DSSP")
    # Pre-computed reference values, one line per residue:
    with open("PDB/dssp_xtra_Sander.txt") as fh_ref:
        ref_lines = fh_ref.readlines()
    # Compare the xtra attribute residue by residue, in chain order.
    for i, res in enumerate(res for chain in m for res in chain):
        # Reference line: tab-separated values, converted to float where
        # possible (string formatting of floats differs between Python
        # versions, so raw strings cannot be compared).
        expected = [will_it_float(v) for v in ref_lines[i].rstrip().split("\t")]
        # xtra is a dict; sort by key so the order matches the reference.
        by_key = sorted(res.xtra.items(), key=lambda item: item[0])
        actual = [will_it_float(value) for _, value in by_key]
        self.assertEqual(actual, expected)
def test_dssp_with_mmcif_file(self):
    """Test DSSP generation from MMCIF."""
    cif_path = "PDB/2BEG.cif"
    structure = MMCIFParser().get_structure("2BEG", cif_path)
    annotations = DSSP(structure[0], cif_path)
    # 130 DSSP records are expected for this entry.
    self.assertEqual(len(annotations), 130)
def test_DSSP_RSA(self):
    """Tests the usage of different ASA tables."""
    # Tests include Sander/default, Wilke and Miller
    p = PDBParser()
    cases = (
        ("Sander", "PDB/Sander_RASA.txt"),
        ("Wilke", "PDB/Wilke_RASA.txt"),
        ("Miller", "PDB/Miller_RASA.txt"),
    )
    for acc_table, ref_path in cases:
        # Parse a fresh copy of the structure for every table.
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        # Read the DSSP data into the pdb object:
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", acc_table, "DSSP")
        # Then compare the RASA values for each residue with the pre-computed values:
        with open(ref_path) as fh_ref:
            ref_lines = fh_ref.readlines()
        for i, res in enumerate(res for chain in m for res in chain):
            rasa_ref = float(ref_lines[i].rstrip())
            rasa = float(res.xtra["EXP_DSSP_RASA"])
            self.assertAlmostEqual(rasa, rasa_ref)
def both_map_creator(self, struc_to_aln_index_mapping):
    '''Connects the alignment mapping index with exposure and secondary structure

    For every DSSP residue that appears in the inverse map returned by
    self.structure_loader, a two-character value is stored: 'E' when the
    DSSP value at index 3 is above 0.2 and 'B' otherwise, followed by the
    DSSP code at index 2 translated through self.DSSP_code_mycode.
    Residues absent from the inverse map are skipped.

    Raises OSError when running DSSP fails with an OSError.
    '''
    sda = {}
    inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
    try:
        dssp = DSSP(model, self.struc_path)
    except OSError as e:
        # str(e) is required: adding the exception object to a string raised
        # TypeError and hid the real failure.
        raise OSError("DSSP failed with the following error:\n" + str(e))
    for a_key in list(dssp.keys()):
        res_num = a_key[1][1]
        if res_num not in inv_map:
            continue
        entry = dssp[a_key]
        exposure = 'E' if entry[3] > 0.2 else 'B'
        sda[inv_map[res_num]] = exposure + self.DSSP_code_mycode[entry[2]]
    return sda
def test_dssp_with_mmcif_file(self):
    """Test DSSP generation from MMCIF."""
    if self.dssp_version < StrictVersion("2.2.0"):
        self.skipTest("Test requires DSSP version 2.2.0 or greater")
    cif_path = "PDB/2BEG.cif"
    structure = self.cifparser.get_structure("2BEG", cif_path)
    annotations = DSSP(structure[0], cif_path)
    # 130 DSSP records are expected for this entry.
    self.assertEqual(len(annotations), 130)
def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
    """Test DSSP generation from MMCIF with non-standard residues."""
    if self.dssp_version < StrictVersion("2.2.0"):
        self.skipTest("Test requires DSSP version 2.2.0 or greater")
    cif_path = "PDB/1AS5.cif"
    structure = self.cifparser.get_structure("1AS5", cif_path)
    annotations = DSSP(structure[0], cif_path)
    # 24 DSSP records are expected for this entry.
    self.assertEqual(len(annotations), 24)
def write_models(pdb_id, structure, config_params):
    """Write per-model and per-chain PDB files and run DSSP on each chain.

    Every model of `structure` is saved to ``<tmp_dir>/model_<pdb_id>_<serial>``.
    Each of those files is parsed again, every chain is saved to
    ``<model_file>_<chain id>``, and DSSP (executable taken from
    ``config_params.get('dssp')``) is run on that single-chain file. A DSSP
    failure is logged as a warning and recorded as None.

    Returns ``(chain_model, model_chain)``:
      chain_model: {chain: [(model_id, model_file, model_dssp), ...]}
      model_chain: {model_id: [(chain_id, model_file, model_dssp), ...]}
    """
    chain_model = {}
    model_chain = {}
    io = PDBIO()

    # Split models. Models can start from a number greater than 0, so the
    # serial number is used rather than a running index.
    model_files = []  # [(model_id, model_file), ...]
    for model in structure:
        serial = model.serial_num
        model_path = "{}/model_{}_{}".format(tmp_dir, pdb_id, serial)
        model_files.append((serial, model_path))
        io.set_structure(structure)
        io.save(model_path, select=ModelSelect([serial]))

    # Split chains. For each model save chain separately and get dssp
    for model_id, model_file in model_files:
        model_structure = PDBParser(QUIET=True).get_structure(model_id, model_file)
        # model_structure should have length 1
        for single_model in model_structure:
            for chain in single_model:
                chain_file = "{}_{}".format(model_file, chain.id)
                io.set_structure(model_structure)
                io.save(chain_file, select=ChainSelect([chain.id]))

                # Calculate secondary structure for each chain separately (for Flipper)
                chain_structure = PDBParser(QUIET=True).get_structure(
                    "{}_{}_{}".format(pdb_id, model_id, chain.id), chain_file)
                try:
                    chain_dssp = DSSP(chain_structure[0], chain_file,
                                      dssp=config_params.get('dssp'))
                except Exception as err:
                    logging.warning("{} DSSP error {} {} {}".format(
                        pdb_id, model_file, chain.id, err))
                    chain_dssp = None

                chain_model.setdefault(chain.id, []).append(
                    (model_id, chain_file, chain_dssp))
                model_chain.setdefault(model_id, []).append(
                    (chain.id, chain_file, chain_dssp))
    return chain_model, model_chain
def get_secondary_structure_details(self, name, pdb_file, aa_only=False):
    """Parse a PDB file and collect sequence, DSSP codes and surface values.

    DSSP is run on the first model with the "Wilke" accessibility table.

    Returns ``(name, seq, ss, sasa, structure)`` where
      seq  -- concatenation of the peptide sequences built by PPBuilder,
      ss   -- the DSSP code (index 2) of every DSSP entry joined into a string,
      sasa -- per entry, ``residues[entry[1]] * entry[3]``
              (presumably maximum accessibility times relative
              accessibility; `residues` is defined outside this method --
              confirm).
    """
    structure = PDBParser().get_structure(name, pdb_file)
    dssp = DSSP(structure[0], pdb_file, acc_array="Wilke")

    ss = "".join(entry[2] for entry in dssp)
    sasa = [residues[entry[1]] * entry[3] for entry in dssp]

    seq = ""
    peptides = PPBuilder().build_peptides(structure, aa_only=aa_only)
    for peptide in peptides:
        seq += peptide.get_sequence()
    return name, seq, ss, sasa, structure
def get_DSSPList(file):
    """Return one tuple per DSSP residue of the given PDB file.

    DSSP is run twice with the executable `fileExe`: once through the DSSP
    class (accessibility table `asaName`) and once through
    dssp_dict_from_pdb_file. Each returned tuple holds, in order: the
    residue number, the amino acid, the acc value (both from the
    dictionary) and the RASA value (index 3 of the DSSP class entry).
    """
    # parse the pdb file and take the first model of the structure
    structure = PDBParser().get_structure('X', file)
    model = structure[0]
    # DSSP object: supplies the RASA values
    d = DSSP(model, file, dssp=fileExe, acc_array=asaName)
    # dssp dictionary from the PDB file: supplies amino acid and acc value
    dssp_dict, _unused_keys = dssp_dict_from_pdb_file(file, DSSP=fileExe)
    return [
        (key[1][1], dssp_dict[key][0], dssp_dict[key][2], d[key][3])
        for key in list(d.keys())
    ]
def extract_feature(self):
    """Add DSSP-derived features to the ligand and receptor of every complex.

    For each protein, a ``<name>.npy`` cache file is looked up in the
    directory given by ``self.__get_dir_name()``. If it is missing, DSSP is
    run on the protein's PDB file and a ``(len(protein.residues), 6)``
    array is saved; columns 2-5 receive ``dssp[key][2:]`` for residues
    that have a DSSP entry and stay zero otherwise. The cached array is
    then loaded and columns 2..5 are attached to the residues as
    ACCESSIBLE_SURFACE_AREA, RELATIVE_ACCESSIBLE_SURFACE_AREA, PHI and PSI.
    """
    print_info_nn(
        " >>> Adding secondary structure for database {0} ... ".format(
            self._database.name))
    overall_time = datetime.now()
    if not os.path.exists(self.__get_dir_name()):
        os.mkdir(self.__get_dir_name())
    for complex_name in self._database.complexes.keys():
        protein_complex = self._database.complexes[complex_name]
        proteins = [
            protein_complex.unbound_formation.ligand,
            protein_complex.unbound_formation.receptor
        ]
        for protein in proteins:
            dssp_file = self.__get_dir_name() + protein.name + ".npy"
            if not os.path.exists(dssp_file):
                print_info_nn("... running DSSP for protein " + protein.name)
                start_time = datetime.now()
                dssp = DSSP(
                    protein.structure[0], self._database.directory +
                    pdb_directory + protein.name + ".pdb")
                # np.zeros, not np.ndarray: the latter leaves uninitialised
                # memory in every cell that is not written below, and that
                # garbage would be saved to the cache file.
                dssp_array = np.zeros((len(protein.residues), 6))
                for (i, res) in enumerate(protein.biopython_residues):
                    (_, _, cid, rid) = res.get_full_id()
                    key = (cid, rid)
                    if key in dssp:
                        # assumes dssp[key][2:] holds exactly four numeric
                        # values -- TODO confirm for the Biopython in use
                        dssp_array[i, 2:] = (dssp[key])[2:]
                    # residues without a DSSP entry keep their zero row
                np.save(dssp_file, dssp_array)
                print_info("took {0} seconds.".format(
                    (datetime.now() - start_time).seconds))
            cached = np.load(dssp_file)
            for i, res in enumerate(protein.residues):
                # columns: (_, s, ASA, rASA, phi, psi)
                res.add_feature(Features.ACCESSIBLE_SURFACE_AREA, cached[i, 2])
                res.add_feature(Features.RELATIVE_ACCESSIBLE_SURFACE_AREA,
                                cached[i, 3])
                res.add_feature(Features.PHI, cached[i, 4])
                res.add_feature(Features.PSI, cached[i, 5])
    print_info("took {0} seconds.".format(
        (datetime.now() - overall_time).seconds))
def get_asa(residues):
    """Return the mean DSSP accessibility of the given residues.

    The PDB id and chain id are taken from the full id of the first
    residue. DSSP is run on ``pdb_folder_path + "pdb<id>.ent"`` and the
    value at index 3 of each DSSP entry is averaged over the residues that
    have an entry. Returns 0.0 when none of them has one.

    NOTE(review): the previous version divided by ``len(residues)`` plus
    the number of residues found, which halves the mean when every residue
    has an entry. The denominator is now the number of residues found --
    confirm that no caller relied on the old scale.
    """
    full_id = residues[0].get_full_id()
    pdb_id = full_id[0]
    chain_id = full_id[2]

    # One path for both the parser and DSSP (they used to differ by a "/").
    pdb_path = pdb_folder_path + "pdb{}.ent".format(pdb_id)
    structure = PDBParser(QUIET=True).get_structure(pdb_id, pdb_path)
    dssp = DSSP(structure[0], pdb_path,
                dssp=src_folder_path + "bin/xssp-master/mkdssp")
    dssp = dict(dssp)  # Convert to dict to access residues

    counted = 0
    surface = 0
    for residue in residues:
        entry = dssp.get((chain_id, residue.id))
        if entry:
            counted += 1
            surface += entry[3]
    if not counted:
        return 0.0
    return float(surface) / counted
def run_DSSP(self, corresponding_gene_call, pdb_filepath):
    """
    DSSP is ran using the API developed in Biopython. That means we don't work directly from the
    text output of DSSP, but rather a Biopython object.

    The structure at `pdb_filepath` is parsed under the id `corresponding_gene_call`, DSSP is run
    on its first model with `self.DSSP_executable` and the "Wilke" accessibility table, and the
    result is handed to `self.convert_DSSP_output_from_biopython_to_dataframe`.

    Raises ConfigError when DSSP returns no residue annotations.
    """
    # Determine the model name by loading the structure file
    p = PDBParser()
    structure = p.get_structure(corresponding_gene_call, pdb_filepath)
    model = structure[0]  # pdb files can have multiple models. DSSP assumes the first.

    # run DSSP
    residue_annotation = DSSP(model, pdb_filepath, dssp=self.DSSP_executable, acc_array="Wilke")

    if not residue_annotation.keys():
        # Adjacent literals instead of backslash continuations, so source indentation no
        # longer ends up inside the message; the unused second format argument is gone.
        raise ConfigError(
            "Your executable of DSSP, `{}`, exists but didn't return any meaningful output. This "
            "is a known issue with certain distributions of DSSP. For information on how to test "
            "that your version is working correctly, please visit "
            "http://merenlab.org/2016/06/18/installing-third-party-software/#dssp"
            .format(self.DSSP_executable))

    # convert to a digestible format
    return self.convert_DSSP_output_from_biopython_to_dataframe(residue_annotation)
def __init__(self, pdbdata, mode='cpk'):
    """Build the canvas for `pdbdata` and show it.

    Creates the canvas and shader program, parses the structure, runs DSSP
    on its first model, validates `mode` against
    Canvas.visualization_modes, sets up the camera matrices and finally
    loads and shows the data.

    Raises Exception when `mode` is not a recognised visualization mode.
    """
    # Startup
    app.Canvas.__init__(self, keys='interactive', size=(W, H))

    # Loading shaders
    self.program = gloo.Program(vertex, fragment)

    # Analyze pdb file
    self.parser = PDBParser(QUIET=True, PERMISSIVE=True)
    self.structure = self.parser.get_structure('model', pdbdata)

    # DSSP prediction (first model only)
    self.pmodel = self.structure[0]
    self.dssp = DSSP(self.pmodel, pdbdata)

    # Mode selection
    if mode not in Canvas.visualization_modes:
        raise Exception('Not recognized visualization mode %s' % mode)
    self.mode = mode

    # Camera settings: start 50 units back, never closer than -1
    self.translate = max(-1, 50)
    self.view = translate((0, 0, -self.translate), dtype=np.float32)
    self.model = np.eye(4, dtype=np.float32)
    self.projection = np.eye(4, dtype=np.float32)
    self.program['u_projection'] = self.projection
    self.quaternion = Quaternion()

    # Load data depending on the mode
    self.apply_zoom()
    self.atom_information()
    self.load_data()
    self.show()
def __init__(self, pdbdata, mode='cpk'):
    """Parse `pdbdata`, run DSSP on its first model and draw the plot.

    `mode` must be one of MatViewer.visualization_modes; the modes 'cpk',
    'backbone', 'aminoacid' and 'dssp' each trigger their own 2D plot
    method.

    Raises Exception when `mode` is not a recognised visualization mode.
    """
    # Analyze pdb file
    self.parser = PDBParser(QUIET=True, PERMISSIVE=True)
    self.structure = self.parser.get_structure('model', pdbdata)

    # DSSP prediction
    self.model = self.structure[0]
    self.dssp = DSSP(self.model, pdbdata)

    # Mode selection
    if mode not in MatViewer.visualization_modes:
        raise Exception('Not recognized visualization mode %s' % mode)
    self.mode = mode

    # Make the plot: look the method up by name so that only the selected
    # one is touched; a mode without an entry draws nothing.
    plot_method_names = {
        'cpk': 'cpk2d',
        'backbone': 'bb2d',
        'aminoacid': 'aa2d',
        'dssp': 'dssp2d',
    }
    method_name = plot_method_names.get(self.mode)
    if method_name is not None:
        getattr(self, method_name)()
def calculate_solvent_access_score(self, threshold):
    '''
    Calculate the accessibility score between the predicted model and
    the template pdb structure.

    DSSP is run on both ``.atm`` files under ``data/templates/<template name>/``.
    The value at index 3 of every DSSP entry is paired, in order, with the
    non-gap positions of the query (predicted model) and of the template.
    Both mappings are filtered with keep_accessible_residues and the number
    of shared positions is divided by the number of non-gap query positions.

    Args:
        threshold cutoff (int): Cutoff for relative accessibility residue values.

    Returns:
        float: The Accessibility score calculated
    '''
    dssp_exe = "bin/dssp-2.0.4-linux-amd64"

    # Path to the PDBs of predicted and template models
    template_dir = "data/templates/" + self.template.name + "/"
    pred_model_pdb = template_dir + self.template.modeller_pdb + ".atm"
    template_pdb = template_dir + self.template.reindexed_pdb + ".atm"

    # Parse PDBs (first model of each)
    quiet_parser = PDBParser(QUIET=True)
    pred_model = quiet_parser.get_structure("pred_model", pred_model_pdb)[0]
    template_model = PDBParser(QUIET=True).get_structure(
        "template_model", template_pdb)[0]

    # Run DSSP on both PDB files of the template and the Modeller's model
    dssp_pred_model = DSSP(pred_model, pred_model_pdb, dssp=dssp_exe)
    dssp_template_model = DSSP(template_model, template_pdb, dssp=dssp_exe)

    def _ungapped_positions(sequence):
        # Alignment indices of every residue that is not a gap ("-").
        return [pos for pos, res in enumerate(sequence) if str(res) != "-"]

    query_index_ali = _ungapped_positions(self.query.residues)
    template_index_ali = _ungapped_positions(self.template.residues)

    # Attribute the alignment index to each relative accessibility value
    pred_values = [dssp_pred_model[key][3] for key in dssp_pred_model.keys()]
    template_values = [
        dssp_template_model[key][3] for key in dssp_template_model.keys()
    ]
    rsa_pred_model = dict(zip(query_index_ali, pred_values))
    rsa_template_model = dict(zip(template_index_ali, template_values))

    # Keep only residues under a relative accessibilities threshold: buried residues
    pred_access_residues = keep_accessible_residues(rsa_pred_model, threshold)
    template_access_residues = keep_accessible_residues(
        rsa_template_model, threshold)

    # Get the common buried residues
    shared = set(pred_access_residues).intersection(template_access_residues)

    # Normalization
    return len(shared) / len(query_index_ali)
def get_DSSP(pdb_ids, pdb_dir='.', dssp_path='/usr/local/bin/mkdssp', drop_features=['DSSP_ID', 'AA']):
    """Run DSSP on a list of PDB entries and collect the result in a DataFrame.

    For each id the file ``<pdb_dir>/pdb<id>.ent`` is parsed and DSSP is run
    on its first model. Every DSSP record becomes one row made of the PDB
    id, the DSSP key (chain id, residue id) and the DSSP values.

    :param pdb_ids: iterable of PDB ids
    :param pdb_dir: directory holding the ``pdb<id>.ent`` files
    :param dssp_path: path to the DSSP executable
    :param drop_features: columns to drop from the result; a falsy value
        keeps all columns (the default list is never modified)
    :return: pandas DataFrame with 'NA' replaced by NaN and missing REL_ASA
        values filled with the column mean
    """
    # Check parameters
    logging.debug('PDB ids: {}'.format(pdb_ids))
    logging.debug('PDB directory: \'{}\''.format(pdb_dir))

    # Rows are gathered in a plain list before being turned into a DataFrame
    ds_dssp = list()
    # Loop through every protein
    for pdb_id in pdb_ids:
        pdb_path = pdb_dir + '/pdb{}.ent'.format(pdb_id)
        # Parse structure of the protein
        structure = PDBParser(QUIET=True).get_structure(pdb_id, pdb_path)
        # Get only first model
        model = structure[0]
        # Honour the dssp_path argument; it used to be ignored in favour of
        # a hard-coded path equal to its default.
        dssp = DSSP(model, pdb_path, dssp=dssp_path)
        # DSSP features: dssp index, amino acid, secondary structure, relative ASA, phi, psi,
        # NH_O_1_relidx, NH_O_1_energy, O_NH_1_relidx, O_NH_1_energy,
        # NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx, O_NH_2_energy
        for ids in dssp.keys():
            # ids is (chain id, residue id)
            ds_dssp.append([pdb_id] + list(ids) + list(dssp[ids]))

    # Define feature names
    # NOTE(review): 'H_O_2_energy' looks like a typo for 'NH_O_2_energy'; it
    # is kept because callers may select the column by this name.
    columns = ['PDB_ID', 'CHAIN_ID', 'RES_ID', 'DSSP_ID', 'AA', 'SEC_STRUCT', 'REL_ASA', 'PHI', 'PSI',
               'NH_O_1_relidx', 'NH_O_1_energy', 'O_NH_1_relidx', 'O_NH_1_energy',
               'NH_O_2_relidx', 'H_O_2_energy', 'O_NH_2_relidx', 'O_NH_2_energy']
    # Define DSSP DataFrame
    ds_dssp = pd.DataFrame(ds_dssp, columns=columns)
    # Turn RES_ID from tuple to integer (gets 1-st element)
    ds_dssp.RES_ID = ds_dssp.RES_ID.apply(lambda x: x[1])
    # Drop useless features, if any
    if drop_features:
        ds_dssp = ds_dssp.drop(drop_features, axis=1)
    # Turns NA to nan
    ds_dssp = ds_dssp.replace('NA', np.nan)
    # Handle nan
    ds_dssp.loc[ds_dssp.REL_ASA.isna(), 'REL_ASA'] = ds_dssp.REL_ASA.mean()
    # Return DSSP dataset
    return ds_dssp
def get_ss(file):
    """Download a PDB entry and count its DSSP secondary-structure codes.

    `file` is handed to PDBList.retrieve_pdb_file (PDB format); the file it
    returns is parsed and DSSP is run on the first model.

    Returns a dict mapping each code to its number of residues; the codes
    '-', 'T', 'S', 'H', 'B', 'E', 'G' and 'I' are always present. Returns
    None when a FileNotFoundError occurs while downloading, parsing or
    running DSSP.
    """
    parser = PDBParser()
    try:
        # retrieve_pdb_file returns the path it saved to, so there is no
        # need to shell out to `find` with the unescaped argument.
        path = PDBList().retrieve_pdb_file(file, file_format='pdb')
        structure = parser.get_structure("structure", path)
        dssp = DSSP(structure[0], path)
    except FileNotFoundError:
        return None

    ss_holder = {
        '-': 0,
        'T': 0,
        'S': 0,
        'H': 0,
        'B': 0,
        'E': 0,
        'G': 0,
        'I': 0
    }
    for entry in dssp:
        # entry[2] is the secondary-structure code of the residue
        ss_holder[entry[2]] = ss_holder.get(entry[2], 0) + 1
    return ss_holder
def main():
    """ main function

    Reads the PDB file given on the command line, runs ``mkdssp`` on its
    first model and converts coordinates with `spherical`: per atom, per
    residue (mean over the residue's atoms) and per C-alpha. The three
    arrays (transposed) and the DSSP object are saved with np.savez under
    the input file's base name, inside --output when it is given.
    """
    arg_parser = argparse.ArgumentParser(description='Process some integers.')
    arg_parser.add_argument('pdb', help='Input PDB file')
    arg_parser.add_argument('-o', '--output', help='Output directory')
    args = arg_parser.parse_args()

    basename = os.path.splitext(os.path.basename(args.pdb))[0]

    structure = PDBParser().get_structure('structure', args.pdb)
    model = structure[0]
    dssp = DSSP(model, args.pdb, dssp='mkdssp')

    atom_output = []
    residue_output = []
    c_alpha_output = []
    for chain in model:
        for residue in chain:
            # NOTE(review): residue['CA'] raises KeyError for residues
            # without a C-alpha atom -- confirm inputs never contain such
            # residues.
            c_alpha_output.append(spherical(*residue['CA'].get_coord()))
            per_atom = [spherical(*atom.get_coord()) for atom in residue]
            atom_output.extend(per_atom)
            residue_output.append(np.mean(per_atom, axis=0))

    atom_output = np.array(atom_output).T
    residue_output = np.array(residue_output).T
    c_alpha_output = np.array(c_alpha_output).T

    output_file = os.path.join(args.output, basename) if args.output else basename

    # NOTE(review): the DSSP object itself is handed to np.savez -- confirm
    # it is stored in a form the consumers can read back.
    np.savez(output_file,
             atom_wise=atom_output,
             residue_wise=residue_output,
             c_alpha=c_alpha_output,
             dssp=dssp)
# Fragment of a loop body (the enclosing loop is outside this view): locate
# the PDB file for `bestmodel`, run DSSP on its first model and split the
# residues into exposed and buried by relative surface accessibility.
if os.path.exists(filename):
    pdb_path = filename
elif os.path.exists(filename[:-3]):  # if the file is not gzipped
    # filename[:-3] drops the last three characters (presumably ".gz")
    pdb_path = filename[:-3]
else:
    sys.stderr.write(bestmodel + " does not have a pdb file!")
    break
pdb_parser = PDBParser()
if pdb_path.endswith('.gz'):
    structure = pdb_parser.get_structure(bestmodel, GzipFile(pdb_path))
else:
    # NOTE(review): `file(...)` is the Python 2 builtin; Python 3 needs open().
    structure = pdb_parser.get_structure(bestmodel, file(pdb_path))
model = structure[0]
dssp = DSSP(model, pdb_path, dssp="dssp", acc_array="Wilke")
buried = []
exposed = []
for t in dssp.property_list:
    aa = t[1]  # one letter codes
    ss = t[2]  # secondary structure
    rsa = t[3]  # relative surface accessibility
    # Residues at or above the threshold count as exposed, the rest as buried.
    if rsa >= RSA_EXPOSED_THRESH:
        exposed.append(aa)
    else:
        buried.append(aa)
# Replace the list of exposed amino acids by their per-letter counts.
exposed = Counter(exposed)
from difflib import *
import numpy as np
import tables as tb

##############################################################################################################################
###################################################### PARSE DSSP ############################################################
##############################################################################################################################

# Lines of the file named by the first command-line argument.
# `with` closes the handle, which the previous bare open() never did.
with open(sys.argv[1]) as fa_handle:
    fa = fa_handle.read().splitlines()

filename = sys.argv[2]  # this will eventually become the pdb file that we run dssp on

p = PDBParser()
# Parse from an explicit handle and close it afterwards; the handle used to
# be bound to `dssp` and was left open.
with open(filename) as pdb_handle:
    structure = p.get_structure(filename, pdb_handle)
# there is only one structure for dssp (NMR for example has more) and the
# dssp parser can only take one structure
model = structure[0]
dssp = DSSP(model, filename)
a_key = list(dssp.keys())

# Integer label for each DSSP secondary-structure code.
statedic = {'H': 1, 'I': 2, 'G': 3, 'E': 4, 'B': 5, 'T': 6, 'S': 7, '-': 8}

dsspAA = []  # amino acid of every DSSP record, in key order
states = []  # matching secondary-structure labels from statedic
for line in a_key:
    dsspAA.append(dssp[line][1])
    states.append(statedic[dssp[line][2]])

##############################################################################################################################
######################################################ONE HOT ENCODING #######################################################
##############################################################################################################################