def get_SS(structure, path_structure):
    """Return a one-hot secondary-structure vector for every DSSP residue.

    DSSP is run on ``structure[0]`` and the secondary-structure code found at
    position 2 of each DSSP record is collapsed to three states:

        Helix (G, H, I)        -> [0, 0, 1]
        Sheet (B, E)           -> [0, 1, 0]
        Coil  (any other code) -> [1, 0, 0]

    Parameters
    ----------
    structure : indexable
        Object whose element ``0`` is the model handed to ``DSSP``.
    path_structure : str
        Path of the structure file handed to ``DSSP``.

    Returns
    -------
    list of list of int
        One three-element vector per DSSP record, in DSSP iteration order.
    """
    helix_codes = ("G", "H", "I")
    sheet_codes = ("B", "E")

    list_dssp_features = []
    for residue_record in DSSP(structure[0], path_structure):
        ss_code = residue_record[2]
        if ss_code in helix_codes:
            list_dssp_features.append([0, 0, 1])
        elif ss_code in sheet_codes:
            list_dssp_features.append([0, 1, 0])
        else:
            # Every remaining code (T, S, "-", ...) is coil, so the output
            # always stays aligned one-to-one with the DSSP records.
            list_dssp_features.append([1, 0, 0])
    return list_dssp_features
def getDSSP(pdb_file):
    """Parse ``pdb_file`` and run ``mkdssp`` on its first model.

    The structure id is the file's base name without its extension.

    Parameters
    ----------
    pdb_file : str
        Path to the structure file.

    Returns
    -------
    The ``DSSP`` object built for model ``0`` of the parsed structure.
    """
    structure_id, _extension = os.path.splitext(os.path.basename(pdb_file))
    first_model = PDBParser().get_structure(structure_id, pdb_file)[0]
    return DSSP(first_model, pdb_file, dssp='mkdssp')
def get_surface_residues(filename, my_acc_array, my_threshold=0.2):
    """Find the solvent-exposed residues of a PDB file.

    DSSP is run on the first model of ``filename``; a residue is kept when the
    accessibility value at position 3 of its DSSP record is numeric and
    greater than or equal to ``my_threshold``.

    Parameters
    ----------
    filename : str
        Path to the PDB file.
    my_acc_array : str
        Forwarded to ``DSSP`` as ``acc_array``.
    my_threshold : float, optional
        Minimum accessibility for a residue to count as exposed.
        The default is 0.2.

    Returns
    -------
    set of str
        One ``"<residue number><chain id>"`` string per exposed residue.
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("code.pdb", filename)
    d = DSSP(s[0], filename, dssp='mkdssp', acc_array=my_acc_array)
    sys.stderr.write("\nHandled %i residues\n" % len(d))

    residue_number = set()
    # Pair each key with its own record instead of indexing the key list by
    # the DSSP index, which is not guaranteed to equal the key's position.
    for chain_id, res_id in d.keys():
        accessibility = d[(chain_id, res_id)][3]
        if isinstance(accessibility, str):
            # Sometimes the accessibility is the string NA.
            continue
        if accessibility >= my_threshold:
            residue_number.add(str(res_id[1]) + chain_id)
    return residue_number
def secondary_torsions_casp(domain):
    """Extract secondary structure and torsion angles using the DSSP package.

    Parameters
    ----------
    domain : str
        Name of the file (without ``.pdb``) under ``../../data/pdbfiles/``.

    Returns
    -------
    numpy.ndarray or tuple
        On success, an object array with one row per DSSP record and the
        columns (secondary structure, phi, psi), the two angles shifted by one
        full turn when they fall outside [-180, 180].
        When DSSP fails, the pair ``(None, None)`` is returned instead.
    """
    pdb_path = f'../../data/pdbfiles/{domain}.pdb'
    structure = PDBParser().get_structure('', pdb_path)
    try:
        raw = DSSP(structure[0], pdb_path)
    except Exception:
        print('PDBException. Nothing we can do')
        return None, None

    dssp = np.array(raw.property_list, dtype='O')
    sec_torsions = dssp[:, [2, 4, 5]]

    # Translate torsion angles to the range (-180, 180).
    for i in range(sec_torsions.shape[0]):
        for j in range(1, 3):
            angle = sec_torsions[i, j]
            if angle > 180:
                sec_torsions[i, j] = angle - 360
            elif angle < -180:
                # Add a full turn; "360 - angle" would push the value
                # further out of range (e.g. -190 -> 550).
                sec_torsions[i, j] = angle + 360
    return sec_torsions
def make_a_dssp_model(model, pdb_filepath, dssp='dssp'):
    """Build a DSSP object for ``model``.

    Parameters
    ----------
    model : object
        The model passed to ``DSSP`` as its first argument.
    pdb_filepath : str
        Path of the structure file passed to ``DSSP``.
    dssp : str, optional
        Third positional argument of ``DSSP``. The default is 'dssp'.

    Returns
    -------
    dssp_object : DSSP or None
        The DSSP object, or ``None`` when ``DSSP`` raised a plain
        ``Exception`` (reported by printing ``oops``).

    Raises
    ------
    Exception
        Any exception whose exact type is a subclass of ``Exception`` is
        re-raised unchanged.
    """
    try:
        return DSSP(model, pdb_filepath, dssp)
    except Exception as e:
        # Only the bare base class is swallowed; specific error types
        # still propagate to the caller.
        if type(e) is not Exception:
            raise
        print('oops')
        # Previously the function fell through to an unbound local here.
        return None
def _phi_psi_dic(model, pdb_filename):
    """Map residue numbers to a coarse ``"<phi>_<psi>"`` label.

    ``mkdssp`` is run on ``model``; for every entry of the DSSP property
    dictionary the values at positions 4 and 5 are rounded to the nearest ten
    and rendered as text with the last two characters removed.

    Returns
    -------
    dict
        Keyed by the second field of each residue id; later entries with the
        same residue number overwrite earlier ones.
    """
    def _tens(angle):
        # NOTE(review): presumably strips the ".0" of a float repr - confirm.
        return str(round(angle, -1))[:-2]

    records = DSSP(model, pdb_filename, dssp='mkdssp').property_dict
    return {
        key[1][1]: _tens(record[4]) + "_" + _tens(record[5])
        for key, record in records.items()
    }
def calc_dssp(model, chain_sites: dict, pdb_name: str) -> None:
    """Copy DSSP results onto the residue records listed in ``chain_sites``.

    For every DSSP residue whose chain id and residue number are present in
    ``chain_sites``, the object stored at index 2 of
    ``chain_sites[chain_id][resnumb]`` receives the attributes ``sec_struct``,
    ``sasa_r``, ``phi`` and ``psi``. ``session.commit()`` is called once after
    all residues were visited. If DSSP fails, ``dssp failed!`` is printed and
    no residue is updated.

    Layout of a DSSP record:

    ============ ===
    Tuple Index  Value
    ============ ===
    0            DSSP index
    1            Amino acid
    2            Secondary structure
    3            Relative ASA
    4            Phi
    5            Psi
    6            NH-->O_1_relidx
    7            NH-->O_1_energy
    8            O-->NH_1_relidx
    9            O-->NH_1_energy
    10           NH-->O_2_relidx
    11           NH-->O_2_energy
    12           O-->NH_2_relidx
    13           O-->NH_2_energy
    ============ ===
    """
    try:
        dssp = DSSP(model, pdb_name, dssp="mkdssp")
    except Exception:
        # Best effort: an empty mapping makes the loop below a no-op.
        dssp = {}
        print("dssp failed!")

    for residue in dssp.keys():
        _dssp_i, _aa, sec_struct, sasa_r, phi, psi, *_hbonds = dssp[residue]

        chain_id, res_id = residue
        _, resnumb, _ = res_id

        if chain_id in chain_sites and resnumb in chain_sites[chain_id]:
            new_res = chain_sites[chain_id][resnumb][2]
            new_res.sec_struct = sec_struct
            new_res.sasa_r = sasa_r
            new_res.phi = phi
            new_res.psi = psi

    session.commit()
def _run(self, pdbcode, chains):
    """Run DSSP on ``pdb<pdbcode>.ent`` and collect the requested chains.

    Parameters
    ----------
    pdbcode : str
        Code used both as structure id and to build the file name under
        ``PDB_PATH``.
    chains : container
        Chain ids whose DSSP records are wanted.

    Returns
    -------
    tuple
        ``(records, chain)``: the DSSP records whose chain id is in
        ``chains``, and the chain id of the first DSSP key.
    """
    pdb_file = os.path.join(PDB_PATH, 'pdb' + pdbcode + '.ent')
    first_model = PDBParser().get_structure(pdbcode, pdb_file)[0]
    dssp = DSSP(first_model, pdb_file, dssp=dssp_route)

    # The chain id of the very first key, whether or not it was requested.
    chain = dssp.keys()[0][0]

    selected = []
    for key in dssp.keys():
        if key[0] in chains:
            selected.append(dssp[key])
    return selected, chain
def get_dssp_amino_acid_sequence(pdb_path, dssp_path):
    """Return the amino-acid string DSSP reports for the whole first model.

    The structure is parsed from ``pdb_path`` (its id is the path minus the
    last four characters) and ``dssp_path`` is handed to ``DSSP.DSSP``. The
    value at position 1 of every DSSP record is concatenated in key order;
    all chains are included.
    """
    parser = Bio.PDB.PDBParser(QUIET=True)
    first_model = parser.get_structure(pdb_path[:-4], pdb_path)[0]
    dssp = DSSP.DSSP(first_model, dssp_path)
    return ''.join(dssp[key][1] for key in dssp.keys())
def get_dssp_torsion_angles(pdb_path, dssp_path):
    """Return the phi and psi values DSSP reports for the first model.

    The structure is parsed from ``pdb_path`` (its id is the path minus the
    last four characters) and ``dssp_path`` is handed to ``DSSP.DSSP``.

    Returns
    -------
    tuple of (list, list)
        ``(phi, psi)``: the values at positions 4 and 5 of every DSSP record,
        in key order. Lists are returned because the values are numbers, and
        appending them to a string raises ``TypeError``.
    """
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        pdb_path[:-4], pdb_path)
    dssp = DSSP.DSSP(structure[0], dssp_path)

    phi = []
    psi = []
    for key in dssp.keys():
        record = dssp[key]
        phi.append(record[4])
        psi.append(record[5])
    return phi, psi
def calc_features(PATH, PDB_id, OUTPATH):
    """Compute per-atom structural features and pickle them to disk.

    DSSP and ``run_naccess`` are run on the first model of
    ``<PATH>/<PDB_id>.pdb``. Every atom whose (chain id, residue id) pair is a
    DSSP key gets the tuple::

        (SS_Labels[secondary structure], phi / 360, psi / 360, rsa_label)

    where ``rsa_label`` is 1 when the residue's ``"all_atoms_abs"`` value
    exceeds ``Threshold`` and 0 otherwise. The features are stored in a dict
    keyed by the atom coordinates (as a tuple) and dumped with ``pickle`` to
    ``<OUTPATH>/<PDB_id>.dat``. Progress and statistics are printed.

    Parameters
    ----------
    PATH : str
        Directory holding the PDB file.
    PDB_id : str
        File stem and structure id.
    OUTPATH : str
        Directory receiving the ``.dat`` file.
    """
    # Loading the files
    parser = PDBParser(PERMISSIVE=1)
    filename = os.path.join(PATH, PDB_id + ".pdb")
    structure = parser.get_structure(PDB_id, filename)
    first_model = structure[0]

    # DSSP analysis for SS, PHI, PSI
    dssp = DSSP(first_model, filename)
    # Built once: membership is tested for every atom below.
    dssp_keys = set(dssp.keys())

    # NACCESS analysis for SASA
    rsa, _asa = run_naccess(first_model, filename)
    rsa = process_rsa_data(rsa)

    # Feature mapping to each atomic coordinate
    dssp_present, dssp_not_present = 0, 0
    feature = dict()  # The feature dictionary
    for current_model in structure:
        for chain in current_model:
            for residue in chain:
                for atom in residue:
                    full_id = atom.get_full_id()
                    print(full_id)
                    ID = (full_id[2], full_id[3])
                    if ID not in dssp_keys:
                        print("==> ID not present : ", full_id)
                        dssp_not_present += 1
                        continue

                    rsa_label = 1 if rsa[ID]["all_atoms_abs"] > Threshold else 0
                    record = dssp[ID]
                    feat = (SS_Labels[record[2]], record[4] / 360,
                            record[5] / 360, rsa_label)
                    feature[tuple(atom.get_coord())] = feat
                    print(ID, atom.get_coord(), feat)
                    dssp_present += 1

    # Printing the stats
    print(
        "==> STATS : PDBID : %s , DSSP PRESENT : %s , DSSP NOT PRESENT : %s" %
        (PDB_id, dssp_present, dssp_not_present))

    # Saving the feature to each PDB file
    with open(os.path.join(OUTPATH, PDB_id + ".dat"), "wb+") as f:
        pickle.dump(feature, f)
    print("==> Dump completed")
def load_data(self):
    """Run DSSP on this object's PDB file.

    The file path is ``self.m_folder + self.m_id + ".pdb"``; DSSP is run on
    the first model of the parsed structure.

    Returns
    -------
    list
        A single-element list holding the resulting ``DSSP`` object.
    """
    pdb = self.m_folder + self.m_id + ".pdb"
    first_model = PDBParser().get_structure("prot", pdb)[0]
    return [DSSP(first_model, pdb)]
def get_dssp_secondary_structure(pdb_path, dssp_path):
    """Return the secondary-structure string DSSP reports for the first model.

    The structure is parsed from ``pdb_path`` (its id is the path minus the
    last four characters) and ``dssp_path`` is handed to ``DSSP.DSSP``. The
    value at position 2 of every DSSP record is concatenated in key order;
    all chains are included.
    """
    parser = Bio.PDB.PDBParser(QUIET=True)
    first_model = parser.get_structure(pdb_path[:-4], pdb_path)[0]
    dssp = DSSP.DSSP(first_model, dssp_path)
    return ''.join(dssp[key][2] for key in dssp.keys())
def get_dssp_absolute_surface_area(pdb_path, dssp_path):
    """Return the accessibility value DSSP reports for every residue.

    The structure is parsed from ``pdb_path`` (its id is the path minus the
    last four characters) and ``dssp_path`` is handed to ``DSSP.DSSP``.

    Returns
    -------
    list
        The value at position 3 of every DSSP record, in key order and across
        all chains. A list is returned because the values are numbers, and
        appending them to a string raises ``TypeError``.

    Notes
    -----
    NOTE(review): position 3 is presumably the *relative* accessibility,
    despite this function's name - confirm against the DSSP wrapper in use.
    """
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        pdb_path[:-4], pdb_path)
    dssp = DSSP.DSSP(structure[0], dssp_path)
    return [dssp[key][3] for key in dssp.keys()]
def getDSSP(model,
            dssp_map=None,
            feature_name='secondary_structure',
            formatstr="{}({})",
            clean=True):
    """Annotate every atom of ``model`` with one-hot secondary-structure flags.

    ``model.save()`` writes a PDB file, DSSP is run on it, and each atom's
    ``xtra`` dict receives one entry per class label: 1.0 for the residue's
    class and 0.0 for all others.

    Parameters
    ----------
    model : object
        Iterable of chains that also provides ``save()`` returning a file path.
    dssp_map : dict, optional
        Maps DSSP codes to class labels. By default the eight DSSP codes are
        reduced to three labels (H, S, L) built with ``formatstr``.
    feature_name : str, optional
        First value substituted into ``formatstr``.
    formatstr : str, optional
        Template for the default labels.
    clean : bool, optional
        Remove the written PDB file afterwards.

    Returns
    -------
    list of str
        The sorted, de-duplicated class labels.

    Raises
    ------
    ModuleNotFoundError
        If ``Bio.PDB.DSSP`` cannot be imported.
    """
    try:
        from Bio.PDB.DSSP import DSSP
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError(
            "The module 'Bio.PDB.DSSP' is required for this functionality!"
        ) from err

    if dssp_map is None:
        # map eight ss types to three
        dssp_map = {
            "H": formatstr.format(feature_name, "H"),
            "G": formatstr.format(feature_name, "H"),
            "I": formatstr.format(feature_name, "H"),
            "E": formatstr.format(feature_name, "S"),
            "B": formatstr.format(feature_name, "L"),
            "T": formatstr.format(feature_name, "L"),
            "S": formatstr.format(feature_name, "L"),
            "-": formatstr.format(feature_name, "L")
        }

    # Write a PDB file
    pdb_file = model.save()

    # Run DSSP; the file is removed even when DSSP raises.
    try:
        dssp = DSSP(model, pdb_file)
    finally:
        if clean:
            os.remove(pdb_file)

    # Store the secondary structure in each atom property dict.
    keys = sorted(set(dssp_map.values()))
    for chain in model:
        cid = chain.get_id()
        for residue in chain:
            dkey = (cid, residue.get_id())
            code = dssp[dkey][2] if dkey in dssp else '-'
            # Codes missing from the map share the label of "-".
            ss = dssp_map[code] if code in dssp_map else dssp_map['-']
            for atom in residue:
                # Works for any number of classes, not just three.
                for label in keys:
                    atom.xtra[label] = 0.0
                atom.xtra[ss] = 1.0

    return keys
def get_mali_structure_stats(root):
    """Collect DSSP class counts for the manual-alignment PDB files.

    Walks ``root`` recursively; every file whose name contains both ``.pdb``
    and ``manual`` is parsed and ``mkdssp`` is run on its first model.

    Parameters
    ----------
    root : path
        Path to the root directory.

    Returns
    -------
    pd.DataFrame
        One row per file. Columns are the DSSP secondary-structure codes
        prefixed with ``x`` (occurrence counts), plus ``pdb`` (file name up to
        the first dot), ``path`` (full path) and ``xlen`` (number of DSSP
        records).
    """
    from Bio.PDB import PDBParser
    from Bio.PDB.DSSP import DSSP

    tool = 'manual'
    rows = []
    for path, _directories, files in os.walk(root):
        for f in files:
            if '.pdb' not in f or tool not in f:
                continue
            fname = os.path.join(path, f)
            structs = PDBParser().get_structure('', fname)
            dssp1 = DSSP(structs[0], fname, dssp='mkdssp')

            codes = [record[2] for record in dssp1]
            stats = pd.Series(Counter(codes))
            stats.index = ['x' + code for code in stats.index]

            stats['pdb'] = os.path.basename(f).split('.')[0]
            stats['path'] = fname
            stats['xlen'] = len(codes)
            rows.append(stats)
    return pd.DataFrame(rows)
def getSecondaryStructure(pdbID):
    """Return the DSSP secondary-structure string for ``pdbID``.

    The entry is first obtained through ``pdb.fetchInPDBFormat`` and read
    from the path given by ``pdb.getTemporaryPDBPath()``. The value at
    position 2 of every DSSP record of the first model is concatenated in
    key order.
    """
    pdb.fetchInPDBFormat(pdbID)
    structure = PDBParser().get_structure(pdbID, pdb.getTemporaryPDBPath())
    dssp = DSSP(structure[0], pdb.getTemporaryPDBPath())
    return "".join(dssp[key][2] for key in dssp.keys())
def accessible_surface_area(PDB_file):
    """Compute the solvent-accessible surface value of each residue.

    ``mkdssp`` is run on the first model of ``PDB_file``. Records whose
    amino-acid letter is ``X`` are skipped.

    Returns
    -------
    dict
        Maps a running integer index (starting at 0) to the value at
        position 3 of the corresponding DSSP record.
    """
    parser = PDBParser()
    structure_id = PDB_file.split(".")[0]
    first_model = parser.get_structure(structure_id, PDB_file)[0]
    dssp = DSSP(first_model, PDB_file, dssp='mkdssp')

    ASA_dict = {}
    next_index = 0
    for CA in dssp.keys():
        record = dssp[CA]
        if record[1] == 'X':
            continue
        # NOTE(review): the index is assumed to advance only for residues
        # that are kept - confirm against the consumer of this dict.
        ASA_dict[next_index] = record[3]
        next_index += 1
    return ASA_dict
def calculate_asa(model, filename, AROM_LIST, chain_list):
    """Returns a list of surface exposed residues as determined by relative
    solvent accessibility. Only standard protein residues are currently
    supported. Non-protein and user specified custom residues cannot be
    classified as surface exposed using this criteria.

    Parameters
    ----------
    model: :class:`Bio.PDB.Model.Model`
        Model which contains chains and residues of protein structure
    filename: str
        Name of pdb file to be analyzed
    AROM_LIST : list of str
        List containing which standard residues are included in analysis
    chain_list: list of str
        Chains that are included in analysis

    Returns
    -------
    list of str
        One ``"<letter><residue number>(<chain id>)"`` entry per exposed
        residue. If DSSP processing raises, a ``RuntimeWarning`` is issued and
        the entries collected so far (possibly none) are returned.

    Notes
    -----
    The relative accessible surface area (RSA) of each residue is calculated
    using the Bio.PDB.DSSP module. A residue with an RSA value of 0.05 or
    higher is classified as surface exposed. Residues whose RSA is not numeric
    are skipped.

    References
    ----------
    Tien, M. Z.; Meyer, A. G.; Sydykova, D. K.; Spielman, S. J.; Wilke, C. O.
    PLoS ONE 2013, 8 (11). Reference for relative solvent accessibility cutoff
    of 0.05, and for MaxASA values
    """
    cutoff = .05
    surface_exposed_res = []

    # One-letter codes of the requested residue names; unknown names are
    # dropped.
    letters = set()
    for res_name in AROM_LIST:
        letter = res_name_to_char.get(res_name)
        if letter:
            letters.add(letter)

    try:
        dssp = DSSP(model, filename, acc_array="Wilke")
        for key in dssp.keys():
            record = dssp[key]
            rsa = record[3]
            if isinstance(rsa, str):
                # A non-numeric RSA cannot be compared with the cutoff; skip
                # it instead of aborting the whole scan.
                continue
            if key[0] in chain_list and rsa >= cutoff and record[1] in letters:
                surface_exposed_res.append(
                    record[1] + str(key[1][1]) + "(" + str(key[0]) + ")")
    except Exception:
        warnings.warn(
            "Unable to calculate solvent accessibility. Check that DSSP is installed.",
            RuntimeWarning,
            stacklevel=2)
    return surface_exposed_res
def __init__(self, pdbfile, fastafile):
    """Load a FASTA sequence and the matching PDB chain, then align them.

    The first FASTA line (minus its first character) is the structure id and
    its last character selects the chain; the second line is the sequence.

    Attributes set
    --------------
    seq : FASTA sequence.
    residues : residues of the selected chain.
    pdbseq : one-letter sequence of the chain (unknown residue names skipped).
    pdbss3seq : three-state (H/E/C) secondary structure of every DSSP record.
    re_index : zero-based alignment indices taken from ``AlignNW``.
    alignment : FASTA sequence and aligned PDB sequence, newline separated.
    ss3seq : three-state string re-indexed to the FASTA sequence; positions
        with a negative index are filled with ``C``.
    dist_matrix, angle_matrix : results of ``generate_dist_matrix()`` and
        ``generate_angle_matrix()``.
    """
    names = {'HIS': 'H', 'ASP': 'D', 'ARG': 'R', 'PHE': 'F', 'ALA': 'A',
             'CYS': 'C', 'GLY': 'G', 'GLN': 'Q', 'GLU': 'E', 'LYS': 'K',
             'LEU': 'L', 'MET': 'M', 'ASN': 'N', 'SER': 'S', 'TYR': 'Y',
             'THR': 'T', 'ILE': 'I', 'TRP': 'W', 'PRO': 'P', 'VAL': 'V'}

    # Load fasta residue sequence
    with open(fastafile) as f:
        ff = [line.rstrip("\n") for line in f]
    p_id = ff[0][1:]
    self.seq = ff[1]

    # Load pdb information
    p = PDBParser(PERMISSIVE=1)
    st = p.get_structure(p_id, pdbfile)
    model = st[0]
    tag = p_id[-1]
    chain = model[tag]
    self.residues = list(chain.get_residues())

    # Sequence info from pdb. "in" replaces dict.has_key, which does not
    # exist on Python 3.
    self.pdbseq = "".join(names[res.get_resname()]
                          for res in self.residues
                          if res.get_resname() in names)

    # 3-state sse info from pdb
    dssp = DSSP(model, pdbfile)
    to3_dict = {'-': 'C', 'G': 'H', 'H': 'H', 'I': 'H', 'E': 'E', 'B': 'E',
                'T': 'C', 'S': 'C', 'L': 'C'}
    self.pdbss3seq = "".join(to3_dict[dssp[k][2]] for k in dssp.keys())

    # Align the pdb sequence (always missing some residues) to the fasta
    # sequence; minus 1 for indexing.
    alignment = AlignNW(self.seq, self.pdbseq)
    self.re_index = [i - 1 for i in alignment['j']]

    # Sequence alignment between pdb sequence and fasta sequence
    self.alignment = "".join(self.pdbseq[i] if i > -1 else "-"
                             for i in self.re_index)
    self.alignment = "\n".join([self.seq, self.alignment])

    # Full-length 3-state SSE sequence according to re_index
    self.ss3seq = "".join(self.pdbss3seq[i] if i > -1 else "C"
                          for i in self.re_index)

    # Full-length distance and angle matrices
    self.dist_matrix = self.generate_dist_matrix()
    self.angle_matrix = self.generate_angle_matrix()
def getDSSP(
        struct,
        fname,
        dsspPath=os.path.expanduser(
            "~/Tesis/rriPredMethod/dependencies/bioinformaticTools/dssp/mkdssp")):
    """Group the residue ids of the first model by chain and DSSP symbol.

    Parameters
    ----------
    struct : indexable
        Structure whose element ``0`` is analysed.
    fname : str
        Path of the structure file handed to ``DSSP``.
    dsspPath : str, optional
        DSSP executable, forwarded as ``dssp``.

    Returns
    -------
    dict
        ``{chain id: {symbol: [residue id, ...]}}`` with one (possibly empty)
        list for every symbol in ``DSSP_SYMBOLS``.
    """
    first_model = struct[0]
    dssp = DSSP(first_model, fname, dssp=dsspPath)

    dsspDict = {}
    for chain in first_model.child_list:
        dsspDict[chain.get_id()] = {symbol: [] for symbol in DSSP_SYMBOLS}

    for key in dssp.keys():
        chainId, resId = key
        dsspDict[chainId][dssp[key][2]].append(resId)
    return dsspDict
def get_dssp_amino_acid_sequences(pdb_path, dssp_path):
    """Return one amino-acid string per chain, as reported by DSSP.

    The structure is parsed from ``pdb_path`` (its id is the path minus the
    last four characters) and ``dssp_path`` is handed to ``DSSP.DSSP``. DSSP
    keys are walked in order and a new sequence is started whenever the chain
    id changes.

    Returns
    -------
    list of str
        The sequences in order of appearance, including the last chain.
    """
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        pdb_path[:-4], pdb_path)
    dssp = DSSP.DSSP(structure[0], dssp_path)

    aas = []
    current = []
    prev_chain_id = None
    for key in dssp.keys():
        chain_id = key[0]
        if chain_id != prev_chain_id:
            prev_chain_id = chain_id
            if current:
                aas.append(''.join(current))
                current = []
        current.append(dssp[key][1])

    # Flush the final chain; without this the last (or only) chain is lost.
    if current:
        aas.append(''.join(current))
    return aas
def __applyDssp(self):
    """Attach the DSSP secondary-structure code to every matching atom.

    DSSP is run on the first model of
    ``self.pdbDataPath + 'pdb' + self.pdbCode + '.ent'``. Each atom in
    ``self.atoms`` whose ``values['chain']`` and ``values['rid']`` match a
    DSSP residue receives that residue's code through ``setDsspInfo``.
    Progress messages are printed before and after.
    """
    import Bio.PDB as bio
    print('PSU: applying dssp')
    from Bio.PDB.DSSP import DSSP

    p = bio.PDBParser()
    pdbFile = self.pdbDataPath + 'pdb' + self.pdbCode + '.ent'
    structure = p.get_structure(self.pdbCode, pdbFile)
    dssp = DSSP(structure[0], pdbFile)

    # One lookup table instead of scanning all atoms once per residue.
    # When several keys share a chain and residue number, the last one wins,
    # which is also the value the repeated scan left behind.
    ss_by_residue = {}
    for akey in dssp.keys():
        ss_by_residue[(akey[0], akey[1][1])] = dssp[akey][2]

    for atom in self.atoms:
        ss = ss_by_residue.get((atom.values['chain'], atom.values['rid']))
        if ss is not None:
            atom.setDsspInfo(ss)
    print('PSU: applied dssp successfully')
def recup_acc_solvant(fichier_pdb):
    """Return the relative solvent accessibility of every residue.

    Uses Biopython and the ``mkdssp`` program on the first model of the file.

    Parameters
    ----------
    fichier_pdb : str
        Path to the PDB file; the text before its first dot is the structure
        id.

    Returns
    -------
    list
        The value at position 3 of each DSSP record, rounded to three
        decimals, in DSSP iteration order.

    Notes
    -----
    NOTE(review): a non-numeric accessibility value would make ``round``
    raise ``TypeError`` - confirm whether such records can occur here.
    """
    p = PDBParser()
    id_structure = fichier_pdb.split(".")[0]
    structure = p.get_structure(id_structure, fichier_pdb)
    dssp = DSSP(structure[0], fichier_pdb, dssp = 'mkdssp')
    # Single pass; the records are no longer re-listed for every index.
    return [round(record[3], 3) for record in dssp]
def SecStr(pdb_id, chain_id, start, stop):
    """Return DSSP secondary structure and torsion angles for a residue range.

    Parameters
    ----------
    pdb_id : str
        PDB code; lower-cased to match the local PDB database file names.
    chain_id : str
        Chain to read.
    start, stop : int
        Residue numbers delimiting the region; ``stop`` is INCLUDED.

    Returns
    -------
    tuple
        ``(sec_str, phi_lst, psi_lst)``: the concatenated secondary-structure
        codes and the lists of phi and psi values. Residues missing from DSSP
        are reported and skipped. ``(None, None, None)`` is returned when the
        file is not found or DSSP cannot process the structure.
    """
    # Change pdb_id to lower case - as in the local pdb db.
    pdb_id = pdb_id.lower()
    pdb_path = f'/home/m.pak/pdb/pdb{pdb_id}.pdb'

    # Read the pdb structure if it exists.
    p = PDBParser()
    try:
        structure = p.get_structure(pdb_id, pdb_path)
    except FileNotFoundError:
        print(f'File not found, proceed... {pdb_id}')
        return None, None, None
    model = structure[0]

    # Run DSSP.
    try:
        dssp = DSSP(model, pdb_path)
    except Exception:
        print(f'DSSP unable to process the structure {pdb_id}, proceed...')
        return None, None, None

    sec_codes = []
    phi_lst = []
    psi_lst = []
    for num in range(start, stop + 1):
        # Cannot deal with hetero-flag and insertion code.
        res_key = (chain_id, (' ', num, ' '))
        try:
            res = dssp[res_key]
        except KeyError:
            print(f'{res_key} not found in {pdb_id}, proceed...')
            continue
        sec_codes.append(res[2])
        phi_lst.append(res[4])
        psi_lst.append(res[5])
    return ''.join(sec_codes), phi_lst, psi_lst
def secondary_torsions(domain):
    """Extract secondary structure and torsion angles using the DSSP package.

    Parameters
    ----------
    domain : str
        Identifier whose first four characters name the file under
        ``../../data/pdbfiles/``. All chains of the first model are returned;
        no chain or position filtering is applied.

    Returns
    -------
    tuple
        ``(sec_torsions, sequence)``: an object array with one row per DSSP
        record and the columns (secondary structure, phi, psi), the two angles
        shifted by one full turn when they fall outside [-180, 180], and the
        concatenated amino-acid letters. ``(None, None)`` when DSSP fails.
    """
    domain_id = domain[:4]
    pdb_path = f'../../data/pdbfiles/{domain_id}.pdb'
    structure = PDBParser().get_structure('', pdb_path)
    try:
        raw = DSSP(structure[0], pdb_path)
    except Exception:
        print('PDBException. Nothing we can do')
        return None, None

    dssp = np.array(raw.property_list, dtype='O')
    sequence = ''.join(dssp[:, 1])
    sec_torsions = dssp[:, [2, 4, 5]]

    # Translate torsion angles to the range (-180, 180).
    for i in range(sec_torsions.shape[0]):
        for j in range(1, 3):
            angle = sec_torsions[i, j]
            if angle > 180:
                sec_torsions[i, j] = angle - 360
            elif angle < -180:
                # Add a full turn; "360 - angle" would push the value
                # further out of range (e.g. -190 -> 550).
                sec_torsions[i, j] = angle + 360
    return sec_torsions, sequence
def retrieve_secondary_struc(chain, input_path):
    """Return the sequence and secondary structure DSSP assigns to ``chain``.

    Uses Biopython's DSSP wrapper, so DSSP must be installed
    (``conda install -c salilab dssp``). DSSP is run on the chain's parent
    model and only the records whose key carries this chain's id are used.

    Secondary-structure codes:
        H - Alpha-Helix        B - Isolated Beta-Bridge
        E - Strand             G - 3-10 Helix
        I - Pi helix           T - Turn
        S - Bend

    Parameters
    ----------
    chain : object
        Chain providing ``get_parent()`` and ``get_id()``.
    input_path : str
        Path of the structure file handed to ``DSSP``.

    Returns
    -------
    tuple of (str, str)
        ``(seq, struc)``: the amino-acid letters and the secondary-structure
        codes, with ``-`` replaced by a blank.

    Warns
    -----
    UserWarning
        When the number of DSSP records differs from the number of residues
        in the chain.
    """
    model = chain.get_parent()
    res_list = Selection.unfold_entities(chain, "R")
    dssp = DSSP(model, input_path)

    # Select by chain id; taking the first len(res_list) records would
    # return another chain's residues for any chain but the first.
    chain_id = chain.get_id()
    dssplist = [dssp[key] for key in dssp.keys() if key[0] == chain_id]

    seq = ''.join(row[1] for row in dssplist)
    struc = ''.join(row[2] for row in dssplist).replace('-', ' ')

    if len(seq) != len(res_list):
        warnings.warn(
            f'PDB file and Secondary structure map do not match!\n {chain.get_parent().get_parent().id} - PDB: {len(res_list)} Residues VS. SS: {len(seq)} Residues. '
        )
    return seq, struc
def getDSSP(model,
            PDBFileName,
            dssp_map=None,
            feature_name='secondary_structure',
            formatstr="{}({})"):
    """Flag every atom of ``model`` with its secondary-structure class.

    DSSP is run on ``PDBFileName`` and each atom's ``xtra`` dict gets the
    entry ``xtra[<class label>] = 1.0`` for the class of its residue.
    Residues without a DSSP record get the label mapped from ``-``.

    Parameters
    ----------
    model : object
        Iterable of chains passed to ``DSSP``.
    PDBFileName : str
        Path of the structure file passed to ``DSSP``.
    dssp_map : dict, optional
        Maps DSSP codes to class labels. By default the eight DSSP codes are
        reduced to three labels (H, S, L) built with ``formatstr``.
    feature_name : str, optional
        First value substituted into ``formatstr``.
    formatstr : str, optional
        Template for the default labels.

    Returns
    -------
    list of str
        The de-duplicated class labels, sorted so the order is the same on
        every run.
    """
    if dssp_map is None:
        # map eight ss types to three
        dssp_map = {
            "H": formatstr.format(feature_name, "H"),
            "G": formatstr.format(feature_name, "H"),
            "I": formatstr.format(feature_name, "H"),
            "E": formatstr.format(feature_name, "S"),
            "B": formatstr.format(feature_name, "L"),
            "T": formatstr.format(feature_name, "L"),
            "S": formatstr.format(feature_name, "L"),
            "-": formatstr.format(feature_name, "L")
        }

    # run DSSP using the DSSP class from BioPython
    dssp = DSSP(model, PDBFileName)

    # store secondary structure in each atom property dict
    for chain in model:
        cid = chain.get_id()
        for residue in chain:
            dkey = (cid, residue.get_id())
            code = dssp[dkey][2] if dkey in dssp else '-'
            # Codes missing from the map share the label of "-".
            ss = dssp_map[code] if code in dssp_map else dssp_map['-']
            for atom in residue:
                atom.xtra[ss] = 1.0

    return sorted(set(dssp_map.values()))
def Make_dssp():
    """Build a residue-label -> secondary-structure lookup from DSSP.

    DSSP is run on the first model of ``./current_pdb.txt``.

    Returns
    -------
    dict
        Keys are ``"<chain id><residue number><three-letter name>"`` and
        values are the DSSP secondary-structure codes. The extra key ``'X'``
        maps to NaN.
    """
    ref = {
        'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'B': 'ASX',
        'C': 'CYS', 'E': 'GLU', 'Q': 'GLN', 'Z': 'GLX', 'G': 'GLY',
        'H': 'HIS', 'I': 'ILE', 'L': 'LEU', 'K': 'LYS', 'M': 'MET',
        'F': 'PHE', 'P': 'PRO', 'S': 'SER', 'T': 'THR', 'W': 'TRP',
        'Y': 'TYR', 'V': 'VAL', 'X': '---',
    }

    # np.nan is the spelling that survives NumPy 2.0 (np.NaN was removed).
    dssp_dict = {'X': np.nan}

    p = PDBParser()
    structure = p.get_structure('model', './current_pdb.txt')
    dssp = DSSP(structure[0], './current_pdb.txt')

    # Walk the keys directly instead of re-listing them for every index.
    for a_key in dssp.keys():
        record = dssp[a_key]
        label = str(a_key[0]) + str(a_key[1][1]) + str(ref[record[1]])
        dssp_dict[label] = record[2]
    return dssp_dict
ftp.retrbinary("RETR %s" % filename, callback=fp.write) print("processing: %s" % filename) p = PDBParser() with gzip.open(filename, 'rt') as f: structure = p.get_structure("", f) pdb_id = structure.header["idcode"] assert pdb_id, "no PDB ID for %s" % filename model = structure[0] try: dssp = DSSP(model, filename, dssp="/Users/luis/dssp-2.3.0/mkdssp") except Exception as e: print(e) print() os.remove(filename) continue valid_aa = [ 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y' ] current_chain = "" chain = "" phi_psis = [] dihedrals = [] chains = []