def fetch_protein(pdb_id: str) -> Tuple[List[str], np.ndarray]:
    """Download a PDB entry and extract its sequence and phi/psi angles.

    The entry is fetched from ``https://files.rcsb.org/download/``, written
    to ``<pdb_id>.pdb`` in the current working directory, parsed, and the
    file is removed again before returning (also when parsing fails).

    Only the first polypeptide returned by ``PPBuilder.build_peptides`` is
    used.

    Args:
        pdb_id: Identifier of the entry; used verbatim to build the file
            name and the download URL.

    Returns:
        A ``(aa_sequence, phi_psi_angles)`` tuple. ``aa_sequence`` is the
        list of characters of the polypeptide sequence. ``phi_psi_angles``
        is the transposed array of per-residue ``(phi, psi)`` pairs in
        degrees, i.e. row 0 holds phi and row 1 holds psi. Angles that
        Biopython reports as ``None`` (undefined) are replaced by 180.

    Raises:
        requests.HTTPError: If the download returns an error status.
        requests.RequestException: On connection problems or timeout.
        IndexError: If no polypeptide could be built from the structure.
    """
    # Retrieve the pdb file from the Protein Data Bank.
    pdb_file = f"{pdb_id}.pdb"
    pdb_file_path = os.path.join(os.getcwd(), pdb_file)
    protein_url = f"https://files.rcsb.org/download/{pdb_file}"

    # A timeout keeps a stalled connection from blocking forever, and
    # raise_for_status stops an HTTP error page from being parsed as PDB data.
    req = requests.get(protein_url, timeout=30)
    req.raise_for_status()

    try:
        with open(pdb_file_path, "w") as f:
            f.write(req.text)

        # Parse the pdb file from the same path it was written to.
        structure = PDBParser().get_structure(pdb_id, pdb_file_path)
    finally:
        # Remove the temporary download even if writing or parsing failed;
        # os.remove is portable, unlike shelling out to `rm`.
        if os.path.exists(pdb_file_path):
            os.remove(pdb_file_path)

    peptides = PPBuilder().build_peptides(structure)[0]

    # Extract amino acid sequence and phi/psi angles.
    aa_sequence = list(peptides.get_sequence())

    # Compare against None explicitly: a truthiness test would also discard
    # a genuine angle of exactly 0.0 radians.
    phi_psi_angles = np.array(
        [
            tuple(
                180 if angle is None else np.rad2deg(angle)
                for angle in (phi, psi)
            )
            for phi, psi in peptides.get_phi_psi_list()
        ]
    ).T

    return aa_sequence, phi_psi_angles
def find_pdb_limits(pdb_path):
    """Return the residue-number limits and sequence of a PDB structure.

    The file at ``pdb_path`` is parsed and only the first polypeptide
    produced by ``PPBuilder.build_peptides`` is inspected; the structure is
    assumed to contain a single polypeptide.

    Args:
        pdb_path: Path of the PDB file, passed to
            ``PDBParser.get_structure``.

    Returns:
        A ``(start, end, seq)`` tuple: element 1 of the residue id of the
        first and of the last residue of the polypeptide, followed by the
        polypeptide's sequence as returned by ``get_sequence()``.

    Raises:
        IndexError: If no polypeptide could be built from the structure.
    """
    structure = PDBParser().get_structure('', pdb_path)

    # Only the first polypeptide is considered (assumed to be the only one).
    polypeptides = PPBuilder().build_peptides(structure)
    first_pp = polypeptides[0]

    first_residue_id = first_pp[0].get_id()
    last_residue_id = first_pp[-1].get_id()

    return (first_residue_id[1], last_residue_id[1], first_pp.get_sequence())