def parse(self, protein_id=None, filename=None, Protein_model=None):
    """Parse an mmCIF structure with the ProDy CIF parser.

    Args:
        protein_id: Identifier passed to ``prody.parseCIF`` when no
            ``filename`` is supplied.
        filename: Passed to ``prody.parseCIF`` instead of ``protein_id``
            whenever it is not ``None``.
        Protein_model: Forwarded unchanged as the ``model`` keyword argument.

    Returns:
        Whatever ``prody.parseCIF`` returns for the selected source.
    """
    # Identity check: only a missing filename (None) falls back to the ID.
    source = protein_id if filename is None else filename
    return prody.parseCIF(source, model=Protein_model)
def get_structures(structure_chain_id_pairs: typing.List[typing.Tuple[str, str]]):
    """Parse one structure for each (pdb_id, chain) pair.

    Args:
        structure_chain_id_pairs: Sequence of ``(pdb_id, chain)`` tuples.

    Returns:
        A list with one ``pd.parseCIF(pdb_id, chain=chain)`` result per input
        pair, in input order (ProDy AtomGroup objects, per the original
        docstring).
    """
    # Iterate the pairs directly. Routing them through a dict keyed on the
    # PDB ID would keep only the last chain for any structure that is listed
    # with several chains, so the output would no longer match the input.
    return [
        pd.parseCIF(pdb_id, chain=chain_id)
        for pdb_id, chain_id in structure_chain_id_pairs
    ]
def get_chain_from_trainid(proteinnet_id):
    """Return the ProDy-parsed chain for a ProteinNet train/validation ID.

    An ID such as "1A9U_2_A" is split on "_" into a PDB ID, a model index and
    a chain ID. An ID that does not split into exactly three fields is treated
    as an ASTRAL ID and handed to ``get_chain_from_astral_id``.

    Args:
        proteinnet_id: ProteinNet ID string.

    Returns:
        The parsed chain on success (or whatever ``get_chain_from_astral_id``
        returns for ASTRAL IDs); otherwise one of the ``ERRORS`` codes.
    """
    # Try parsing the ID as a PDB ID. If it fails, assume it's an ASTRAL ID.
    try:
        pdbid, model_id, chid = proteinnet_id.split("_")
    except ValueError:
        try:
            _, astral_id = proteinnet_id.split("_")
            return get_chain_from_astral_id(astral_id.replace("-", "_"),
                                            ASTRAL_ID_MAPPING)
        except KeyError:
            return ERRORS["MISSING_ASTRAL_IDS"]
        except Exception:
            # Covers a malformed ID (ValueError) and any other lookup failure,
            # while letting KeyboardInterrupt/SystemExit propagate.
            return ERRORS["FAILED_ASTRAL_IDS"]

    # Keep only the part after "#" when the ID carries a "#" prefix.
    if "#" in pdbid:
        pdbid = pdbid.split("#")[1]

    # Continue loading the chain, given the PDB ID. Any failure of the PDB
    # parser falls back to the mmCIF parser.
    try:
        chain = pr.parsePDB(pdbid, chain=chid)
    except Exception:
        try:
            chain = pr.parseCIF(pdbid, chain=chid)
        except AttributeError:
            return ERRORS["PARSING_ERROR_ATTRIBUTE"]
        except pr.proteins.pdbfile.PDBParseError:
            return ERRORS["PARSING_ERROR"]
        except OSError:
            return ERRORS["PARSING_ERROR_OSERROR"]
        except Exception:
            return ERRORS["UNKNOWN_EXCEPTIONS"]

    if chain is None:
        print(proteinnet_id)
        return ERRORS["NONE_CHAINS"]

    # Attempt to select a coordset
    try:
        if chain.numCoordsets() > 1:
            chain.setACSIndex(int(model_id))
    except IndexError:
        return ERRORS["COORDSET_INDEX_ERROR"]

    return chain
def get_annotations_single(uniprot_id, pdb_id, residue_mapper: dict, chain=None, n_modes=6,
                           full_pdb_solvent_accessibility=True):
    """Assemble a ``StructureAnnotation`` for a single structure.

    The structure is parsed with ``pd.parseCIF``, GNM and ANM models are
    computed with ``pd.calcGNM`` / ``pd.calcANM``, and the derived quantities
    (perturbation response, hinge sites, fluctuations, stiffness, relative
    solvent accessibility) come from this module's helper functions.

    Args:
        uniprot_id: Stored on the resulting annotation.
        pdb_id: Identifier passed to ``pd.parseCIF`` and to the solvent
            accessibility helper.
        residue_mapper: Mapping forwarded to the solvent accessibility helper
            and stored on the annotation.
        chain: Optional chain selector forwarded to the parser.
        n_modes: Number of modes requested from both models.
        full_pdb_solvent_accessibility: Forwarded to
            ``get_relative_solvent_accessibility``.

    Returns:
        A ``StructureAnnotation`` built from the values above.
    """
    atoms = pd.parseCIF(pdb_id, chain=chain)

    # Both models are built from the same parsed structure.
    gnm_model, ca_selection = pd.calcGNM(atoms, n_modes=n_modes)
    anm_model, _unused_selection = pd.calcANM(atoms, n_modes=n_modes)

    effectiveness, sensitivity = get_perturbations(anm_model, n_modes)

    # One hinge-index result per GNM mode.
    hinges = []
    for mode_index in range(n_modes):
        hinges.append(get_hinge_indices(gnm_model, mode=mode_index))

    fluctuations = get_enm_fluctuations(anm_model, n_modes)
    stiffness = get_stiffness(anm_model, ca_selection, n_modes)
    accessibility = get_relative_solvent_accessibility(
        pdb_id,
        residue_mapper,
        chain=chain,
        full_pdb_solvent_accessibility=full_pdb_solvent_accessibility,
    )

    return StructureAnnotation(
        pdb_id,
        chain,
        atoms,
        ca_selection,
        uniprot_id,
        residue_mapper,
        fluctuations,
        effectiveness,
        sensitivity,
        stiffness,
        accessibility,
        hinges,
        anm_model,
        gnm_model,
    )
def get_chain_from_trainid(pnid):
    """Return a ProDy chain object for a ProteinNet ID. Assumes train/valid ID.

    The ID is split on "_" into a PDB ID, an integer model number and a chain
    ID. IDs that do not fit that shape are treated as ASTRAL IDs and handed to
    ``get_chain_from_astral_id``. The PDB parser is tried first; the mmCIF
    parser is the fallback.

    Args:
        pnid: ProteinNet ID

    Returns:
        One of:
          * the parsed chain, on success;
          * ``(chain, "MODIFIED_MODEL")`` when the requested model could not
            be loaded and model 1 was used instead;
          * ``(pnid, error_code)`` with a code from ``errors.ERRORS`` on
            failure;
          * whatever ``get_chain_from_astral_id`` returns, for ASTRAL IDs.
    """
    modified_model_number = False

    # Try parsing the ID as a PDB ID. If it fails, assume it's an ASTRAL ID.
    try:
        pdbid, chnum, chid = pnid.split("_")
        chnum = int(chnum)
    except ValueError:
        try:
            _, astral_id = pnid.split("_")
            return get_chain_from_astral_id(astral_id.replace("-", "_"),
                                            ASTRAL_ID_MAPPING)
        except KeyError:
            return pnid, errors.ERRORS["MISSING_ASTRAL_IDS"]
        except Exception:
            return pnid, errors.ERRORS["FAILED_ASTRAL_IDS"]

    # If this is a validation set pnid, separate the annotation from the ID
    if "#" in pdbid:
        pdbid = pdbid.split("#")[1]

    # Continue loading the chain, given the PDB ID.
    # `use_pdb` records which parser was active when a failure happened, so
    # the fallback below retries with that same parser.
    use_pdb = True
    try:
        chain = pr.parsePDB(pdbid, chain=chid, model=chnum)
        if not chain:
            # Flip the flag BEFORE the call: if parseCIF itself raises, the
            # handler must know the CIF parser was the one in use.
            use_pdb = False
            chain = pr.parseCIF(pdbid, chain=chid, model=chnum)
    # If the file is too large, then we can download the CIF instead
    except OSError:
        try:
            chain = pr.parseCIF(pdbid, chain=chid, model=chnum)
        except IndexError:
            # Requested model is missing from the CIF; fall back to model 1.
            try:
                chain = pr.parseCIF(pdbid, chain=chid, model=1)
                modified_model_number = True
            except Exception as e:
                print(e)
                return pnid, errors.ERRORS["PARSING_ERROR_OSERROR"]
        except Exception as e:  # EOFERROR
            print(e)
            return pnid, errors.ERRORS["PARSING_ERROR_OSERROR"]
    except AttributeError:
        return pnid, errors.ERRORS["PARSING_ERROR_ATTRIBUTE"]
    except (pr.proteins.pdbfile.PDBParseError, IndexError):
        # For now, if the requested coordinate set doesn't exist, then we will
        # default to using the only (first) available coordinate set
        reparse = pr.parsePDB if use_pdb else pr.parseCIF
        try:
            struct = reparse(pdbid, chain=chid)
            if not (struct and chnum > 1):
                return pnid, errors.ERRORS["PARSING_ERROR"]
            chain = reparse(pdbid, chain=chid, model=1)
        except Exception:
            # A failure while re-parsing is reported like every other parse
            # failure instead of escaping from inside this handler.
            return pnid, errors.ERRORS["PARSING_ERROR"]
        modified_model_number = True
    except Exception as e:
        print(e)
        return pnid, errors.ERRORS["UNKNOWN_EXCEPTIONS"]

    if chain is None:
        return pnid, errors.ERRORS["NONE_CHAINS"]
    if modified_model_number:
        return chain, "MODIFIED_MODEL"
    if contains_d_amino_acids(chain):
        return pnid, errors.ERRORS["D_AMINO_ACIDS"]
    return chain