def getLigandResIds(ligchemid: str, struct: Structure) -> List[Residue]:
    """Collect every residue of `struct` whose residue name equals `ligchemid`.

    `ligchemid` is a ligand chemical id of the kind listed under
    https://www.rcsb.org/ligand/<ID>. Matching residues are returned in the
    order in which `struct.get_residues()` yields them; the list is empty when
    no residue carries that name.
    """
    return [
        residue
        for residue in struct.get_residues()
        if residue.get_resname() == ligchemid
    ]
def featurize(structure: Structure) -> list[Any]:
    """Build a small feature vector for `structure`.

    The returned list contains, in this order:

    0. a one-element list holding `totalArea()` of the freesasa result,
    1. a list with the area of every class in the mapping returned by
       `freesasa.classifyResults`, in that mapping's iteration order,
    2. the "CA" atom of the second residue minus the "CA" atom of the
       second-to-last residue,
    3. a one-element list holding the number of residues,
    4. `calc_dihedral` of the "CA" vectors of residues 1, 2, -3 and -2.

    NOTE(review): the surface-area part is computed from `pdbpath`, which is
    not a parameter of this function and therefore has to exist at module
    level -- confirm it points at the same structure as `structure`.
    """
    # Surface-area features come from freesasa, not from `structure`.
    sasa_structure = freesasa.Structure(pdbpath)
    sasa_result = freesasa.calc(sasa_structure)
    class_areas = freesasa.classifyResults(sasa_result, sasa_structure)

    total_area = [sasa_result.totalArea()]
    areas_by_class = [class_areas[name] for name in class_areas]

    all_residues = list(structure.get_residues())
    residue_count = [len(all_residues)]

    def alpha_carbon(index):
        return all_residues[index]["CA"]

    # The first and last residues are deliberately not used (indices 1 / -2).
    end_to_end = alpha_carbon(1) - alpha_carbon(-2)
    torsion = calc_dihedral(
        alpha_carbon(1).get_vector(),
        alpha_carbon(2).get_vector(),
        alpha_carbon(-3).get_vector(),
        alpha_carbon(-2).get_vector(),
    )

    return [total_area, areas_by_class, end_to_end, residue_count, torsion]
def get_ptc_residues(struct: Structure, pdbid: str, conserved_nucleotides: List[int]) -> List[Residue]:
    """Select the residues of `struct` whose number is in `conserved_nucleotides`.

    Parameters:
    -----------
        struct                  : object whose `get_residues()` yields residues
        pdbid                   : accepted for interface compatibility; not used
        conserved_nucleotides   : residue numbers to keep, e.g.
                                  [2055, 2451, 2452, 2504, 2505, 2506, 2507]
                                  (the positions the original code listed in an
                                  unused local named ECOLI_PTC_CONSERVED_NUCLEOTIDES)
    Returns:
    ---------
        list of matching residues, in the order `struct.get_residues()`
        yields them; empty when nothing matches
    """
    # Build the lookup once so each residue costs a constant-time membership test.
    wanted = frozenset(conserved_nucleotides)
    # get_id()[1] is the residue number; int() also accepts it as a digit string.
    return [
        residue
        for residue in struct.get_residues()
        if int(residue.get_id()[1]) in wanted
    ]
def featurize(structure: Structure) -> list[Any]:
    """Build a two-element feature vector for `structure`.

    Element 0 is the "CA" atom of the second residue minus the "CA" atom of
    the second-to-last residue; element 1 is `calc_dihedral` of the "CA"
    vectors of residues 1, 2, -3 and -2.
    """
    all_residues = list(structure.get_residues())

    def alpha_carbon(index):
        return all_residues[index]["CA"]

    # The first and last residues are deliberately not used (indices 1 / -2).
    end_to_end = alpha_carbon(1) - alpha_carbon(-2)
    torsion = calc_dihedral(
        alpha_carbon(1).get_vector(),
        alpha_carbon(2).get_vector(),
        alpha_carbon(-3).get_vector(),
        alpha_carbon(-2).get_vector(),
    )

    return [end_to_end, torsion]
def getLigandResIds(ligchemid: str, struct: Structure, dict_or_res: str = 'dict') -> List[ResidueFullIdDict]:
    """Find every residue of `struct` whose residue name equals `ligchemid`.

    Parameters:
    -----------
        ligchemid   : residue name to look for
        struct      : object whose `get_residues()` yields residues
        dict_or_res : output selector (a string, not a flag). 'dict' (default)
                      wraps each match in `ResidueFullIdDict`; any other value
                      returns the matching residue objects themselves.
    Returns:
    ---------
        list of `ResidueFullIdDict` when dict_or_res == 'dict', otherwise the
        list of matching residues; both in `struct.get_residues()` order
    """
    matches = [
        residue
        for residue in struct.get_residues()
        if residue.get_resname() == ligchemid
    ]
    if dict_or_res == 'dict':
        return [ResidueFullIdDict(residue) for residue in matches]
    return matches
def getLigandsResIds(ligChemIds:List[str], struct: Structure)->List[ResidueFullIdDict]:
    """Wrap every residue of `struct` whose name is in `ligChemIds`.

    Residues are tested with `get_resname() in ligChemIds`; each match is
    passed to `ResidueFullIdDict`, and the results are returned in the order
    `struct.get_residues()` yields the residues.
    """
    # Select first, convert afterwards, so no wrapper is built until the
    # whole structure has been scanned.
    matches = [
        residue
        for residue in struct.get_residues()
        if residue.get_resname() in ligChemIds
    ]
    wrapped = []
    for residue in matches:
        wrapped.append(ResidueFullIdDict(residue))
    return wrapped
class PyRyStructure(object):
    """
    class represents structure as entity (very wide definition)
    used for storing information about structures, creating BIO.pdb structures,
    saving structure files etc.
    """

    def __init__(self, structure=None):
        # Any falsy argument is normalised to None.
        self.struct = structure if structure else None
        self.sequence = ''          # sequence taken from structure
        #----------------will decide on one of these 3 ----------------------------
        self.center_of_mass = []    # [x,y,z] coords of center of mass
        self.geometric_center = []  # geometric centre
        self.center = None          # actual center of given complex component
        #--------------------------------------------------------------------------
        self.chain = ''             # chain name from structure file
        self.moltype = ''           # protein, DNA, RNA

    def __str__(self):
        return "%s %s %s %s %s" % (self.struct, self.chain, self.center_of_mass,
                                   self.moltype, self.sequence)

    def add_chain_to_struct(self, chain_id):
        """
        adds a new, empty chain to the first model of self.struct
        Parameters:
        -----------
            chain_id    : chain name
        Returns:
        ---------
            nothing; self.struct[0] is modified in place
        """
        chain = Chain(chain_id)
        self.struct[0].add(chain)

    def add_residues_to_structure(self, struct, chain_id, chain2_id):
        """
        adds residues from struct to a given structure (self.struct)
        Parameters:
        -----------
            struct      : template structure object with residues which will
                          be added to self.struct
            chain_id    : name of template chain (looked up in struct[0])
            chain2_id   : name of target chain (looked up in self.struct[0])
        Returns:
        ---------
            nothing; the target chain is modified in place
        """
        target_chain = self.struct[0][chain2_id]
        for res in struct[0][chain_id].child_list:
            target_chain.add(res)

    def calculate_atom_atom_distance(self, atom1, atom2):
        """
        calculates distance between two atoms
        Parameters:
        -----------
            atom1, atom2    : Bio.PDB.Atom entities
        Returns:
        ---------
            result of atom1 - atom2 (the distance in 3D space for
            Bio.PDB atoms)
        Raises:
        -------
            PyRyStructureError if self.struct is not a Bio.PDB entity
        """
        # is_structure is a method; the bare-name call used before could not
        # be resolved. It returns True or raises, so no branch is needed.
        self.is_structure()
        return atom1 - atom2

    def calculate_centre_of_mass(self, entity=None, geometric=False):
        """
        calculates centre of mass for given structure
        Returns gravitic or geometric center of mass of an Entity.
        Geometric assumes all masses are equal (geometric=True)
        Defaults to Gravitic.
        Parameters:
        -----------
            entity      : optional iterable of atoms; only consulted when
                          self.struct is not a Bio.PDB entity
            geometric   : optional; when true every atom gets weight 1
                          instead of atom.molweight
        Returns:
        ---------
            centre of mass coordinates as an array of x, y, z
            (also stored in self.center_of_mass)
        Raises:
        -------
            ValueError  : if wrong object is given as a target, or if there
                          are no atoms / the total weight is zero
        """
        wrong_target = ('Center of Mass can only be calculated from \n'
                        'the following objects:Structure, Model, Chain, '
                        'Residue, list of Atoms.')

        if isinstance(self.struct, Entity.Entity):  # Structure, Model, Chain, Residue
            atom_list = list(self.struct.get_atoms())
        elif hasattr(entity, '__iter__'):           # List of Atoms
            # Materialise first so the check below cannot exhaust an iterator.
            atom_list = list(entity)
            # A filter object is always truthy, so test the contents explicitly.
            if not any(getattr(item, 'level', None) == 'A' for item in atom_list):
                raise ValueError(wrong_target)
        else:                                       # Some other weirdo object
            raise ValueError(wrong_target)

        # The accumulator has to be an array: "+=" on a plain list would
        # extend the list instead of adding component-wise.
        weighted_sum = array([0., 0., 0.])
        whole_mass = 0.
        for atom in atom_list:
            weight = 1. if geometric else atom.molweight
            atom_centre = array([float(atom.coord[0]),
                                 float(atom.coord[1]),
                                 float(atom.coord[2])])
            weighted_sum = weighted_sum + atom_centre * weight
            whole_mass += weight

        if whole_mass == 0:
            raise ValueError('Center of Mass cannot be calculated: '
                             'no atoms or zero total mass.')

        self.center_of_mass = weighted_sum / whole_mass
        return self.center_of_mass

    def create_PDB_obj(self, id, filename):
        """
        creates Bio.PDB object from pdb file
        Parameters:
        -----------
            id          : name of structure
            filename    : file name
        Returns:
        ---------
            nothing; the parsed structure is stored in self.struct
        """
        parser = PDBParser()
        self.struct = parser.get_structure(str(id), filename)

    def create_new_structure(self, name, chain_id):
        """
        creates new Bio.PDB structure object
        Parameters:
        -----------
            name        : structure name
            chain_id    : chain name (e.g. A, B, C)
        Returns:
        ---------
            nothing; self.struct is set to a Bio.PDB object with model 0
            and one chain inside
        """
        self.struct = Structure(name)
        my_model = Model(0)
        my_chain = Chain(chain_id)
        self.struct.add(my_model)
        self.struct[0].add(my_chain)

    def get_chainname(self):
        """
        stores the id of the first chain of self.struct in self.chain
        Returns:
        ---------
            self.chain  : id of the first chain
        """
        self.chain = list(self.struct.get_chains())[0].id
        return self.chain

    def get_mol_sequence(self):
        """
        retrieves struct sequence as one letter code
        Parameters:
        -----------
            self.struct     : structure object
        Returns:
        ---------
            self.sequence   : sequence of given structure in one letter code
                              (rebuilt from scratch on every call)
        """
        ##----must be included in tests!!!--------------------
        letters = []
        for resi in self.struct.get_residues():
            resi_name = resi.resname.strip().upper()
            # add one letter nucleotide names
            if len(resi_name) == 1 and resi_name in RESNAMES.values():
                letters.append(resi_name)
            # add hetatms with modifications
            elif resi_name in to_one_letter_code:
                letters.append(to_one_letter_code[resi_name])
            # do not add ions and ligands into sequence
            elif resi_name in LIGANDS:
                continue
            # if anything else appeared include as X
            else:
                letters.append("X")
        # Assign rather than append so a repeated call does not duplicate
        # the sequence.
        self.sequence = "".join(letters)
        return self.sequence

    def get_moltype(self):
        """
        based on the first residue of self.struct determines if a certain
        component is DNA, RNA or protein
        Returns:
        ---------
            self.moltype    : left unchanged when the first residue has a
                              three-letter name found in RESNAMES, or when
                              none of the atom-name rules below matches
        Raises:
        -------
            PyRyStructureError if a three-letter residue name is found in
            neither AMINOACIDS nor RESNAMES
        """
        res = list(self.struct.get_residues())[0]
        res_name = res.resname.strip()
        if len(res_name) == 3:
            if res_name in AMINOACIDS.values():
                self.moltype = 'protein'
            elif res_name not in RESNAMES:
                raise PyRyStructureError("Wrong 3letter name", res_name)
        else:
            for at in res:
                atom_name = at.fullname.strip()
                if atom_name == "CA":
                    self.moltype = 'protein'
                    break
                elif atom_name == "C4'" or atom_name == "C4*":
                    # A sugar atom was found: O2' decides RNA versus DNA.
                    for atom in res.child_list:
                        if atom.fullname.strip() == "O2'":
                            self.moltype = "RNA"
                            break
                    # NOTE(review): the nesting of this DNA fallback was
                    # reconstructed from whitespace-collapsed source; confirm
                    # it belongs inside the C4' branch.
                    if self.moltype == "":
                        self.moltype = "DNA"
        return self.moltype

    def is_structure(self):
        """
        checks if a given structure is Bio.PDB structure object
        Returns:
        ---------
            True when self.struct is a Bio.PDB entity
        Raises:
        ------
            PyRyStructureError : if self.struct is not Bio.PDB object
        """
        if isinstance(self.struct, Entity.Entity):  # Structure, Model, Chain, Residue
            return True
        raise PyRyStructureError('%s should be one of\n'
                                 'the following objects:Structure, Model, '
                                 'Chain, Residue, \n'
                                 'list of Atoms.' % (self.struct))

    def set_chain_name(self, chain):
        """ stores chain as self.chain """
        self.chain = chain

    def set_moltype(self, moltype):
        """ stores moltype as self.moltype """
        self.moltype = moltype

    def set_structure(self, struct):
        """ stores struct as self.struct """
        self.struct = struct

    def set_pyrystructure(self, structure=None):
        """
        sets structure as PyRyStructure attribute and refreshes the derived
        chain name and molecule type
        Parameters:
        -----------
            structure   : Bio.PDB structure object; only used when
                          self.struct is still None
        """
        if self.struct is None:
            self.struct = structure
        # The sequence is only derived when none has been set yet.
        if self.sequence == '':
            self.get_mol_sequence()
        self.get_chainname()
        self.get_moltype()

    def set_sequence(self, seq):
        """ stores seq as self.sequence """
        self.sequence = seq

    def write_structure(self, filename):
        """
        Writing structure to the pdb_file, saving changed coordinates
        Parameters:
        -----------
            filename    : final name of structure file
        """
        out = PDBIO()
        out.set_structure(self.struct)
        out.save(filename)