def test_delete_clashing_sidechains(package, resource, cutoff, sequence):
    """Check the sequence left after removing clashing side chains.

    The first molecule read from the packaged resource is passed through
    ``delete_clashing_sidechains`` (with ``cutoff``) and then through
    ``delete_partial_residues``; the resulting sequence must equal
    ``sequence``.
    """
    with resources.path(package, resource) as resource_path:
        molecules = read_molecules(str(resource_path))
        protein = molecules[0]
        protein = delete_clashing_sidechains(protein, cutoff)
        protein = delete_partial_residues(protein)
        observed_sequence = get_sequence(protein)
        assert observed_sequence == sequence
def test_apply_insertions(package_list, resource_list, sequence):
    """Check that applying insertions reproduces the requested sequence.

    ``package_list``/``resource_list`` hold two entries each: index 0
    locates the structure file, index 1 locates the loop database that is
    handed to ``apply_insertions``.
    """
    # Both resources are entered in the same order as nested ``with`` blocks.
    with resources.path(package_list[0], resource_list[0]) as pdb_path, \
            resources.path(package_list[1], resource_list[1]) as loop_db_path:
        protein = read_molecules(str(pdb_path))[0]
        protein = remove_non_protein(protein, remove_water=True)
        modeled_protein = apply_insertions(protein, sequence, loop_db_path)
        observed_sequence = get_sequence(modeled_protein)
        assert observed_sequence == sequence
def test_apply_mutations(package, resource, sequence, delete_fallback, expectation,
                         expected_sequence):
    """Check the sequence obtained after applying mutations.

    ``expectation`` is used as a context manager around the mutation step
    and the comparison, so anything it is set up to handle is handled for
    the whole mutate-and-compare section.
    """
    with resources.path(package, resource) as pdb_path:
        protein = read_molecules(str(pdb_path))[0]
        protein = remove_non_protein(protein, remove_water=True)
        with expectation:
            mutated_protein = apply_mutations(protein, sequence, delete_fallback)
            observed_sequence = get_sequence(mutated_protein)
            assert observed_sequence == expected_sequence
def _get_kinase_residue_numbers(
    kinase_domain_structure: oechem.OEGraphMol,
    canonical_kinase_domain_sequence: KinaseDomainAminoAcidSequence,
) -> List[int]:
    """
    Map the residues of a kinase domain structure onto canonical numbering.

    The sequence of the structure is globally aligned against the canonical
    kinase domain sequence. Numbering starts at
    ``canonical_kinase_domain_sequence.metadata["begin"]`` and advances by
    one for every non-gap position of the aligned canonical sequence.

    Parameters
    ----------
    kinase_domain_structure: oechem.OEGraphMol
        The kinase domain structure.
    canonical_kinase_domain_sequence: KinaseDomainAminoAcidSequence
        The canonical kinase domain sequence.

    Returns
    -------
    residue_numbers: list of int
        Canonical residue numbers, one per residue of the given structure,
        in the order the residues appear in the structure's sequence.

    Raises
    ------
    NotImplementedError
        If the alignment puts a gap in the canonical sequence.
    """
    from Bio import pairwise2
    from kinoml.modeling.OEModeling import get_sequence

    logging.debug("Getting sequence of given kinase domain ...")
    target_sequence = get_sequence(kinase_domain_structure)

    logging.debug("Aligning sequences ...")
    # Only the first alignment returned is used; its first two fields are
    # the aligned canonical (template) and structure (target) sequences.
    first_alignment = pairwise2.align.globalxs(
        canonical_kinase_domain_sequence, target_sequence, -10, 0
    )[0]
    template_sequence, target_sequence = first_alignment[:2]
    logging.debug(f"Template sequence:\n{template_sequence}")
    logging.debug(f"Target sequence:\n{target_sequence}")

    logging.debug("Generating residue numbers ...")
    residue_numbers = []
    current_number = canonical_kinase_domain_sequence.metadata["begin"]
    for template_residue, target_residue in zip(template_sequence, target_sequence):
        if template_residue == "-":
            # TODO: This situation occurs if the given protein contains
            #       sequence segments that are not part of the
            #       canonical kinase domain sequence from UniProt.
            #       I don't think this will ever happen in the
            #       current implementation.
            raise NotImplementedError
        # A gap in the target means the structure lacks this canonical
        # residue: no number is emitted, but the counter still advances.
        if target_residue != "-":
            residue_numbers.append(current_number)
        current_number += 1
    return residue_numbers
def test_delete_partial_residues(package, resource, delete_backbone_C, sequence):
    """Check the sequence left after removing partial residues.

    If ``delete_backbone_C`` is truthy, its first three characters are used
    as a residue name and the remainder as a residue number; atoms named
    "C" of that residue in chain "A" are deleted before
    ``delete_partial_residues`` is called.
    """
    from openeye import oechem

    with resources.path(package, resource) as resource_path:
        protein = read_molecules(str(resource_path))[0]
        if delete_backbone_C:
            # Keep the hierarchy view bound to a name while its residue is used.
            hier_view = oechem.OEHierView(protein)
            target_residue = hier_view.GetResidue(
                "A", delete_backbone_C[:3], int(delete_backbone_C[3:])
            )
            for atom in target_residue.GetAtoms():
                if atom.GetName().strip() == "C":
                    protein.DeleteAtom(atom)
        protein = delete_partial_residues(protein)
        observed_sequence = get_sequence(protein)
        assert observed_sequence == sequence
def test_delete_short_protein_segments(package, resource, sequence):
    """Check the sequence left after ``delete_short_protein_segments``.

    The first molecule read from the packaged resource is processed and its
    sequence must equal ``sequence``.
    """
    with resources.path(package, resource) as resource_path:
        molecules = read_molecules(str(resource_path))
        protein = delete_short_protein_segments(molecules[0])
        observed_sequence = get_sequence(protein)
        assert observed_sequence == sequence
def test_get_sequence(package, resource, sequence):
    """Check that ``get_sequence`` returns the expected sequence.

    Only the first molecule read from the packaged resource is examined.
    """
    with resources.path(package, resource) as resource_path:
        molecules = read_molecules(str(resource_path))
        observed_sequence = get_sequence(molecules[0])
        assert observed_sequence == sequence