def test_substitute(self):
    """Exchanging the base of the adenosine must yield G, C, and U in turn."""
    for target in ('G', 'C', 'U'):
        exchange_base(self.adenosine, target)
        # A fresh recognizer is used for every check.
        recognized = BaseRecognizer().identify_resi(self.adenosine)
        self.assertEqual(recognized, target)
def test_mutate(self):
    """A residue can be converted through a chain of different residue names."""
    recognizer = BaseRecognizer()
    residue = RNAResidue(self.chain.child_list[2])
    # Applied one after another to the same residue.
    targets = (
        'A', 'Am', 'C', 'dC', 'dT', 'C', 'A',
        'ac6A', 'm5U', 'Y', 'dA', 'Am', 'A',
    )
    for target in targets:
        modify_residue(residue, target)
        self.assertEqual(recognizer.identify_resi(residue), target)
def test_mods_sanity(self):
    """Adding and removing modifications many times should work as well."""
    # NOTE: an earlier version of this list used 'm66A'; there is no
    # modification with that name, 'm6Am' is used instead.
    modifications = ('m1A', 'm6Am', 'Am', 't6A')
    for modification in modifications:
        before = BaseRecognizer().identify_resi(self.adenosine)
        self.assertEqual(before, 'A')
        add_modification(self.adenosine, modification)
        after = BaseRecognizer().identify_resi(self.adenosine)
        self.assertEqual(after, modification)
        # Restore the residue for the next round.
        remove_modification(self.adenosine)
def test_dna_exchange(self):
    """Every DNA base can be exchanged for every other DNA base."""
    dna_bases = ['dT', 'dA', 'dG', 'dC']
    recognizer = BaseRecognizer()
    residue = self.adenosine
    for first in dna_bases:
        add_modification(residue, first)
        self.assertEqual(recognizer.identify_resi(residue), first)
        # From the current state, switch to each DNA base in turn.
        for second in dna_bases:
            remove_modification(residue)
            add_modification(residue, second)
            self.assertEqual(recognizer.identify_resi(residue), second)
def test_resubstitute(self):
    """Exchanging A for G, C or U and back again should give A again."""
    for intermediate in ('G', 'C', 'U'):
        exchange_base(self.adenosine, intermediate)
        exchange_base(self.adenosine, 'A')
        recognized = BaseRecognizer().identify_resi(self.adenosine)
        self.assertEqual(recognized, 'A')
def test_mutate_unknown(self):
    """Exchanges to and from unknown residues behave as expected.

    A residue modified to 'X' can no longer be recognized, while the
    fixture's unknown residues 39 and 40 report the short abbreviations
    'A' and 'P' after being modified to 'A' and 'Y'.
    """
    recognizer = BaseRecognizer()
    residue = RNAResidue(self.chain.child_list[2])
    modify_residue(residue, 'X')
    self.assertRaises(BaseRecognitionError, recognizer.identify_resi, residue)

    unknown_39 = self.unk['39']
    modify_residue(unknown_39, 'A')
    self.assertEqual(self.unk['39'].short_abbrev, 'A')

    unknown_40 = self.unk['40']
    modify_residue(unknown_40, 'Y')
    self.assertEqual(self.unk['40'].short_abbrev, 'P')
def test_add_to_wrong_base(self):
    """Adding a G modification (m1G) to A yields a complete m1G residue.

    The residue must be recognized as m1G and carry exactly the atom
    names listed below.
    """
    add_modification(self.adenosine, 'm1G')
    recognized = BaseRecognizer().identify_resi(self.adenosine)
    self.assertEqual(recognized, 'm1G')
    # Expected atom names, already in sorted order.
    atoms_m1G = [
        "C1'", 'C2', "C2'", "C3'", 'C4', "C4'", 'C5', "C5'", 'C6', 'C8',
        'CM1', 'N1', 'N2', 'N3', 'N7', 'N9', "O2'", "O3'", "O4'", "O5'",
        'O6', 'OP1', 'OP2', 'P'
    ]
    found = sorted(at.name.strip() for at in self.adenosine.child_list)
    self.assertEqual(atoms_m1G, found)
def setUp(self):
    """Create the BaseRecognizer instance used by the tests."""
    self.br = BaseRecognizer()
class BaseRecognizerTests(TestCase):
    """
    Tests for the modification recognizer.
    Takes sample PDB files and checks whether all
    modifications are found.
    """

    def setUp(self):
        """Create a fresh recognizer for every test."""
        self.br = BaseRecognizer()

    def tearDown(self):
        """Drop the recognizer created in setUp."""
        self.br = None

    def test_mini_template(self):
        """Should identify 15 bases including one modification."""
        names = [
            'G', 'C', 'G', 'G', 'A', 'U', 'U', 'U', 'A', 'm2G',
            'C', 'U', 'C', 'A', 'G'
        ]
        struc = PDBParser().get_structure('test', MINI_TEMPLATE)
        chain = struc[0]['A']
        for resi, correct in zip(chain, names):
            base = self.br.identify_resi(resi)
            self.assertEqual(base, correct)

    def test_border_cases(self):
        """Recognize difficult residues by M.Skorupski."""
        path = TEST_DATA_PATH + 'nucleotides/border_cases/unknown%i_%s.pdb'
        EXAMPLES = [(1, 'C'), (2, 'C'), (3, 'C'), (4, 'U'), (5, 'C'), (6, 'C')]
        for num, base in EXAMPLES:
            fname = path % (num, base)
            struc = PDBParser().get_structure('test', fname)
            # first residue of the first chain in the file
            resi = struc[0].child_list[0].child_list[0]
            result = self.br.identify_resi(resi)
            self.assertEqual(result, base)

    def test_1ehz(self):
        """In the tRNA structure 14 modifications should be found."""
        # check the modifications in 1ehz (residue number -> abbreviation)
        ehz_modifications = {
            10: 'm2G', 16: 'D', 17: 'D', 26: 'm22G', 32: 'Cm', 34: 'Gm',
            37: 'yW', 39: 'Y', 40: 'm5C', 46: 'm7G', 49: 'm5C', 54: 'm5U',
            55: 'Y', 58: "m1A",
        }
        # merged with former speed test
        struc = PDBParser().get_structure('test', RNA_1EHZ)
        chain = struc[0]['A']
        checked = 0
        for resi in chain.child_list:
            base = self.br.identify_resi(resi)
            number = resi.id[1]
            if number in ehz_modifications:
                self.assertEqual(base, ehz_modifications[number])
                checked += 1
            else:
                # every other residue must at least be given some name
                self.assertNotEqual(base, '')
        self.assertEqual(checked, 14)

    def test_ms2i6A(self):
        """Test ms2i6A because this one is difficult."""
        struc = PDBParser().get_structure('test_struc', RNA_2OW8)
        chain = struc[0]['z']
        resi = chain.child_list[36]
        base = self.br.identify_resi(resi)
        self.assertEqual(base, 'ms2i6A')

    def test_1qf6(self):
        """Test another difficult tRNA."""
        # check the modifications in 1qf6 (residue number -> abbreviation)
        qf_modifications = {
            16: 'D', 17: 'D', 20: 'D', 37: 'm6t6A', 46: 'm7G',
            54: 'm5U', 55: 'Y',
        }
        struc = PDBParser().get_structure('test_struc', RNA_1QF6)
        chain = struc[0]['B']
        checked = 0
        for resi in chain.child_list:
            number = resi.id[1]
            if number in qf_modifications:
                base = self.br.identify_resi(resi)
                self.assertEqual(base, qf_modifications[number])
                checked += 1
        # every listed position must have been present in the chain
        self.assertEqual(checked, len(qf_modifications))

    def test_trna12H_37(self):
        """Difficult t6A example should work."""
        struc = PDBParser().get_structure(
            'test_struc', PATH_TO_LIR_STRUCTURES + 'trna12H.pdb')[0][' ']
        resi = struc.child_dict[('H_T6A', 37, ' ')]
        self.assertEqual(self.br.identify_resi(resi), 't6A')

    def test_pr0059Hc_32(self):
        """Difficult m2OH example should work."""
        struc = PDBParser().get_structure(
            'test_struc', PATH_TO_LIR_STRUCTURES + 'pr0059Hc.pdb')[0]['C']
        resi = struc.child_dict[('H_OMC', 32, ' ')]
        self.assertEqual(self.br.identify_resi(resi), 'Cm')

    def test_2ap0_9_a_8(self):
        """Difficult C example should work."""
        struc = PDBParser().get_structure(
            'test_struc', TEST_DATA_PATH + 'nucleotides/2ap0_9_A.pdb')[0]['A']
        resi = struc.child_dict[(' ', 8, ' ')]
        self.assertEqual(self.br.identify_resi(resi), 'C')

    def test_bromouridine(self):
        """Modifications with strange hetatoms should be caught."""
        struc = PDBParser().get_structure(
            'test_struc', TEST_DATA_PATH + 'nucleotides/5BrU.pdb')[0]['A']
        resi = struc.child_list[0]
        self.assertEqual(self.br.identify_resi(resi), '?U')

    def test_3jyv_7(self):
        """Difficult m2G example should work."""
        struc = PDBParser().get_structure(
            'test_struc', TEST_DATA_PATH + 'rna_structures/3jyv_7.pdb')[0]['7']
        resi = struc.child_dict[(' ', 10, ' ')]
        self.assertEqual(self.br.identify_resi(resi), 'm2G')
        resi = struc.child_dict[(' ', 26, ' ')]
        self.assertEqual(self.br.identify_resi(resi), 'm22G')
        resi = struc.child_dict[(' ', 32, ' ')]
        self.assertEqual(self.br.identify_resi(resi), 'Cm')
        resi = struc.child_dict[(' ', 39, ' ')]
        self.assertEqual(self.br.identify_resi(resi), 'Y')

    def test_atp_adp_amp(self):
        """Nucleotide ligands in the ATP_ADP_AMP sample file are recognized."""
        struc = PDBParser().get_structure(
            'test_struc', TEST_DATA_PATH + 'nucleotides/ATP_ADP_AMP.pdb')[0]['B']
        resis = struc.child_list
        self.assertEqual(self.br.identify_resi(resis[0]), 'ADP')
        self.assertEqual(self.br.identify_resi(resis[1]), 'GTP')
        self.assertEqual(self.br.identify_resi(resis[2]), 'AMP')
        self.assertEqual(self.br.identify_resi(resis[3]), 'GTP')
        self.assertEqual(self.br.identify_resi(resis[4]), 'dTMP')

    def test_amo(self):
        """recognize A with aminoacylated phosphate."""
        struc = PDBParser().get_structure(
            'test_struc', TEST_DATA_PATH + 'nucleotides/1C0A_AMO.pdb')[0]['A']
        resis = struc.child_list
        self.assertEqual(self.br.identify_resi(resis[0]), '?A_mod_phos')

    def test_so4_group(self):
        """Strange anorganic group should not work."""
        struc = PDBParser().get_structure('test_struc', SULFATE)[0]['A']
        resi = struc.child_dict[('H_SO4', 1481, ' ')]
        self.assertRaises(BaseRecognitionError, self.br.identify_resi, resi)

    def test_misc_ligands(self):
        """Large set of exemplars is recognized."""
        # Expected name per residue, in chain order; None marks residues
        # for which recognition is expected to raise BaseRecognitionError.
        expected = [
            '3pADP', 'GMP', 'm66A', 'ms2i6Aiso', 'A', 'd8fG', 'preQ1tRNA',
            '23pC', 'D', 'arabinoseU', 'd8oG', '2ofluoro-m5U', '3meo5mC',
            'Arp', 'dmh5U', 'QtRNA', 'm5Ueth', 'dG', 'm1A', '2oNAcC', 'm4Cm',
            '3meoG', 'phosphonoG', 'm6t6A', '2ofluoro-8oG', 'd5propnU', 'm1A',
            'd5mpC', 'm1G', 'm7G', 'mnm5U', 'o6U', 'de3T', 'm22G', 'm7G',
            "?Tm_5'amino", 'disoG', '?U_mod_phos', 'm5U', 'mcm5s2U', '2oNC',
            'dA', 'arabinoseA', '7fluorobenzylG', 'm5C', 'VA', 'd3pT',
            'mnm5s2U', 'dhOroP', 'm5Um', 'd5mpA', '5fluoroC', 's2U', 'dm5C',
            '3meoA', 'Am', '?C_mod_phos', 'd5mpA', 'd3pT', '5fluoroU', 'dG',
            'd5propU', 'N2A', 'alpha-dA', 'dC', '?G_mod_phos', '2ofluoro-m7G',
            'd5nitroU', 'dN2A', 'Cm', 'dethC', '2mcarbT', '23pA', 'Gm', 'D',
            'yW', '5nitroC', 'Um', 's4U', None, '?U_mod_diphos', 'm5Um',
            'arabinoseC', 'm2A', 'm2G', 't6A', 'd35pG', 'A', 'dm8G', 'm5U',
            'arabinoseG', None, '35pG', '?A_mod_phos', '?U', 'dm6G', 'tfA',
            '?U_mod_diphos', '?A_mod_phos', 'dmo5U', None, 'dN72G',
            '3meo5mU', 'tfT', 'Y', '2ofluoro-8oG', "?Tm_5'amino",
            'phosphonoG', 'd5propU'
        ]
        correct = 0
        chain = PDBParser().get_structure(
            'test_struc',
            TEST_DATA_PATH + 'nucleotides/misc_nucleotides.pdb')[0]['A']
        for resi, exp in zip(chain.child_list, expected):
            try:
                result = self.br.identify_resi(resi)
            except BaseRecognitionError:
                result = None
            if result == exp:
                correct += 1
            else:
                # report the mismatch; a single pre-formatted string prints
                # identically under the print statement and print function
                print('%s %s %s' % (resi, result, exp))
        self.assertEqual(correct, len(chain.child_list))
def setUp(self):
    """Build the alignment, template, model and recognizer fixtures.

    Also stores the template sequence as it is right after the model
    has been created (``seq_before``).
    """
    alignment = read_alignment(MINI_ALIGNMENT)
    template = Template(MINI_TEMPLATE, seq=Sequence("GCGGAUUUALCUCAG"))
    self.a = alignment
    self.t = template
    self.m = RnaModel(template, alignment)
    self.seq_before = template.get_sequence()
    self.br = BaseRecognizer()
class RNAResidue(Residue):
    """
    Supplements Bio.PDB.Residue object with functions to
    manage RNA-specific features.
    """
    # one recognizer shared by all instances
    br = BaseRecognizer()

    def __init__(self, pdb_residue, alphabet_entry=None, new_atoms=True):
        """
        Arguments:
        - residue as a Bio.PDB.Residue instance
        - optional: AlphabetEntry instance, if it is not given, the residue
          is identified using BaseRecognizer (slow)
        - optional: new_atoms; if True (default) the atoms are copied,
          otherwise the atom objects of pdb_residue are reused.
        """
        Residue.__init__(self, pdb_residue.id, pdb_residue.resname, ' ')
        self.number = pdb_residue.id[1]
        self.disordered = pdb_residue.disordered
        self.has_double_coord = self.__check_double_coord(pdb_residue)
        self.modified = None
        self.long_abbrev = None
        if alphabet_entry:
            abbrev = alphabet_entry.long_abbrev
        else:
            try:
                abbrev = self.br.identify_resi(pdb_residue)
            except BaseRecognitionError:
                # unrecognized residue: use the name of the ANY_RESIDUE
                # entry, the same fallback change_name applies.
                abbrev = alphabet.get_short_original(ANY_RESIDUE).long_abbrev
        self.change_name(abbrev)
        self.identifier = str(self.id[1]).strip() + self.id[2].strip()
        self.__create_atoms(pdb_residue, new_atoms)
        # caches for H-bond calculation --> faster
        self._donors = None
        self._acceptors = None
        self._donor_hydrogens = {}

    def __check_double_coord(self, resi):
        """
        Checks whether any atoms in residue
        have alternative coordinates given in the pdb file.
        """
        if not self.disordered:
            return False
        for atom in resi:
            if atom.is_disordered() and len(atom.disordered_get_list()) > 1:
                return True
        return False

    def __create_atoms(self, pdb_residue, new_atoms):
        """
        Adds the atoms of pdb_residue to this residue.

        With new_atoms set, every atom is copied (in case the original is
        manipulated) and atoms whose name starts with '*', 'H', '1', '2'
        or '3' are left out. Otherwise the old atoms are reused as they
        are (saves time).
        """
        if not new_atoms:
            for atom in pdb_residue.child_list:
                self.add(atom)
            return
        for atom in pdb_residue.child_list:
            if atom.name[0] in '*H123':
                continue
            # element = first character of the name that is neither a
            # digit nor whitespace; 'C' if no such character exists.
            element = (re.sub(r'[\s\d]', '', atom.name) or 'C')[0]
            new_at = Atom(atom.name, atom.coord, atom.bfactor,
                          atom.occupancy, atom.altloc, atom.fullname,
                          atom.serial_number, element=element)
            self.add(new_at)

    def __len__(self):
        """Returns number of atoms."""
        return len(self.child_list)

    def __repr__(self):
        """Returns string representation"""
        return '<Residue %s %s>' % (self.identifier, self.long_abbrev)

    def __getitem__(self, name):
        """
        Returns an atom like PDB.Residue,
        but interprets N* as the glycosidic N.

        Raises RNAResidueError if 'N*' is requested and no suitable
        atom can be determined.
        """
        if name != 'N*':
            return Residue.__getitem__(self, name)
        # KR: added to take care of the N1/N9 locally (important for LIR)
        # C,U ---> N1; A,G ---> N9; pseudouridine ---> C5;
        if self.long_abbrev in ['Y', 'm1acp3Y', 'Ym', 'm3Y']:
            return Residue.__getitem__(self, 'C5')
        if self.pyrimidine:
            return Residue.__getitem__(self, 'N1')
        if self.purine:
            return Residue.__getitem__(self, 'N9')
        # Neither purine nor pyrimidine: look at the atoms present.
        # Unknown residues ('X') prefer N9, all others prefer N1.
        if self.original_base == 'X':
            candidates = ('N9', 'N1')
        else:
            candidates = ('N1', 'N9')
        for atom_name in candidates:
            if atom_name in self.child_dict:
                return Residue.__getitem__(self, atom_name)
        raise RNAResidueError(
            'Cannot decide which atom to use for glycosidic N in residue %s'
            % self)

    def change_number(self, new_number):
        """
        Changes a residues number to the given string.

        The string is either a plain integer ('12') or an integer
        followed by a one-character insertion code ('12A').
        Raises RNAResidueError if it cannot be parsed that way.
        """
        stripped = new_number.strip()
        try:
            num = int(stripped)
            letter = ' '
        except ValueError:
            # not a plain integer: try number + insertion code
            try:
                letter = stripped[-1]
                num = int(stripped[:-1])
            except (ValueError, IndexError):
                raise RNAResidueError(
                    'Invalid residue number: %s' % new_number)
        # Assigned outside the try blocks so that an error raised by the
        # assignment itself is not mistaken for a parsing problem.
        self.id = (self.id[0], num, letter)
        self.number = num
        self.identifier = stripped

    def change_name(self, new_name):
        """
        Changes the residues name.
        to a new name (as a long abbreviation if modified)

        Names that are not in the alphabet are replaced by the
        name of the ANY_RESIDUE entry.
        """
        if new_name not in alphabet:
            new_name = alphabet.get_short_original(ANY_RESIDUE).long_abbrev
        aentry = alphabet[new_name]
        self.resname = aentry.pdb_abbrev
        self.long_abbrev = aentry.long_abbrev
        if new_name in STANDARD_BASES:
            # standard base
            self.modified = False
            self.id = (' ', self.id[1], self.id[2])
        elif aentry.original_base.upper() in "X":
            # unknown residues --> water, ions.
            self.modified = False
        else:
            # modified base
            self.modified = True
            self.id = ('H_' + aentry.pdb_abbrev, self.id[1], self.id[2])
            if aentry.pdb_abbrev == 'UNK':
                # use new_abbrev left-padded with zeros to three
                # characters as residue name instead of 'UNK'
                abbrev = '0' * (3 - len(aentry.new_abbrev)) + aentry.new_abbrev
                self.resname = abbrev
                self.id = ('H_' + abbrev, self.id[1], self.id[2])
        self._clear_caches()

    def _clear_caches(self):
        """Delete internally saved shortcuts"""
        self._donors = None
        self._acceptors = None
        self._donor_hydrogens = {}

    @property
    def alphabet_entry(self):
        """Returns an alphabet entry for this residue."""
        return alphabet[self.long_abbrev]

    @property
    def purine(self):
        """Returns True if the residue is a purine, False otherwise."""
        return self.original_base in ("G", "A")

    @property
    def pyrimidine(self):
        """Returns True if the residue is a pyrimidine, False otherwise."""
        return self.original_base in ("C", "U")

    @property
    def original_base(self):
        """Returns the unmodified base abbreviation."""
        return self.alphabet_entry.original_base

    @property
    def short_abbrev(self):
        """Returns a one-letter abbreviation of the residue."""
        return self.alphabet_entry.short_abbrev

    @property
    def new_abbrev(self):
        """Returns the Modomics nomenclature abbreviation."""
        return self.alphabet_entry.new_abbrev

    @property
    def pdb_abbrev(self):
        """Returns a three-letter PDB abbreviation."""
        return self.alphabet_entry.pdb_abbrev

    @property
    def full_name(self):
        """Returns the full name of the nucleotide."""
        return self.alphabet_entry.full_name

    @property
    def category(self):
        """Returns the category of the nucleotide."""
        return self.alphabet_entry.category

    # ------------------- helper functions to work with atoms ---------------
    def get_atom_vector(self, name):
        """
        returns a vector for the given atom. N* encodes the glyco-N

        Raises RNAResidueError if there is no such atom.
        """
        try:
            return self[name].get_vector()
        except KeyError:
            raise RNAResidueError('There is no atom %s in residue %s'
                                  % (name, self.identifier))

    def get_atoms_by_names(self, names, strict=False):
        """
        Generates atoms from the given list of names.

        Missing atoms are skipped, unless strict is set;
        then a KeyError is raised for the first missing atom.
        """
        for name in names:
            try:
                atom = self[name]
            except KeyError:
                if strict:
                    raise KeyError("Atom %s not found" % name)
                continue
            yield atom

    def check_atoms(self, names):
        """Returns True if all atom names in the given list exist."""
        try:
            # exhaust the generator; it raises on the first missing atom
            for _ in self.get_atoms_by_names(names, strict=True):
                pass
        except KeyError:
            return False
        return True

    def get_bp(self, resi2):
        """returns an interaction type between two residues
        or None if there is no interaction"""
        return base_pair_calc(self, resi2)

    # ------------ helper methods for h-bond calculation ----------------------
    def get_hbond_donors(self):
        """Returns a (cached) list of atoms that are H-bond donors of this residue."""
        # 'is not None' so that an empty result is cached as well
        if self._donors is not None:
            return self._donors
        key = self.original_base.strip()
        result = list(self.get_atoms_by_names(DONORS.get(key, [])))
        self._donors = result
        return result

    def get_hbond_acceptors(self):
        """Returns a (cached) list of atoms that are H-bond acceptors of this residue."""
        # 'is not None' so that an empty result is cached as well
        if self._acceptors is not None:
            return self._acceptors
        key = self.original_base.strip()
        result = list(self.get_atoms_by_names(ACCEPTORS.get(key, [])))
        self._acceptors = result
        return result

    def get_neighbors(self, atom):
        """
        Returns a list of atoms in the same residue connected by bonds.

        The purine table is used if the residue has an N9 atom, the
        pyrimidine table otherwise. Atoms that have no entry in the
        table have no neighbors.
        """
        if 'N9' in self.child_dict:
            nb_dict = PURINE_NEIGHBOR_TABLE
        else:
            nb_dict = PYRIMIDINE_NEIGHBOR_TABLE
        result = []
        for name in nb_dict.get(atom.fullname, ()):
            child = self.child_dict.get(name)
            if child:
                result.append(child)
        return result

    def get_donor_hydrogens(self, donor):
        """
        Returns a list of coord records of hypothetical hydrogen positions.
        If the donor has two neighbors, this will be a single position, if
        it has only one, one position is generated for each torsion in
        H_GENERATE_TORSIONS.
        For donors with no or with 3 or more neighbors the list is empty.
        Results are cached by donor name.
        """
        # TODO: refactor this out.
        if donor.name in self._donor_hydrogens:
            return self._donor_hydrogens[donor.name]

        hydrogens = []
        neighbors = self.get_neighbors(donor)
        don_vec = donor.get_vector()
        sup_vec1 = None  # coordinates next to donor
        sup_vec2 = None  # coordinates to calc torsion

        if len(neighbors) == 1:
            sup_vec1 = neighbors[0].get_vector()
            # take a neighbor of the neighbor (other than the donor itself)
            second_shell = self.get_neighbors(neighbors[0])
            while second_shell and sup_vec2 is None:
                candidate = second_shell.pop()
                if candidate != donor:
                    sup_vec2 = candidate.get_vector()
            # bad case: no neighbors to construct 2nd support vec
            if sup_vec2 is None:
                sup_vec2 = (don_vec ** sup_vec1)
            angle = H_ANGLE_ONE
            torsions = H_GENERATE_TORSIONS
        elif len(neighbors) == 2:
            sup_vec1 = neighbors[0].get_vector()
            sup_vec2 = neighbors[1].get_vector()
            angle = H_ANGLE_TWO
            torsions = [180.0]

        if sup_vec1 is not None and sup_vec2 is not None:
            # create hydrogen positions
            for torsion in torsions:
                h_pos = build_coord(sup_vec2, sup_vec1, don_vec,
                                    H_COVALENT_BOND, angle, torsion)
                h_pos = array([h_pos[0], h_pos[1], h_pos[2]])
                hydrogens.append(h_pos)

        self._donor_hydrogens[donor.name] = hydrogens
        return hydrogens
def test_add_to_a(self):
    """Adding the m1A modification to A gives a residue recognized as m1A."""
    add_modification(self.adenosine, 'm1A')
    recognized = BaseRecognizer().identify_resi(self.adenosine)
    self.assertEqual(recognized, 'm1A')
def test_init(self):
    """The Adenosine test file must be really an A."""
    residue = self.adenosine
    self.assertEqual(residue.long_abbrev, 'A')
    self.assertEqual(residue.short_abbrev, 'A')
    # The recognizer must agree with the stored abbreviations.
    self.assertEqual(BaseRecognizer().identify_resi(residue), 'A')
def test_remove_deoxy(self):
    """Removing the modification from a dG residue should give G."""
    structure = ModernaStructure('file', DNA_WITH_MISMATCH, 'E')
    residue = structure['10']
    recognized_before = BaseRecognizer().identify_resi(residue)
    self.assertEqual(recognized_before, 'dG')
    remove_modification(residue)
    recognized_after = BaseRecognizer().identify_resi(residue)
    self.assertEqual(recognized_after, 'G')
def test_remove(self):
    """Removing the modification from an m2G residue should give G."""
    structure = ModernaStructure('file', MINI_TEMPLATE)
    residue = structure['10']
    recognized_before = BaseRecognizer().identify_resi(residue)
    self.assertEqual(recognized_before, 'm2G')
    remove_modification(residue)
    recognized_after = BaseRecognizer().identify_resi(residue)
    self.assertEqual(recognized_after, 'G')
def test_all(self):
    """Adding should work for all modifications.

    Every alphabet entry that is neither excluded by name nor by
    category is added to a fresh A residue and must afterwards be
    recognized under its own name (or its synonym). Mismatches and
    ModernaResidueErrors are collected and reported together; the test
    fails if there is at least one of either.
    """
    a = Alphabet()
    br = BaseRecognizer()
    not_working = []
    errors = []
    EXCLUDED = [
        'A', 'G', 'C', 'U',
        '?A', '?G', '?C', '?U',  # exclude unknown
        'X', '?X', 'Xm', 'x',
        'preQ0base', 'Qbase', 'preQ1base',
        'galQtRNA',  # indistinguishable from ManQtRNA
        '-', '_',
        'yW-58', 'yW-72', 'yW-86', 'm8A', 'fa7d7G',  # new in Modomics 2009, not yet in ModeRNA.
        'm7Gpp_cap',  # not implemented yet
    ]
    # names under which the recognizer reports some modifications
    SYNONYMS = {
        'm42C': 'm44C',
        'm42Cm': 'm44Cm',
        'm62A': 'm66A',
        'm62Am': 'm66Am'
    }
    SKIPPED_CATEGORIES = [
        'unknown', 'standard', 'ligand', 'synthetic', 'stereoisomer',
        'insertion', 'missing', ' '
    ]
    for k in a:
        if k in EXCLUDED or a[k].category in SKIPPED_CATEGORIES:
            continue
        struc = ModernaStructure('file', A_RESIDUE)
        r = struc['1']
        try:
            add_modification(r, k)
            right = SYNONYMS.get(k, k)
            recognized = br.identify_resi(r)
            if recognized != right:
                not_working.append(k + ',' + recognized)
                # write file for checking
                struc.write_pdb_file('dummies/' + k + '.pdb')
        except ModernaResidueError:
            # collect instead of aborting, so that all failing
            # modifications show up in one report
            errors.append(k)
    if not_working or errors:
        # single-string arguments print identically under the print
        # statement and the print function
        print('\nTest failed for modifications.')
        print('Different base was recognized:')
        print(', '.join(not_working))
        print('ERROR occured:')
        print(', '.join(errors))
    self.assertEqual(len(not_working) + len(errors), 0)
def test_add_to_u(self):
    """After exchanging the base to U, adding Y gives a residue recognized as Y."""
    residue = self.adenosine
    exchange_base(residue, 'U')
    add_modification(residue, 'Y')
    recognized = BaseRecognizer().identify_resi(residue)
    self.assertEqual(recognized, 'Y')