def test_from_pdb_file(self, bgl3_pdb_filename, amino_acid_list):
    """Check that ``PDBStructure.from_pdb_file`` handles each input form.

    The file is loaded three ways: from ``str(bgl3_pdb_filename)``, from an
    open file handle, and from the fixture value passed through unchanged.
    Every result must have the same ``seq`` as a structure built directly
    from ``amino_acid_list``.
    """
    loaded_from_str = PDBStructure.from_pdb_file(str(bgl3_pdb_filename))
    with open(bgl3_pdb_filename) as handle:
        loaded_from_handle = PDBStructure.from_pdb_file(handle)
    loaded_as_given = PDBStructure.from_pdb_file(bgl3_pdb_filename)

    reference = PDBStructure(amino_acid_list)
    for loaded in (loaded_from_str, loaded_from_handle, loaded_as_given):
        assert reference.seq == loaded.seq
def from_json(cls, in_json: str): """Construct instance from JSON. Parameters: in_json: JSON-formatted string representing a ParentsSequences. Return: ParentsSequences instance created from in_json. """ records, alignment, pdb = json.loads(in_json) seq_records = [] for sr_list in records: sr_seq, sr_id, sr_name, sr_desc = sr_list seq = Seq(sr_seq) sr = SeqRecord(seq, id=sr_id, name=sr_name, description=sr_desc) seq_records.append(sr) if pdb is not None: pdb = PDBStructure.from_json(pdb) new_instance = cls(seq_records, pdb) if alignment is not None: # This will renumber the pdb_structure but that's okay. alignment = [tuple(ele) for ele in alignment] new_instance._alignment = alignment return new_instance
def test_json(self, amino_acid_list, bgl3_fasta_filename):
    """Round-trip a PDBStructure through JSON, plain and renumbered.

    A structure that was never renumbered must come back with the same
    ``seq`` and without the renumbering attributes. A structure renumbered
    against the first 400 residues of the first FASTA record must come back
    with the same ``seq`` and ``renumbering_seq``.
    """
    # Plain structure.
    original = PDBStructure(amino_acid_list)
    serialized = original.to_json()
    assert serialized
    restored = PDBStructure.from_json(serialized)
    assert original.seq == restored.seq
    for renumbering_attr in ('unrenumbered_amino_acids', 'renumbering_seq'):
        with pytest.raises(AttributeError):
            getattr(restored, renumbering_attr)

    # Renumbered structure.
    first_parent = str(list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0].seq)
    truncated_parent = first_parent[:400]
    renumbered = PDBStructure(amino_acid_list)
    renumbered.renumber(truncated_parent)
    serialized_renum = renumbered.to_json()
    assert serialized_renum
    restored_renum = PDBStructure.from_json(serialized_renum)
    assert renumbered.seq == restored_renum.seq
    assert renumbered.renumbering_seq == restored_renum.renumbering_seq
def test_renumbering(self, bgl3_fasta_filename, amino_acid_list):
    """Renumber a PDBStructure against the first parent, then revert it.

    Covers three stages: a freshly built structure exposes no renumbering
    state; a deep copy renumbered against the first FASTA record matches
    that record at every non-gap position; and ``derenumber`` restores the
    original sequence and contacts. Finally, calling ``derenumber`` on a
    structure that was never renumbered must raise ``AttributeError``.
    """
    # TODO: Probably want to do this one with multiple PDBs.
    assert amino_acid_list

    # Test PDBStructure init.
    pdb = PDBStructure(amino_acid_list)
    assert pdb.amino_acids
    with pytest.raises(AttributeError):
        pdb.unrenumbered_amino_acids
    with pytest.raises(AttributeError):
        pdb.renumbering_seq
    assert pdb.seq
    assert pdb.contacts

    renum_pdb = copy.deepcopy(pdb)
    p1 = str(list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0].seq)
    renum_pdb.renumber(p1)
    # Renumbering only partitions the residues; none may be lost.
    assert len(pdb.amino_acids) == len(renum_pdb.amino_acids) \
        + len(renum_pdb.unrenumbered_amino_acids)
    # Technically this could be true, but not for this PDB.
    assert pdb.seq != renum_pdb.seq
    assert renum_pdb.renumbering_seq == p1
    assert pdb.contacts

    # Most important!
    assert all(aa == p1[i] for i, aa in enumerate(renum_pdb.seq)
               if aa != '-')

    renum_pdb.derenumber()
    # Check the reverted copy as well as the untouched original: probing
    # only ``pdb`` here would pass no matter what ``derenumber`` did.
    # NOTE(review): assumes derenumber() removes these attributes - confirm.
    for structure in (pdb, renum_pdb):
        with pytest.raises(AttributeError):
            structure.unrenumbered_amino_acids
        with pytest.raises(AttributeError):
            structure.renumbering_seq
    assert pdb.seq == renum_pdb.seq
    assert len(pdb.contacts) == len(renum_pdb.contacts)
    pdb_contacts = set(pdb.contacts)
    for contact in renum_pdb.contacts:
        assert contact in pdb_contacts

    # Derenumbering when not renumbered.
    with pytest.raises(AttributeError):
        pdb.derenumber()
def invalid_init(self, bgl3_fasta_filename, amino_acid_list):
    """Check that inconsistent PDBStructure constructor arguments are rejected.

    Every combination below must raise ``ValueError``.
    """
    # NOTE(review): the name lacks the ``test_`` prefix, so pytest's default
    # collection rules will not run this - confirm whether that is intended.
    p1 = str(list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0].seq)

    # Third argument given while the second is None.
    with pytest.raises(ValueError):
        PDBStructure(amino_acid_list, None, p1)

    # Take valid pieces from a structure that really was renumbered.
    p1_short = p1[:400]
    renum_pdb = PDBStructure(amino_acid_list)
    renum_pdb.renumber(p1_short)
    amino_acids = renum_pdb.amino_acids
    unrenum = renum_pdb.unrenumbered_amino_acids

    rejected_args = (
        (unrenum, None),            # Second argument given, third is None.
        ([], p1_short),             # Empty unrenumbered_amino_acids.
        (unrenum, p1_short[:200]),  # p1 too short.
        (unrenum, p1),              # p1 too long.
    )
    for second_arg, third_arg in rejected_args:
        with pytest.raises(ValueError):
            PDBStructure(amino_acids, second_arg, third_arg)
def get_PDB(self) -> None:
    """Find the best-matching PDB structure via BLAST and attach it.

    The sequence of ``self.records[0]`` is passed to ``query_blast`` against
    ``'pdbaa'`` to collect candidate PDB sequences. Each candidate is scored
    by its minimum identity to all of ``self.records``; the candidate with
    the largest such minimum is downloaded from files.rcsb.org and stored in
    the ``pdb_structure`` attribute.

    Raises:
        RuntimeError: If the query produced no candidates.
        ValueError: If the best candidate's id does not split into exactly
            three ``'|'``-separated fields.
    """
    query_str = str(self.records[0].seq)

    # Find the PDB struct with largest minimum identity to the parents.
    # NOTE(review): 100 is presumably a hit limit - confirm in query_blast.
    pdb_min_map = {
        pdb_sr.id: min(calc_identity(parent, pdb_sr)
                       for parent in self.records)
        for pdb_sr in query_blast(query_str, 'pdbaa', 100)
    }
    if not pdb_min_map:
        raise RuntimeError('No best PDB could be found.')
    best_id = max(pdb_min_map, key=pdb_min_map.get)

    # Get the pdb_structure from rcsb. The id is unpacked as three
    # '|'-separated fields; the last two are used as accession and chain.
    _, acc, chain = best_id.split('|')
    url = f'https://files.rcsb.org/view/{acc}.pdb'
    with urlopen(url) as f:
        self.pdb_structure = PDBStructure.from_pdb_file(f, chain=chain)
def test_double_renum(self, bgl3_fasta_filename, amino_acid_list):
    """Same as test_renumbering but double renum with two reduced p1.

    The copy is renumbered first against the leading 400 residues of the
    first FASTA record, then against that sequence with residues 150-249
    removed. After each pass the renumbered sequence must match the target
    at every non-gap position, and ``derenumber`` must restore the original
    sequence and contacts.
    """
    pdb = PDBStructure(amino_acid_list)
    p1 = str(list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0].seq)

    renum_pdb = copy.deepcopy(pdb)
    p1_short = p1[:400]
    renum_pdb.renumber(p1_short)
    assert renum_pdb.unrenumbered_amino_acids  # Should be occupied now.
    assert len(pdb.amino_acids) == len(renum_pdb.amino_acids) \
        + len(renum_pdb.unrenumbered_amino_acids)
    assert pdb.seq != renum_pdb.seq
    assert renum_pdb.renumbering_seq == p1_short
    assert all(aa == p1_short[i] for i, aa in enumerate(renum_pdb.seq)
               if aa != '-')

    # Renumber again.
    p1_middle_removed = p1_short[:150] + p1_short[250:]
    renum_pdb.renumber(p1_middle_removed)
    assert renum_pdb.unrenumbered_amino_acids
    assert len(pdb.amino_acids) == len(renum_pdb.amino_acids) \
        + len(renum_pdb.unrenumbered_amino_acids)
    assert pdb.seq != renum_pdb.seq
    assert renum_pdb.renumbering_seq == p1_middle_removed
    assert all(aa == p1_middle_removed[i]
               for i, aa in enumerate(renum_pdb.seq) if aa != '-')

    renum_pdb.derenumber()
    # Check the reverted copy as well as the untouched original: probing
    # only ``pdb`` here would pass no matter what ``derenumber`` did.
    # NOTE(review): assumes derenumber() removes these attributes - confirm.
    for structure in (pdb, renum_pdb):
        with pytest.raises(AttributeError):
            structure.unrenumbered_amino_acids
        with pytest.raises(AttributeError):
            structure.renumbering_seq
    assert pdb.seq == renum_pdb.seq
    assert len(pdb.contacts) == len(renum_pdb.contacts)
    for contact in renum_pdb.contacts:
        assert contact in pdb.contacts
return hash(repr(self)) def __repr__(self): in_edges = [e.in_node.index for e in self.in_edges] out_edges = [e.out_node.index for e in self.out_edges] ret = f'Node({self.col}, {self.index}, {self.breakpoint}, ' \ f'{in_edges=}, {out_edges=})' return ret if __name__ == '__main__': import sys loc = '../../tests/bgl3_sample/' pa = ParentAlignment.from_fasta(loc + 'bgl3_sequences.fasta') pdb = PDBStructure.from_pdb_file(loc + '1GNX.pdb') pa.pdb_structure = pdb sys.exit() ''' loc = '../tests/bgl3_sample/truncated/' pa = ParentAlignment.from_fasta(loc+'trunc.fasta') pdb = PDBStructure.from_pdb_file(loc+'trunc.pdb') ''' loc = '../../tests/bgl3_sample/' pa = ParentAlignment.from_fasta(loc + 'bgl3_sequences.fasta') pdb = PDBStructure.from_pdb_file(loc + '1GNX.pdb') pa.pdb_structure = pdb vector_overhangs = [(0, 'TATG'), (3, 'TGAG')] n = 4
def bgl3_PDBStructure(bgl3_pdb_filename):
    """Load a PDBStructure from ``bgl3_pdb_filename``."""
    structure = PDBStructure.from_pdb_file(bgl3_pdb_filename)
    return structure