Example #1
0
    def test_from_pdb_file(self, bgl3_pdb_filename, amino_acid_list):
        """Check that from_pdb_file gives the same sequence for every input.

        The file is supplied three ways: as a str path, as an open file
        object, and as the fixture value itself. Each result must match the
        sequence of a PDBStructure built from amino_acid_list.
        """
        from_str_path = PDBStructure.from_pdb_file(str(bgl3_pdb_filename))
        with open(bgl3_pdb_filename) as handle:
            from_handle = PDBStructure.from_pdb_file(handle)
        from_fixture = PDBStructure.from_pdb_file(bgl3_pdb_filename)

        expected = PDBStructure(amino_acid_list)

        for parsed in (from_str_path, from_handle, from_fixture):
            assert expected.seq == parsed.seq
Example #2
0
    def from_json(cls, in_json: str):
        """Build an instance from its JSON serialization.

        Parameters:
            in_json: JSON string decoding to three items: records, alignment
                and pdb. Each record is a four-item sequence of sequence
                string, id, name and description. alignment and pdb may each
                be null.

        Return:
            New instance of cls built from the decoded records and PDB
            structure, with _alignment set when an alignment was stored.

        """
        raw_records, raw_alignment, raw_pdb = json.loads(in_json)

        seq_records = [
            SeqRecord(Seq(sequence), id=rec_id, name=rec_name,
                      description=rec_desc)
            for sequence, rec_id, rec_name, rec_desc in raw_records
        ]

        pdb_structure = None
        if raw_pdb is not None:
            pdb_structure = PDBStructure.from_json(raw_pdb)

        instance = cls(seq_records, pdb_structure)

        if raw_alignment is None:
            return instance

        # JSON has no tuple type, so each alignment row is converted back to
        # a tuple before being stored on the private attribute.
        # Per the author's note this will renumber the pdb_structure, which
        # is acceptable here -- TODO confirm, only _alignment is assigned.
        instance._alignment = [tuple(row) for row in raw_alignment]
        return instance
Example #3
0
    def test_json(self, amino_acid_list, bgl3_fasta_filename):
        """Round-trip PDBStructure through JSON, plain and renumbered."""
        # Plain structure: the sequence must survive the round trip.
        original = PDBStructure(amino_acid_list)
        serialized = original.to_json()
        assert serialized
        restored = PDBStructure.from_json(serialized)
        assert original.seq == restored.seq

        # The source was never renumbered, so the restored copy must not
        # expose either renumbering attribute.
        for attr in ('unrenumbered_amino_acids', 'renumbering_seq'):
            with pytest.raises(AttributeError):
                getattr(restored, attr)

        # Renumbered structure: renumber against the first 400 characters of
        # the first FASTA record, then round-trip again.
        first_record = list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0]
        full_seq = str(first_record.seq)
        truncated_seq = full_seq[:400]
        renumbered = PDBStructure(amino_acid_list)
        renumbered.renumber(truncated_seq)
        serialized_renum = renumbered.to_json()
        assert serialized_renum
        restored_renum = PDBStructure.from_json(serialized_renum)
        assert renumbered.seq == restored_renum.seq
        assert renumbered.renumbering_seq == restored_renum.renumbering_seq
Example #4
0
    def test_renumbering(self, bgl3_fasta_filename, amino_acid_list):
        """Renumber a structure against the first parent, then undo it.

        Covers the initial (not renumbered) state, the state after
        renumber(), and the state after derenumber(), which must match the
        untouched original in sequence and contacts.
        """
        # TODO: Probably want to do this one with multiple PDBs.
        assert amino_acid_list

        # Test PDBStructure init.
        pdb = PDBStructure(amino_acid_list)
        assert pdb.amino_acids
        with pytest.raises(AttributeError):
            pdb.unrenumbered_amino_acids
        with pytest.raises(AttributeError):
            pdb.renumbering_seq
        assert pdb.seq
        assert pdb.contacts

        renum_pdb = copy.deepcopy(pdb)
        p1 = str(list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0].seq)
        renum_pdb.renumber(p1)
        # Every original amino acid is either kept or set aside.
        assert len(pdb.amino_acids) == (
            len(renum_pdb.amino_acids)
            + len(renum_pdb.unrenumbered_amino_acids)
        )
        # Technically this could be true, but not for this PDB.
        assert pdb.seq != renum_pdb.seq
        assert renum_pdb.renumbering_seq == p1
        assert pdb.contacts
        # Most important!
        assert all(aa == p1[i] for i, aa in enumerate(renum_pdb.seq)
                   if aa != '-')

        renum_pdb.derenumber()
        # Check the object that was actually derenumbered. Checking `pdb`
        # here would only repeat the init checks above.
        # NOTE(review): assumes derenumber() removes both renumbering
        # attributes -- confirm against PDBStructure.derenumber.
        with pytest.raises(AttributeError):
            renum_pdb.unrenumbered_amino_acids
        with pytest.raises(AttributeError):
            renum_pdb.renumbering_seq
        assert pdb.seq == renum_pdb.seq
        assert len(pdb.contacts) == len(renum_pdb.contacts)
        pdb_contacts = set(pdb.contacts)
        for contact in renum_pdb.contacts:
            assert contact in pdb_contacts

        # Derenumbering when not renumbered.
        with pytest.raises(AttributeError):
            pdb.derenumber()
Example #5
0
    def invalid_init(self, bgl3_fasta_filename, amino_acid_list):
        """Check that PDBStructure rejects inconsistent constructor arguments.

        NOTE(review): the name lacks the 'test_' prefix, so pytest's default
        collection rules would skip this method -- confirm that is intended.
        """
        first_record = list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0]
        p1 = str(first_record.seq)

        # Third argument supplied while the second one is None.
        with pytest.raises(ValueError):
            PDBStructure(amino_acid_list, None, p1)

        # Take consistent arguments from a structure renumbered against the
        # first 400 characters of p1, then break them one at a time.
        p1_short = p1[:400]
        renum_pdb = PDBStructure(amino_acid_list)
        renum_pdb.renumber(p1_short)
        amino_acids = renum_pdb.amino_acids
        unrenum = renum_pdb.unrenumbered_amino_acids

        invalid_argument_sets = (
            (amino_acids, unrenum, None),
            # Empty unrenumbered_amino_acids.
            (amino_acids, [], p1_short),
            # p1 too short.
            (amino_acids, unrenum, p1_short[:200]),
            # p1 too long.
            (amino_acids, unrenum, p1),
        )
        for arguments in invalid_argument_sets:
            with pytest.raises(ValueError):
                PDBStructure(*arguments)
Example #6
0
    def get_PDB(self) -> None:
        """Find a PDB structure with BLAST and attach it to this instance.

        The sequence of the first record is used as the BLAST query against
        'pdbaa'. Among the returned candidates, the one with the largest
        minimum identity to the records is selected. Its PDB file is then
        fetched from files.rcsb.org and the parsed structure is assigned to
        the pdb_structure attribute.

        Raises:
            RuntimeError: If the BLAST query returned no candidates.

        """
        query_str = str(self.records[0].seq)

        # Score each candidate by its lowest identity over all records.
        # Candidates sharing an id overwrite each other; the last one wins.
        pdb_min_map = {
            pdb_sr.id: min(calc_identity(parent, pdb_sr)
                           for parent in self.records)
            for pdb_sr in query_blast(query_str, 'pdbaa', 100)
        }
        if not pdb_min_map:
            raise RuntimeError('No best PDB could be found.')

        # Find the PDB struct with largest minimum identity to the parents.
        best_id = max(pdb_min_map, key=pdb_min_map.get)

        # Get the pdb_structure from rcsb. The id must split into exactly
        # three '|'-separated fields; the last two are used as the accession
        # and the chain.
        _, acc, chain = best_id.split('|')
        url = 'https://files.rcsb.org/view/' + acc + '.pdb'
        with urlopen(url) as f:
            pdb_structure = PDBStructure.from_pdb_file(f, chain=chain)

        self.pdb_structure = pdb_structure
Example #7
0
    def test_double_renum(self, bgl3_fasta_filename, amino_acid_list):
        """Same as test_renumbering but double renum with two reduced p1."""
        pdb = PDBStructure(amino_acid_list)
        p1 = str(list(SeqIO.parse(bgl3_fasta_filename, 'fasta'))[0].seq)

        renum_pdb = copy.deepcopy(pdb)
        p1_short = p1[:400]
        renum_pdb.renumber(p1_short)
        assert renum_pdb.unrenumbered_amino_acids  # Should be occupied now.
        assert len(pdb.amino_acids) == (
            len(renum_pdb.amino_acids)
            + len(renum_pdb.unrenumbered_amino_acids)
        )
        assert pdb.seq != renum_pdb.seq
        assert renum_pdb.renumbering_seq == p1_short
        assert all(aa == p1_short[i] for i, aa in enumerate(renum_pdb.seq)
                   if aa != '-')

        # Renumber again, this time against a sequence with characters
        # 150-249 of p1_short cut out.
        p1_middle_removed = p1_short[:150] + p1_short[250:]
        renum_pdb.renumber(p1_middle_removed)
        assert renum_pdb.unrenumbered_amino_acids
        assert len(pdb.amino_acids) == (
            len(renum_pdb.amino_acids)
            + len(renum_pdb.unrenumbered_amino_acids)
        )
        assert pdb.seq != renum_pdb.seq
        assert renum_pdb.renumbering_seq == p1_middle_removed
        assert all(aa == p1_middle_removed[i] for i, aa
                   in enumerate(renum_pdb.seq) if aa != '-')

        renum_pdb.derenumber()
        # Check the object that was actually derenumbered. `pdb` was never
        # renumbered, so checking it would verify nothing about derenumber().
        # NOTE(review): assumes derenumber() removes both renumbering
        # attributes -- confirm against PDBStructure.derenumber.
        with pytest.raises(AttributeError):
            renum_pdb.unrenumbered_amino_acids
        with pytest.raises(AttributeError):
            renum_pdb.renumbering_seq
        assert pdb.seq == renum_pdb.seq
        assert len(pdb.contacts) == len(renum_pdb.contacts)
        # Build the lookup set once, as test_renumbering does, instead of
        # scanning pdb.contacts for every contact.
        pdb_contacts = set(pdb.contacts)
        for contact in renum_pdb.contacts:
            assert contact in pdb_contacts
        return hash(repr(self))

    def __repr__(self):
        in_edges = [e.in_node.index for e in self.in_edges]
        out_edges = [e.out_node.index for e in self.out_edges]
        ret = f'Node({self.col}, {self.index}, {self.breakpoint}, ' \
            f'{in_edges=}, {out_edges=})'
        return ret


if __name__ == '__main__':
    # Ad-hoc manual run: load the bgl3 sample alignment and PDB structure
    # from relative paths and attach the structure to the alignment.
    import sys

    loc = '../../tests/bgl3_sample/'
    pa = ParentAlignment.from_fasta(loc + 'bgl3_sequences.fasta')
    pdb = PDBStructure.from_pdb_file(loc + '1GNX.pdb')
    pa.pdb_structure = pdb

    # NOTE(review): the script stops here; every statement below this call
    # is unreachable.
    sys.exit()
    # The bare string below is an expression statement, not a comment. It
    # holds a disabled alternative setup that uses the truncated sample.
    '''
    loc = '../tests/bgl3_sample/truncated/'
    pa = ParentAlignment.from_fasta(loc+'trunc.fasta')
    pdb = PDBStructure.from_pdb_file(loc+'trunc.pdb')
    '''
    # Repeats the setup above (unreachable).
    loc = '../../tests/bgl3_sample/'
    pa = ParentAlignment.from_fasta(loc + 'bgl3_sequences.fasta')
    pdb = PDBStructure.from_pdb_file(loc + '1GNX.pdb')
    pa.pdb_structure = pdb

    # Pairs of an int and a four-letter string; presumably position and
    # overhang sequence -- TODO confirm where these are consumed.
    vector_overhangs = [(0, 'TATG'), (3, 'TGAG')]
    n = 4
Example #9
0
def bgl3_PDBStructure(bgl3_pdb_filename):
    """Return the PDBStructure parsed from bgl3_pdb_filename."""
    structure = PDBStructure.from_pdb_file(bgl3_pdb_filename)
    return structure