def test_add_domain_attribute():
    """Exercise adding, overwriting and reading attributes on a domain.

    Builds a proteome from the test FASTA file, annotates it with the IDR
    domain file, and then works on the first domain of protein O00401.
    """
    data_root = shephard.get_data('test_data')
    fasta_path = '%s/%s' % (data_root, 'testset_1.fasta')
    domains_path = '%s/%s' % (data_root, 'TS1_domains_idr.tsv')

    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)

    first_domain = proteome_obj.protein('O00401').domains[0]

    # a freshly added attribute can be read straight back
    first_domain.add_attribute('test_attribute', 1)
    assert first_domain.attribute('test_attribute') == 1

    # re-adding under the same name (default arguments) is expected to raise
    with pytest.raises(DomainException):
        first_domain.add_attribute('test_attribute', 20)

    # ... and the rejected call must have left the stored value untouched
    assert first_domain.attribute('test_attribute') == 1

    # with safe=False the same call is expected to replace the value
    first_domain.add_attribute('test_attribute', 20, safe=False)
    assert first_domain.attribute('test_attribute') == 20
    assert len(first_domain.attributes) == 1

    # a second, differently named attribute grows the collection
    first_domain.add_attribute('another_test_attribute', 'testval')
    assert len(first_domain.attributes) == 2

    # looking up a missing attribute is expected to raise by default ...
    with pytest.raises(DomainException):
        assert first_domain.attribute('does not exist') == 20

    # ... and to return None when safe=False is passed
    assert first_domain.attribute('does not exist', safe=False) is None
def test_add_domains_file():
    """Check add_domains_from_file under several keyword combinations.

    Every scenario starts from a proteome freshly built from the test FASTA
    file so that earlier annotations do not leak into later scenarios.
    """
    data_root = shephard.get_data('test_data')
    fasta_path = '%s/%s' % (data_root, 'testset_1.fasta')
    domains_path = '%s/%s' % (data_root, 'TS1_domains_idr.tsv')

    # scenario 1: loading the same file twice with default arguments is
    # expected to raise on the second load
    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)
    with pytest.raises(ProteinException):
        interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)

    # scenario 2: a single load with autoname=True
    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=True)
    print('')

    # scenario 3: a plain load followed by a second load of the same file
    # with autoname=True; the second load is expected to be accepted
    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=False)
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=True)

    # scenario 4: a single load with autoname=False and skip_bad=True
    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=False,
                                                skip_bad=True)
def test_write_domain_with_attributes():
    """Attach two attributes to the first domain of protein O00401.

    The proteome/domain setup is the same one used by
    test_add_domain_attribute.
    """
    # NOTE(review): despite the name, no write step or assertion is visible
    # in this function - confirm whether the remainder was lost.
    data_root = shephard.get_data('test_data')
    fasta_path = '%s/%s' % (data_root, 'testset_1.fasta')
    domains_path = '%s/%s' % (data_root, 'TS1_domains_idr.tsv')

    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)

    first_domain = proteome_obj.protein('O00401').domains[0]

    # one integer-valued and one string-valued attribute
    first_domain.add_attribute('test_attribute_1', 1)
    first_domain.add_attribute('test_attribute_cat', 'cat')
def test_fasta_to_proteome_part_1():
    """Check the default integer-style unique IDs from fasta_to_proteome.

    Reads the test FASTA file into a new proteome, then twice more into the
    same proteome, verifying the ID sequence each time (including after one
    protein has been removed).
    """
    data_root = shephard.get_data('test_data')
    print(data_root)
    fasta_path = '%s/%s' % (data_root, 'testset_1.fasta')

    # first read: nine proteins with IDs '0' .. '8'
    P = fasta.fasta_to_proteome(fasta_path)
    assert len(P.protein('1')) == 390
    assert len(P) == 9
    for position, uid in enumerate(P.proteins):
        assert uid == str(position)

    # second read into the same proteome: IDs continue up to '17'
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)
    assert len(P) == 18
    for position, uid in enumerate(P.proteins):
        assert uid == str(position)

    # Remove one of the integer-indexed proteins, read the file in again,
    # and verify that the removed ID is really gone while every other ID
    # still follows the running count.
    P.remove_protein(10)
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)

    expected = 0
    for uid in P.proteins:
        if expected == 10:
            # ID 10 was removed, so looking it up is expected to raise;
            # step over it before comparing against the current ID
            with pytest.raises(ProteomeException):
                assert P.protein(expected)
            expected += 1
        assert uid == str(expected)
        expected += 1
""" import shephard from shephard.interfaces import si_sites, si_domains, si_tracks, si_protein_attributes from shephard.apis import uniprot import pytest import sys TS1_FILE = [ 'testset_1.fasta', 'TS1_domains_idr.tsv', 'TS1_domains_pscore.tsv', 'TS1_sites.tsv', 'TS1_tracks_pscore.tsv', 'TS1_protein_attributes.tsv', 'testset_1_ptms.tsv' ] test_data_dir = shephard.get_data('test_data') def build_proteome(fn): return uniprot.uniprot_fasta_to_proteome('%s/%s' % (test_data_dir, fn)) @pytest.fixture def TS1(request): TS1_proteome = build_proteome(TS1_FILE[0]) return TS1_proteome @pytest.fixture def TS1_domains(request): TS1_proteome = build_proteome(TS1_FILE[0])
def test_fasta_to_proteome_part_2():
    """Check fasta_to_proteome with custom, default and header-based IDs.

    The same FASTA file is read repeatedly into one proteome using the
    different unique-ID options, checking the proteome size after each read
    and, finally, that every expected unique ID can be looked up.
    """

    def accession_from_header(header):
        # second '|'-delimited field of the FASTA header
        return header.split('|')[1]

    data_root = shephard.get_data('test_data')
    print(data_root)
    fasta_path = '%s/%s' % (data_root, 'testset_1.fasta')

    # initial read, IDs built by the custom header parser
    P = fasta.fasta_to_proteome(fasta_path,
                                build_unique_ID=accession_from_header)
    assert len(P) == 9

    # same parser again -> duplicate IDs, expected to raise
    with pytest.raises(ProteomeException):
        fasta.fasta_to_proteome(fasta_path,
                                build_unique_ID=accession_from_header,
                                proteome=P)

    # default IDs do not clash, so nine more proteins are added
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)
    assert len(P) == 18

    # duplicates with force_overwrite=True: no exception and no growth
    P = fasta.fasta_to_proteome(fasta_path,
                                build_unique_ID=accession_from_header,
                                proteome=P,
                                force_overwrite=True)
    assert len(P) == 18

    # another round of default IDs
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)
    assert len(P) == 27

    # full FASTA headers used as the unique IDs
    P = fasta.fasta_to_proteome(fasta_path,
                                proteome=P,
                                use_header_as_unique_ID=True)
    assert len(P) == 36

    # passing use_header_as_unique_ID together with build_unique_ID is
    # expected to raise
    with pytest.raises(APIException):
        fasta.fasta_to_proteome(fasta_path,
                                proteome=P,
                                use_header_as_unique_ID=True,
                                build_unique_ID=accession_from_header)

    accession_ids = [
        'O00401', 'O00470', 'O00472', 'O00499', 'O00629', 'O00712',
        'O00716', 'O14786', 'Q9UJX3',
    ]
    # '0' .. '17' from the two default-ID reads
    counter_ids = [str(number) for number in range(18)]
    header_ids = [
        'sp|O00401|WASL_HUMAN Neural Wiskott-Aldrich syndrome protein OS=H**o sapiens OX=9606 GN=WASL PE=1 SV=2',
        'sp|O00470|MEIS1_HUMAN Homeobox protein Meis1 OS=H**o sapiens OX=9606 GN=MEIS1 PE=1 SV=1',
        'sp|O00472|ELL2_HUMAN RNA polymerase II elongation factor ELL2 OS=H**o sapiens OX=9606 GN=ELL2 PE=1 SV=2',
        'sp|O00499|BIN1_HUMAN Myc box-dependent-interacting protein 1 OS=H**o sapiens OX=9606 GN=BIN1 PE=1 SV=1',
        'sp|O00629|IMA3_HUMAN Importin subunit alpha-3 OS=H**o sapiens OX=9606 GN=KPNA4 PE=1 SV=1',
        'sp|O00712|NFIB_HUMAN Nuclear factor 1 B-type OS=H**o sapiens OX=9606 GN=NFIB PE=1 SV=2',
        'sp|O00716|E2F3_HUMAN Transcription factor E2F3 OS=H**o sapiens OX=9606 GN=E2F3 PE=1 SV=1',
        'sp|O14786|NRP1_HUMAN Neuropilin-1 OS=H**o sapiens OX=9606 GN=NRP1 PE=1 SV=3',
        'sp|Q9UJX3|APC7_HUMAN Anaphase-promoting complex subunit 7 OS=H**o sapiens OX=9606 GN=ANAPC7 PE=1 SV=4',
    ]
    expected_protein_uids = accession_ids + counter_ids + header_ids

    # every expected unique ID must be retrievable from the proteome
    for uid in expected_protein_uids:
        P.protein(uid)
def test_add_protein():
    """Check protein lookup, manual insertion, overwrite and removal.

    Covers proteomes built with UniProt accessions, default integer-style
    IDs and full FASTA headers as unique IDs, then add_protein on an
    existing proteome, and finally a proteome built from a list of dicts.
    """
    data_root = shephard.get_data('test_data')
    fasta_path = '%s/%s' % (data_root, 'testset_1.fasta')

    # proteome keyed by UniProt accession
    P = uniprot.uniprot_fasta_to_proteome(fasta_path)
    accession_lengths = [
        ('O00401', 505),
        ('O00470', 390),
        ('O00472', 640),
        ('O00499', 593),
        ('O00629', 521),
        ('O00712', 420),
        ('O00716', 465),
        ('O14786', 923),
        ('Q9UJX3', 599),
    ]
    for uid, expected_length in accession_lengths:
        assert len(P.protein(uid)) == expected_length

    # sequence lengths of the second through ninth records, in file order
    tail_lengths = [390, 640, 593, 521, 420, 465, 923, 599]

    # proteome built from the FASTA file using the default unique key
    P = fasta.fasta_to_proteome(fasta_path)
    for number, expected_length in enumerate(tail_lengths, start=1):
        assert len(P.protein(str(number))) == expected_length

    # proteome where the FASTA header is used as the unique ID
    P = fasta.fasta_to_proteome(fasta_path, use_header_as_unique_ID=True)
    tail_headers = [
        'sp|O00470|MEIS1_HUMAN Homeobox protein Meis1 OS=H**o sapiens OX=9606 GN=MEIS1 PE=1 SV=1',
        'sp|O00472|ELL2_HUMAN RNA polymerase II elongation factor ELL2 OS=H**o sapiens OX=9606 GN=ELL2 PE=1 SV=2',
        'sp|O00499|BIN1_HUMAN Myc box-dependent-interacting protein 1 OS=H**o sapiens OX=9606 GN=BIN1 PE=1 SV=1',
        'sp|O00629|IMA3_HUMAN Importin subunit alpha-3 OS=H**o sapiens OX=9606 GN=KPNA4 PE=1 SV=1',
        'sp|O00712|NFIB_HUMAN Nuclear factor 1 B-type OS=H**o sapiens OX=9606 GN=NFIB PE=1 SV=2',
        'sp|O00716|E2F3_HUMAN Transcription factor E2F3 OS=H**o sapiens OX=9606 GN=E2F3 PE=1 SV=1',
        'sp|O14786|NRP1_HUMAN Neuropilin-1 OS=H**o sapiens OX=9606 GN=NRP1 PE=1 SV=3',
        'sp|Q9UJX3|APC7_HUMAN Anaphase-promoting complex subunit 7 OS=H**o sapiens OX=9606 GN=ANAPC7 PE=1 SV=4',
    ]
    for header, expected_length in zip(tail_headers, tail_lengths):
        assert len(P.protein(header)) == expected_length

    # manually add a protein and read it back
    local_seq = 'PPPPP'
    P.add_protein(local_seq, '5pp', 'U5P')
    manual_entry = P.protein('U5P')
    assert manual_entry.sequence == local_seq
    assert P.protein('U5P').name == '5pp'

    # adding the same unique ID again is expected to raise ...
    with pytest.raises(ProteomeException):
        P.add_protein(local_seq, '5pp', 'U5P')

    # ... unless force_overwrite=True, which replaces the sequence
    P.add_protein('ASDF', '5pp', 'U5P', force_overwrite=True)
    assert P.protein('U5P').sequence == 'ASDF'

    # build a proteome directly from a list of protein dictionaries,
    # here with a float as the unique ID
    protein_list = [{
        'sequence': 'ASDFGH',
        'name': "Test protein 1",
        'unique_ID': 1.23,
        "attributes": None,
    }]
    P = proteome.Proteome(protein_list)
    print(P.proteins)

    # the entry is reachable through both the float and its string form
    assert P.protein(1.23).sequence == 'ASDFGH'
    assert P.protein("1.23").sequence == 'ASDFGH'

    # once removed, looking it up is expected to raise
    P.remove_protein(1.23)
    with pytest.raises(ProteomeException):
        assert P.protein(1.23).sequence == 'ASDFGH'