Exemple #1
0
def test_add_domains_file():
    """Exercise ``add_domains_from_file`` with the ``autoname`` and ``skip_bad`` flags.

    Each scenario starts from a freshly parsed proteome so that domains added
    by an earlier scenario cannot influence a later one.
    """

    test_data_dir = shephard.get_data('test_data')
    fasta_file = '%s/%s' % (test_data_dir, 'testset_1.fasta')
    domain_file = '%s/%s' % (test_data_dir, 'TS1_domains_idr.tsv')

    P = uniprot.uniprot_fasta_to_proteome(fasta_file)
    interfaces.si_domains.add_domains_from_file(P, domain_file)

    # adding the same file a second time must fail: the domains already exist
    with pytest.raises(ProteinException):
        interfaces.si_domains.add_domains_from_file(P, domain_file)

    # a fresh proteome accepts the file when autoname is enabled
    P = uniprot.uniprot_fasta_to_proteome(fasta_file)
    interfaces.si_domains.add_domains_from_file(P, domain_file, autoname=True)

    P = uniprot.uniprot_fasta_to_proteome(fasta_file)
    interfaces.si_domains.add_domains_from_file(P, domain_file, autoname=False)

    # autoname allows two apparently identical domain files to be added
    interfaces.si_domains.add_domains_from_file(P, domain_file, autoname=True)

    # on a fresh proteome, autoname=False together with skip_bad=True is
    # expected to load the file without raising
    P = uniprot.uniprot_fasta_to_proteome(fasta_file)
    interfaces.si_domains.add_domains_from_file(P,
                                                domain_file,
                                                autoname=False,
                                                skip_bad=True)
Exemple #2
0
def test_si_site_add_from_file_test_robustness():
    """Check how ``add_sites_from_file`` reacts to a file containing a bad site.

    The load is expected to raise ``ProteinException`` for either value of
    ``skip_bad``, and to complete when called with ``safe=False``.
    """

    proteome = uniprot.uniprot_fasta_to_proteome(
        '%s/%s' % (test_data_dir, 'testset_1.fasta'))
    bad_sites_file = '%s/%s' % (test_data_dir, 'ts1_bonus_sites_bad.tsv')

    # the failure is expected whether skip_bad is switched on or off
    for skip_flag in (True, False):
        with pytest.raises(ProteinException):
            si_sites.add_sites_from_file(proteome,
                                         bad_sites_file,
                                         skip_bad=skip_flag)

    # with safe=False the same file loads and the bad site is skipped over
    si_sites.add_sites_from_file(proteome, bad_sites_file, safe=False)

    target = proteome.protein('O00470')
    assert len(target) == 390
Exemple #3
0
def test_add_domain_attribute():
    """Check adding, overwriting and reading attributes on a ``Domain``.

    Covers: a plain add, a rejected duplicate add, an overwrite with
    ``safe=False``, the attribute count, and lookups of a missing attribute
    with and without ``safe``.
    """

    test_data_dir = shephard.get_data('test_data')
    fasta_file = '%s/%s' % (test_data_dir, 'testset_1.fasta')
    domain_file = '%s/%s' % (test_data_dir, 'TS1_domains_idr.tsv')

    P = uniprot.uniprot_fasta_to_proteome(fasta_file)
    interfaces.si_domains.add_domains_from_file(P, domain_file)
    prot = P.protein('O00401')
    domain = prot.domains[0]
    domain.add_attribute('test_attribute', 1)

    assert domain.attribute('test_attribute') == 1

    # re-adding an existing attribute name must be rejected
    with pytest.raises(DomainException):
        domain.add_attribute('test_attribute', 20)

    # the rejected add above must have left the stored value untouched
    assert domain.attribute('test_attribute') == 1

    # safe=False permits the overwrite
    domain.add_attribute('test_attribute', 20, safe=False)
    assert domain.attribute('test_attribute') == 20

    assert len(domain.attributes) == 1
    domain.add_attribute('another_test_attribute', 'testval')
    assert len(domain.attributes) == 2

    # looking up a missing attribute raises; only the call is needed here,
    # comparing its result would never be reached
    with pytest.raises(DomainException):
        domain.attribute('does not exist')

    # with safe=False the missing attribute yields None instead
    assert domain.attribute('does not exist', safe=False) is None
Exemple #4
0
def test_write_domain_with_attributes():
    """Prepare a domain that carries two attributes.

    The setup mirrors test_add_domain_attribute. As written, this function
    only builds the fixture state; it performs no write and makes no
    assertion.
    """

    data_root = shephard.get_data('test_data')
    fasta_path = '%s/%s' % (data_root, 'testset_1.fasta')
    domains_path = '%s/%s' % (data_root, 'TS1_domains_idr.tsv')

    proteome = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome, domains_path)

    # attach one numeric and one string attribute to the first domain
    first_domain = proteome.protein('O00401').domains[0]
    first_domain.add_attribute('test_attribute_1', 1)
    first_domain.add_attribute('test_attribute_cat', 'cat')
def test_getter_properies():
    """Check the read-only accessors of a ``Protein`` loaded from testset_1.

    Covers name, owning proteome, unique ID, residue / region / context
    lookups (including out-of-range requests, which must raise
    ``ProteinException``), sequence validity, and the empty attribute and
    track collections of a freshly loaded protein.
    """
    TS1 = uniprot.uniprot_fasta_to_proteome('%s/%s' %
                                            (test_data_dir, 'testset_1.fasta'))

    prot = TS1.protein('O00470')

    # full 390-residue sequence expected for O00470; reused below
    full_sequence = 'MAQRYDDLPHYGGMDGVGIPSTMYGDPHAARSMQPVHHLNHGPPLHSHQYPHTAHTNAMAPSMGSSVNDALKRDKDAIYGHPLFPLLALIFEKCELATCTPREPGVAGGDVCSSESFNEDIAVFAKQIRAEKPLFSSNPELDNLMIQAIQVLRFHLLELEKVHELCDNFCHRYISCLKGKMPIDLVIDDREGGSKSDSEDITRSANLTDQPSWNRDHDDTASTRSGGTPGPSSGGHTSHSGDNSSEQGDGLDNSVASPSTGDDDDPDKDKKRHKKRGIFPKVATNIMRAWLFQHLTHPYPSEEQKKQLAQDTGLTILQVNNWFINARRRIVQPMIDQSNRAVSQGTPYNPDGQPMGGFVMDGQQHMGIRAPGPMSGMGMNMGMEGQWHYM'

    assert len(TS1.protein('O00470')) == 390

    # the organism field of the header is 'Homo sapiens' (OX=9606)
    assert prot.name == "sp|O00470|MEIS1_HUMAN Homeobox protein Meis1 OS=Homo sapiens OX=9606 GN=MEIS1 PE=1 SV=1"
    assert str(
        prot.proteome) == "[Proteome]: Sequence dataset with 9 protein records"
    assert prot.unique_ID == 'O00470'

    # residue positions are 1-indexed
    assert prot.get_residue(1) == 'M'
    assert prot.get_residue(2) == 'A'
    assert prot.get_residue(390) == 'M'

    # position 0 is out of range
    with pytest.raises(ProteinException):
        prot.get_residue(0)

    assert prot.sequence == full_sequence

    # regions are inclusive at both ends
    assert prot.get_sequence_region(1, 5) == 'MAQRY'
    assert prot.get_sequence_region(390, 390) == 'M'

    # a region running past the final residue is rejected
    with pytest.raises(ProteinException):
        prot.get_sequence_region(390, 391)

    assert prot.get_sequence_region(380, 390) == 'NMGMEGQWHYM'

    # the context window is truncated at the sequence termini
    assert prot.get_sequence_context(1, 10) == 'MAQRYDDLPHY'
    assert prot.get_sequence_context(10, 3) == 'DLPHYGG'
    assert prot.get_sequence_context(390, 5) == 'GQWHYM'

    # a window wider than the protein returns the whole sequence
    assert prot.get_sequence_context(50, 450) == full_sequence

    # a context centred beyond the end of the sequence is rejected
    with pytest.raises(ProteinException):
        prot.get_sequence_context(400, 3)

    assert prot.check_sequence_is_valid() is True

    # a freshly loaded protein carries no attributes or tracks
    assert len(prot.attributes) == 0

    assert prot.attribute('TEST', safe=False) is None

    assert len(prot.tracks) == 0
    assert len(prot.track_names) == 0
    assert prot.track('TEST', safe=False) is None
Exemple #6
0
def test_si_site_add_file_read_robustness():
    """Check ``add_sites_from_file`` against a file with a malformatted line.

    With ``skip_bad=False`` the load must raise ``InterfaceException``; with
    ``skip_bad=True`` it must complete and leave four sites on the proteome.
    """

    proteome = uniprot.uniprot_fasta_to_proteome(
        '%s/%s' % (test_data_dir, 'testset_1.fasta'))
    malformed_file = '%s/%s' % (test_data_dir, 'ts1_bonus_sites_bad2.tsv')

    # this should fail - malformatted line
    with pytest.raises(InterfaceException):
        si_sites.add_sites_from_file(proteome, malformed_file, skip_bad=False)

    # skipping bad lines lets the remaining sites load
    si_sites.add_sites_from_file(proteome, malformed_file, skip_bad=True)

    site_count = len(proteome.sites)
    assert site_count == 4
Exemple #7
0
def test_si_site_add_from_file():
    """Check that sites loaded from ts1_bonus_sites.tsv are retrievable by position.

    Protein O00470 is expected to have one site at position 1 and one at
    position 390. Requesting a position with no site raises
    ``ProteinException`` by default and returns ``None`` with ``safe=False``.
    """

    TS1 = uniprot.uniprot_fasta_to_proteome('%s/%s' %
                                            (test_data_dir, 'testset_1.fasta'))

    si_sites.add_sites_from_file(
        TS1, '%s/%s' % (test_data_dir, 'ts1_bonus_sites.tsv'))

    O00470 = TS1.protein('O00470')

    # check length
    assert len(O00470) == 390
    assert len(O00470.site(1)) == 1
    assert len(O00470.site(390)) == 1

    # position 2 has no site: the default (safe) lookup raises
    with pytest.raises(ProteinException):
        O00470.site(2)

    # the same lookup with safe=False returns None instead of raising
    assert O00470.site(2, safe=False) is None
Exemple #8
0
def build_proteome(fn):
    """Return a proteome parsed from the FASTA file ``fn`` in the test data directory.

    ``fn`` is joined onto the module-level ``test_data_dir`` and handed to
    ``uniprot.uniprot_fasta_to_proteome``.
    """
    fasta_path = '%s/%s' % (test_data_dir, fn)
    return uniprot.uniprot_fasta_to_proteome(fasta_path)
Exemple #9
0
def test_add_protein():
    """Check the different ways of populating a ``Proteome`` with proteins.

    Covers: the UniProt FASTA parser, the generic FASTA parser with the
    default unique key and with the header as unique ID, manual
    ``add_protein`` (including the duplicate-ID failure and
    ``force_overwrite``), construction from a list of dictionaries, and
    ``remove_protein``.
    """

    # creating proteome and adding protein
    test_data_dir = shephard.get_data('test_data')
    fasta_path = '%s/%s' % (test_data_dir, 'testset_1.fasta')

    P = uniprot.uniprot_fasta_to_proteome(fasta_path)
    uniprot_lengths = (
        ('O00401', 505),
        ('O00470', 390),
        ('O00472', 640),
        ('O00499', 593),
        ('O00629', 521),
        ('O00712', 420),
        ('O00716', 465),
        ('O14786', 923),
        ('Q9UJX3', 599),
    )
    for unique_id, expected_length in uniprot_lengths:
        assert len(P.protein(unique_id)) == expected_length, unique_id

    # creating a proteome from a FASTA file (using default unique key)
    P = fasta.fasta_to_proteome(fasta_path)
    default_key_lengths = (
        ('1', 390),
        ('2', 640),
        ('3', 593),
        ('4', 521),
        ('5', 420),
        ('6', 465),
        ('7', 923),
        ('8', 599),
    )
    for unique_id, expected_length in default_key_lengths:
        assert len(P.protein(unique_id)) == expected_length, unique_id

    # create a proteome where FASTA header is used as uniqueID
    # (the organism field of each header is 'Homo sapiens', OX=9606)
    P = fasta.fasta_to_proteome(fasta_path, use_header_as_unique_ID=True)
    header_lengths = (
        ('sp|O00470|MEIS1_HUMAN Homeobox protein Meis1 OS=Homo sapiens OX=9606 GN=MEIS1 PE=1 SV=1',
         390),
        ('sp|O00472|ELL2_HUMAN RNA polymerase II elongation factor ELL2 OS=Homo sapiens OX=9606 GN=ELL2 PE=1 SV=2',
         640),
        ('sp|O00499|BIN1_HUMAN Myc box-dependent-interacting protein 1 OS=Homo sapiens OX=9606 GN=BIN1 PE=1 SV=1',
         593),
        ('sp|O00629|IMA3_HUMAN Importin subunit alpha-3 OS=Homo sapiens OX=9606 GN=KPNA4 PE=1 SV=1',
         521),
        ('sp|O00712|NFIB_HUMAN Nuclear factor 1 B-type OS=Homo sapiens OX=9606 GN=NFIB PE=1 SV=2',
         420),
        ('sp|O00716|E2F3_HUMAN Transcription factor E2F3 OS=Homo sapiens OX=9606 GN=E2F3 PE=1 SV=1',
         465),
        ('sp|O14786|NRP1_HUMAN Neuropilin-1 OS=Homo sapiens OX=9606 GN=NRP1 PE=1 SV=3',
         923),
        ('sp|Q9UJX3|APC7_HUMAN Anaphase-promoting complex subunit 7 OS=Homo sapiens OX=9606 GN=ANAPC7 PE=1 SV=4',
         599),
    )
    for header, expected_length in header_lengths:
        assert len(P.protein(header)) == expected_length, header

    # check manually adding proteins
    local_seq = 'PPPPP'
    P.add_protein(local_seq, '5pp', 'U5P')
    assert P.protein('U5P').sequence == local_seq
    assert P.protein('U5P').name == '5pp'

    # adding a second protein under an existing unique ID must fail ...
    with pytest.raises(ProteomeException):
        P.add_protein(local_seq, '5pp', 'U5P')

    # ... unless force_overwrite is set, in which case the record is replaced
    P.add_protein('ASDF', '5pp', 'U5P', force_overwrite=True)
    assert P.protein('U5P').sequence == 'ASDF'

    protein_list = []
    p1 = {
        'sequence': 'ASDFGH',
        'name': "Test protein 1",
        'unique_ID': 1.23,
        "attributes": None
    }
    protein_list.append(p1)

    # check this works
    P = proteome.Proteome(protein_list)
    print(P.proteins)

    # the record is reachable by the float ID and by its string form
    assert P.protein(1.23).sequence == 'ASDFGH'
    assert P.protein("1.23").sequence == 'ASDFGH'

    # once removed, looking the protein up must raise
    P.remove_protein(1.23)
    with pytest.raises(ProteomeException):
        P.protein(1.23)
def test_add_site():

    # build new proteome
    TS1 = uniprot.uniprot_fasta_to_proteome('%s/%s' %
                                            (test_data_dir, 'testset_1.fasta'))