Beispiel #1
0
def mock_proteins_and_genes(count):
    """Add `count` mock genes to the database session.

    Each gene gets a single protein which is also set as the gene's
    preferred isoform. Names and refseq identifiers are derived from the
    loop index (0 .. count - 1). No commit is issued here.
    """
    from database import db
    from models import Gene, Protein

    for index in range(count):
        gene = Gene(
            name='Gene_{}'.format(index),
            full_name='Full name of gene {}'.format(index)
        )
        # the protein is linked to the gene first and then promoted
        # to be the gene's preferred isoform
        gene.preferred_isoform = Protein(
            refseq='NM_000{}'.format(index),
            gene=gene
        )
        db.session.add(gene)
    def test_autocomplete_all(self):
        """Exercise the '/search/autocomplete_all/' endpoint end to end.

        The scenario is built incrementally and covers, in order:
        gene and amino-acid mutation queries, genomic (nucleotide) mutation
        queries including the complementary-strand fallback, pathway queries
        (text, Reactome id, Gene Ontology id, "see more" link), disease
        queries and finally "disease in protein" queries.

        Every response is additionally passed to `self.visit_returned_urls`.
        """

        # MC3 GeneList is required as a target (a href for links) where users will be pointed
        # after clicking of cancer autocomplete suggestion. Likewise with the ClinVar list.
        db.session.add_all([
            GeneList(name=name, mutation_source_name=detail_class.name)
            for name, detail_class in [
                ('TCGA', MC3Mutation), ('ClinVar', InheritedMutation)
            ]
        ])

        g = Gene(name='BR')
        p = Protein(id=1, refseq='NM_007', gene=g, sequence='XXXXXV')
        g.preferred_isoform = p     # required for gene search to work - genes without preferred isoforms are ignored
        mut = Mutation(protein=p, position=6, alt='E')
        db.session.add_all([mut, p, g])

        def autocomplete(query):
            """Query the autocomplete endpoint and visit the URLs it returned."""
            r = self.client.get('/search/autocomplete_all/?q=' + query)
            self.visit_returned_urls(r)
            return r

        from database import bdb_refseq, bdb
        bdb_refseq['BR V6E'] = [p.id]  # required for mutation search
        bdb.add_genomic_mut('1', 10000, 'T', 'C', mut)

        # Gene and mutations

        response = autocomplete('BR V6E')
        entry = get_entry_and_check_type(response, 'aminoacid mutation')
        assert entry

        response = autocomplete('BR V6')
        entry = get_entry_and_check_type(response, 'message')
        assert 'Awaiting for <code>{alt}</code>' in entry['name']

        response = autocomplete('BR V')
        entry = get_entry_and_check_type(response, 'message')
        assert 'Awaiting for <code>{pos}{alt}</code>' in entry['name']

        response = autocomplete('B')
        entry = get_entry_and_check_type(response, 'gene')
        assert 'BR' == entry['name']

        # genomic mutation
        response = autocomplete('chr1 10000 T C')
        entry = get_entry_and_check_type(response, 'nucleotide mutation')
        assert entry and entry['input'] == 'CHR1 10000 T C'

        # is the search falling back to the other strand?
        response = autocomplete('chr1 10000 A G')
        entry = get_entry_and_check_type(response, 'nucleotide mutation')
        assert entry and entry['input'] == 'complement of CHR1 10000 A G'

        prompt = 'Awaiting for mutation in <code>{chrom} {pos} {ref} {alt}</code> format'

        # every incomplete genomic query should yield the same prompt
        for prompt_invoking_query in ['chr1', 'chr1 ', 'chr1 40', 'chr1 40 ', 'chr1 40 T']:
            response = autocomplete(prompt_invoking_query)
            entry = get_entry_and_check_type(response, 'message')
            assert entry['name'] == prompt

        # Pathways

        pathways = [
            Pathway(description='Activation of RAS in B cells', reactome=1169092),
            Pathway(description='abortive mitotic cell cycle', gene_ontology=33277),
            Pathway(description='amacrine cell differentiation', gene_ontology=35881),
            Pathway(description='amniotic stem cell differentiation', gene_ontology=97086)
        ]

        db.session.add_all(pathways)

        # test partial matching and Reactome id pathways search
        for ras_activation_query in ['Activation', 'REAC:1', 'REAC:1169092']:
            response = autocomplete(ras_activation_query)
            entry = get_entry_and_check_type(response, 'pathway')
            assert entry['name'].startswith('Activation of RAS in B cells')

        # test Gene Ontology search:
        response = autocomplete('GO:33')
        go_pathway = get_entry_and_check_type(response, 'pathway')
        assert go_pathway['name'] == 'abortive mitotic cell cycle (GO:33277)'

        # check if multiple pathways are returned
        response = autocomplete('differentiation')
        assert len(response.json['entries']) == 2

        # check if both genes and pathways are returned simultaneously
        # there should be: a pathway ('a>b<ortive...') and the >B<R gene
        response = autocomplete('b')
        entries = response.json['entries']
        names = [entry['name'] for entry in entries]
        assert all(name in names for name in ('BR', 'abortive mitotic cell cycle'))

        # check if "search more pathways" is displayed
        response = autocomplete('cell')    # cell occurs in all four of added pathways;
        # as a limit of pathways shown is 3, we should get a "show more" link
        links = entries_with_type(response, 'see_more')
        assert len(links) == 1
        assert links[0]['name'] == 'Show all pathways matching <i>cell</i>'

        # test case insensitive text search
        response = autocomplete('AMNIOTIC STEM')
        # a separate name is used so that the Pathway objects above are not shadowed
        matching_pathways = entries_with_type(response, 'pathway')
        assert len(matching_pathways) == 1
        assert matching_pathways[0]['name'] == 'amniotic stem cell differentiation'

        # Disease
        disease_names = [
            'Cystic fibrosis', 'Polycystic kidney disease 2',
            'Frontotemporal dementia', 'Cataract, nuclear total'
        ]
        diseases = {name: Disease(name=name) for name in disease_names}
        db.session.add_all(diseases.values())

        response = autocomplete('cystic')
        cystic_matching = entries_with_type(response, 'disease')
        # both 'Cystic fibrosis' and PKD2 should match
        assert len(cystic_matching) == 2

        # is comma containing disease name properly linked?
        response = autocomplete('Cataract')
        cataract = get_entry_and_check_type(response, 'disease')
        assert cataract['name'] == 'Cataract, nuclear total'

        # Gene mutation in disease

        # test suggestions
        response = autocomplete('cystic ')
        entry = entries_with_type(response, 'message')[0]
        # raw string: '\?' is not a valid escape sequence in a regular string literal
        assert re.match(r'Do you wish to search for (.*?) mutations\?', entry['name'])

        # currently there are no mutations associated with any disease
        # so the auto-completion should not return any results
        response = autocomplete('cystic in ')
        assert not response.json['entries']

        # let's add a mutation
        m = Mutation(protein=p, position=1, alt='Y')
        # NOTE(review): a refseq string is stored here, whereas the 'BR V6E' key
        # above stores a protein id - confirm which one bdb_refseq expects
        bdb_refseq['BR X1Y'] = ['NM_007']
        # note: sig_code is required here
        data = ClinicalData(disease=diseases['Cystic fibrosis'], sig_code=1)
        disease_mutation = InheritedMutation(mutation=m, clin_data=[data])
        db.session.add_all([m, data, disease_mutation])

        # should return '.. in BR' suggestion now.
        for query in ['cystic in', 'cystic in ']:
            response = autocomplete(query)
            result = get_entry_and_check_type(response, 'disease_in_protein')
            assert result['gene'] == 'BR'
            assert result['name'] == 'Cystic fibrosis'

        # both gene search and refseq search should yield the same, non-empty results
        results = []

        for query in ['cystic in BR', 'cystic in NM_007', 'cystic in 007']:
            response = autocomplete(query)
            result = get_entry_and_check_type(response, 'disease_in_protein')
            results.append(result)

        # `result` holds the last collected entry; all others have to equal it
        assert all(r == result for r in results) and result
Beispiel #3
0
    def test_mutated_sites(self):
        """Check `most_mutated_sites` for single and combined mutation sources.

        Three sites are created on one protein and surrounded by ClinVar
        (InheritedMutation) and TCGA (MC3Mutation) mutations; the expected
        (site, count) pairs are then compared for each source, for both
        sources (with and without intersection) and for a site type filter.
        """

        gene = Gene(name='Gene X')
        protein = Protein(refseq='NM_007', sequence='ABCDEFGHIJKLMNOPQRSTUVWXYZ', gene=gene)
        gene.preferred_isoform = protein

        glycosylation = SiteType(name='glycosylation')

        # ClinVar muts and TCGA muts but different, with total count = 5 (3 + 2)
        site_a = Site(position=1, residue='A', protein=protein)
        # ClinVar muts intersection TCGA muts, total count = 4 (2 + 2)
        site_k = Site(position=11, residue='K', protein=protein)
        # Only TCGA muts, total count = 3 (1 + 2)
        site_u = Site(position=21, residue='U', protein=protein, types={glycosylation})

        def mutation_at(position):
            """Create a mutation of the test protein at the given position."""
            return Mutation(position=position, alt='X', protein=protein)

        def clinical_records(how_many):
            """Create a list of `how_many` empty clinical data entries."""
            return [ClinicalData() for _ in range(how_many)]

        # this mutation is shared by the ClinVar and the TCGA source
        shared_mutation = mutation_at(11)

        db.session.add_all([
            # the first site (1 A)
            InheritedMutation(mutation=mutation_at(1), clin_data=clinical_records(3)),
            MC3Mutation(mutation=mutation_at(2), count=2),
            # the second site (11 K)
            InheritedMutation(mutation=shared_mutation, clin_data=clinical_records(2)),
            MC3Mutation(mutation=shared_mutation, count=2),
            # the third site (21 U)
            MC3Mutation(mutation=mutation_at(20), count=1),
            MC3Mutation(mutation=mutation_at(22), count=2),
        ])
        db.session.add_all([protein, gene])
        db.session.add_all([site_a, site_k, site_u])
        db.session.commit()

        # ClinVar only
        clinvar_result = most_mutated_sites([InheritedMutation]).all()
        assert clinvar_result == [(site_a, 3), (site_k, 2)]

        # TCGA only (compared as a set)
        mc3_result = most_mutated_sites([MC3Mutation]).all()
        assert set(mc3_result) == {(site_a, 2), (site_k, 2), (site_u, 3)}

        both_sources = [MC3Mutation, InheritedMutation]

        # only mutations present in both sources
        intersection_result = most_mutated_sites(both_sources, intersection=True).all()
        assert intersection_result == [(site_k, 4)]

        # sites having mutations from both sources, mutations not necessarily shared
        union_result = most_mutated_sites(both_sources, intersection=False).all()
        assert union_result == [(site_a, 5), (site_k, 4)]

        # restriction to a single site type
        glyco_result = most_mutated_sites([MC3Mutation], site_type=glycosylation).all()
        assert glyco_result == [(site_u, 3)]