Example #1
0
    def test_search_proteins(self):
        """Check the combined `search_proteins` helper end to end.

        Covers a negative control, the result limit, the ordering of the
        results (exact match first, with the lower `best_score`), and
        restricting the search to a subset of features.
        """
        from views.search import search_proteins

        # create 15 genes and proteins
        mock_proteins_and_genes(15)

        # control: do we start with the mocked proteins and nothing else?
        assert not search_proteins('TP53')

        # does it respect the limit? does symbol search work?
        results = search_proteins('Gene', 10)

        assert results
        assert len(results) == 10

        assert results[0].name.startswith('Gene')

        # are results sorted?
        # NOTE: `%` and `*` have equal precedence and group left-to-right, so
        # the product must be parenthesised. Without the parentheses the
        # formatted string itself is repeated `i` times ('' for i == 0)
        # instead of the number being multiplied.
        db.session.add_all([
            Gene(name=name, preferred_isoform=Protein(refseq='NM_%s' % (20 * i)))
            for i, name in enumerate(['TPK', 'TPKK'])
        ])
        results = search_proteins('TPK', 2)
        assert results[0].name == 'TPK'
        assert results[0].best_score < results[1].best_score

        # does it include both: refseq and symbol search?
        assert search_proteins('NM_0003', 1)

        # can we change the subset of searched features?
        assert not search_proteins('NM_0003', 1, features=['gene_symbol'])
        assert not search_proteins('Gene', 1, features=['refseq'])
    def test_gene_symbol(self):
        """Exercise SymbolGeneSearch: controls, limit, case and whitespace."""
        from search.gene import SymbolGeneSearch

        # populate the database with 10 mocked genes and proteins
        mock_proteins_and_genes(10)

        find = SymbolGeneSearch().search

        # negative controls: an absent symbol, and a refseq (not a symbol)
        for unmatched in ('TP53', 'NM_0000'):
            assert not find(unmatched)

        # the limit caps the number of returned hits
        hits = find('Gene', limit=5)
        assert len(hits) == 5

        first_hit = hits[0]
        assert first_hit.name.startswith('Gene')

        # the search should not be case sensitive
        assert find('gene')

        # flanking whitespace should be ignored
        padded_queries = ['gene ', 'gene   ', ' gene', ' gene ']
        for padded in padded_queries:
            assert find(padded)
    def test_protein_name(self):
        """ProteinNameSearch should match a lower-cased word of a full name."""
        mock_proteins_and_genes(2)

        # give one of the mocked proteins a recognisable full name
        target = Protein.query.filter_by(refseq='NM_0001').one()
        target.full_name = 'Important protein'

        name_search = ProteinNameSearch()

        assert name_search.search('important')
    def test_gene_name(self):
        """GeneNameSearch should match on the full name and honour the limit."""
        # mirrors the gene symbol test, but queries the gene's full name
        mock_proteins_and_genes(6)

        searcher = GeneNameSearch()
        query = 'Full name of gene'
        matches = searcher.search(query, limit=5)

        assert len(matches) == 5

        top_match = matches[0]
        assert top_match.full_name.startswith(query)
Example #5
0
    def test_autocomplete_all_proteins(self):
        """The autocomplete_all endpoint should suggest genes for 'Gene'."""
        # The MC3 GeneList is required as a link target (href): users are
        # pointed to it after clicking a cancer autocomplete suggestion.
        db.session.add(
            GeneList(name='TCGA', mutation_source_name=MC3Mutation.name)
        )

        mock_proteins_and_genes(15)

        url = 'search/autocomplete_all?q=%s' % 'Gene'
        response = self.client.get(url, follow_redirects=True)

        assert response.status_code == 200

        first_entry = response.json['entries'][0]
        assert first_entry['name'].startswith('Gene')
    def test_summary(self):
        """SummarySearch should match summary text and reject short queries."""
        mock_proteins_and_genes(2)

        # attach a summary to one of the mocked proteins
        annotated = Protein.query.filter_by(refseq='NM_0001').one()
        annotated.summary = 'This is an important protein for the FooBar pathway'

        find = SummarySearch(minimal_length=3).search

        # accepted regardless of letter case, also as a multi-word phrase
        accepted_queries = ('FooBar', 'foobar', 'foobar pathway')
        for accepted in accepted_queries:
            assert find(accepted)

        # rejected queries: 'an' is too short for minimal_length=3,
        # and 'cancer' is a negative control absent from the summary
        for rejected in ('an', 'cancer'):
            assert not find(rejected)
    def test_refseq(self):
        """Exercise RefseqGeneSearch: controls, limit, matching, per-gene limit."""
        from search.gene import RefseqGeneSearch

        # populate the database with 10 mocked genes and proteins
        mock_proteins_and_genes(10)

        find = RefseqGeneSearch().search

        # negative controls
        for phrase in ('9999', 'NM_00000', 'Gene'):
            assert not find(phrase)

        # limiting
        limited = find('NM_', limit=5)
        assert len(limited) == 5
        assert limited[0].name.startswith('Gene')

        # the search itself: full id, lower-cased id and the bare number
        # should all resolve to the same single gene and isoform
        for query in ('NM_0003', 'nm_0003', '0003'):
            hits = find(query)
            assert len(hits) == 1

            gene = hits[0]
            assert gene.name == 'Gene_3'

            matched = gene.matched_isoforms
            assert len(matched) == 1
            assert matched.pop().refseq == 'NM_0003'

        gene_x = Gene(
            name='Gene X',
            isoforms=[
                Protein(refseq='NM_000301'),
                Protein(refseq='NM_000302'),
            ]
        )
        gene_y = Gene(name='Gene Y', isoforms=[Protein(refseq='NM_000309')])
        db.session.add_all([gene_x, gene_y])

        # Now three genes have isoforms starting with NM_0003 (Gene_3,
        # Gene X and Gene Y). Check that the limit is applied per gene,
        # not per isoform.
        expected_counts = {
            'NM_0003': 2,
            'NM_00030': 2,
            'NM_000301': 1,
            'NM_000302': 1,
        }

        for query, expected_count in expected_counts.items():
            hits = find(query, limit=2)
            assert len(hits) == expected_count
Example #8
0
    def test_autocomplete_proteins(self):
        """Both protein search routes should find Gene_2 for varied queries."""
        mock_proteins_and_genes(5)

        # An extra gene with a colon in its name.
        # NOTE(review): presumably guards against the colon in the value
        # being confused with the ':' separators of the filter syntax.
        db.session.add(
            Gene(name='Gene: 6', preferred_isoform=Protein(refseq='NM_006'))
        )

        routes = ('autocomplete_proteins', 'proteins')
        gene_2_queries = (
            'Gene_2', 'Gene', 'gene', 'Gene_2 ', ' gene', 'gene%20'
        )

        for route in routes:
            for query in gene_2_queries:
                # printed to make the failing combination easy to identify
                print(route, query)

                url = 'search/%s?filters=Search.query:eq:%s' % (route, query)
                response = self.client.get(url, follow_redirects=True)

                assert response.status_code == 200
                for fragment in (b'Gene_2', b'NM_0002'):
                    assert fragment in response.data

        colon_url = 'search/autocomplete_proteins?filters=Search.query:eq:Gene: '
        response = self.client.get(colon_url, follow_redirects=True)

        assert response.status_code == 200
        assert b'Gene: 6' in response.data