def test_search_proteins(self):
    """Exercise `search_proteins`: limits, ordering and feature selection.

    Covers a negative control, limiting of symbol-based results, ordering
    of results by `best_score`, lookup by refseq and restricting the search
    to a subset of features.
    """
    from views.search import search_proteins

    # create 15 genes and proteins
    mock_proteins_and_genes(15)

    # control: do we start with the mocked proteins not others?
    assert not search_proteins('TP53')

    # does respect limit? does symbol search work?
    results = search_proteins('Gene', 10)

    assert results
    assert len(results) == 10
    assert results[0].name.startswith('Gene')

    # are results sorted?
    # The parentheses are required: `%` and `*` have equal precedence and
    # associate left-to-right, so `'NM_%s' % 20 * i` would format first and
    # then repeat the string i times (giving '' and 'NM_20').
    db.session.add_all([
        Gene(name=name, preferred_isoform=Protein(refseq='NM_%s' % (20 * i)))
        for i, name in enumerate(['TPK', 'TPKK'])
    ])

    results = search_proteins('TPK', 2)

    assert results[0].name == 'TPK'
    assert results[0].best_score < results[1].best_score

    # does include both: refseq and symbol search?
    assert search_proteins('NM_0003', 1)

    # can we change subset of searched features?
    assert not search_proteins('NM_0003', 1, features=['gene_symbol'])
    assert not search_proteins('Gene', 1, features=['refseq'])
def test_gene_symbol(self):
    """Check symbol-based gene search: controls, limit, case and whitespace."""
    from search.gene import SymbolGeneSearch

    # populate the database with 10 mocked genes and proteins
    mock_proteins_and_genes(10)

    find = SymbolGeneSearch().search

    # negative controls: an absent symbol, and a refseq (which is not a symbol)
    for not_a_symbol in ('TP53', 'NM_0000'):
        assert not find(not_a_symbol)

    # the limit caps the number of returned genes
    limited = find('Gene', limit=5)

    assert len(limited) == 5
    assert limited[0].name.startswith('Gene')

    # matching should not be case sensitive
    assert find('gene')

    # flanking whitespace in the query should be ignored
    for padded_query in ('gene ', 'gene ', ' gene', ' gene '):
        assert find(padded_query)
def test_protein_name(self):
    """A protein should be found by a lower-cased word of its full name."""
    mock_proteins_and_genes(2)

    # give one of the mocked proteins a recognisable full name
    named_protein = Protein.query.filter_by(refseq='NM_0001').one()
    named_protein.full_name = 'Important protein'

    find = ProteinNameSearch().search

    assert find('important')
def test_gene_name(self):
    """Check that searching by gene full name honours the result limit."""
    # same scenario as the gene symbol test, but matching on `full_name`
    mock_proteins_and_genes(6)

    find = GeneNameSearch().search
    matches = find('Full name of gene', limit=5)

    # six genes were mocked, but only five may be returned
    assert len(matches) == 5
    assert matches[0].full_name.startswith('Full name of gene')
def test_autocomplete_all_proteins(self):
    """The `autocomplete_all` endpoint should suggest the mocked genes."""
    # An MC3 GeneList is required as a target (a href for links) to which
    # users will be pointed after clicking a cancer autocomplete suggestion.
    db.session.add(
        GeneList(name='TCGA', mutation_source_name=MC3Mutation.name)
    )

    mock_proteins_and_genes(15)

    url = 'search/autocomplete_all?q=%s' % 'Gene'
    response = self.client.get(url, follow_redirects=True)

    assert response.status_code == 200

    first_entry = response.json['entries'][0]
    assert first_entry['name'].startswith('Gene')
def test_summary(self):
    """Check searching proteins by the content of their summary."""
    mock_proteins_and_genes(2)

    described = Protein.query.filter_by(refseq='NM_0001').one()
    described.summary = 'This is an important protein for the FooBar pathway'

    find = SummarySearch(minimal_length=3).search

    # exact case, different case and a multi-word phrase should all match
    for accepted in ['FooBar', 'foobar', 'foobar pathway']:
        assert find(accepted)

    # query shorter than the configured minimal length
    assert not find('an')

    # negative control: word absent from the summary
    assert not find('cancer')
def test_refseq(self):
    """Check refseq-based gene search: controls, matching and per-gene limit."""
    from search.gene import RefseqGeneSearch

    # populate the database with 10 mocked genes and proteins
    mock_proteins_and_genes(10)

    find = RefseqGeneSearch().search

    # negative control
    for phrase in ['9999', 'NM_00000', 'Gene']:
        assert not find(phrase)

    # limiting
    limited = find('NM_', limit=5)

    assert len(limited) == 5
    assert limited[0].name.startswith('Gene')

    # test the search itself: full, lower-cased and prefix-less identifiers
    for identifier in ['NM_0003', 'nm_0003', '0003']:
        found = find(identifier)

        assert len(found) == 1

        gene = found[0]
        assert gene.name == 'Gene_3'

        matched = gene.matched_isoforms
        assert len(matched) == 1
        assert matched.pop().refseq == 'NM_0003'

    gene_x = Gene(
        name='Gene X',
        isoforms=[
            Protein(refseq='NM_000301'),
            Protein(refseq='NM_000302'),
        ]
    )
    gene_y = Gene(name='Gene Y', isoforms=[Protein(refseq='NM_000309')])
    db.session.add_all([gene_x, gene_y])

    # Now there are three genes with isoforms starting with NM_0003
    # (those are Gene_3, Gene X, Gene Y). Let's see if limiting works
    # well when applied per-gene.
    expected_counts = {
        'NM_0003': 2,
        'NM_00030': 2,
        'NM_000301': 1,
        'NM_000302': 1
    }

    for query, expected_count in expected_counts.items():
        assert len(find(query, limit=2)) == expected_count
def test_autocomplete_proteins(self):
    """Both protein search routes should return Gene_2 for assorted queries."""
    mock_proteins_and_genes(5)

    # an extra gene whose name contains a colon and a space
    db.session.add(
        Gene(name='Gene: 6', preferred_isoform=Protein(refseq='NM_006'))
    )

    url_template = 'search/%s?filters=Search.query:eq:%s'
    accepted_queries = (
        'Gene_2', 'Gene', 'gene', 'Gene_2 ', ' gene', 'gene%20'
    )

    for route in ('autocomplete_proteins', 'proteins'):
        for query in accepted_queries:
            # printed so that the combination under test is visible in output
            print(route, query)

            response = self.client.get(
                url_template % (route, query),
                follow_redirects=True
            )

            assert response.status_code == 200

            page = response.data
            assert b'Gene_2' in page
            assert b'NM_0002' in page

    # a query ending with a colon and a space should still match 'Gene: 6'
    response = self.client.get(
        'search/autocomplete_proteins?filters=Search.query:eq:Gene: ',
        follow_redirects=True
    )

    assert response.status_code == 200
    assert b'Gene: 6' in response.data