def test_string_query(self):
    ''' Test building and running a string query.

    Searches the DEFAULT index for "rs2476601" in the "id" field and
    expects exactly one document. Also checks that passing an unknown
    keyword argument to ElasticQuery.query_string raises QueryError.
    '''
    query = ElasticQuery.query_string("rs2476601", fields=["id"])
    elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    docs = elastic.search()
    # assertEqual reports the actual document count when the check fails.
    self.assertEqual(len(docs.docs), 1,
                     "Elastic string query retrieved marker (rs2476601)")
    # 'fieldssss' is presumably a deliberate misspelling of 'fields'.
    self.assertRaises(QueryError, ElasticQuery.query_string,
                      "rs2476601", fieldssss=["id"])
def test_scan_and_scroll(self):
    ''' Test scan and scroll interface.

    Runs scan_and_scroll over the DEFAULT index twice, first without a
    query and then with a string query, validating each response passed
    to the callback.
    '''
    def check_hits(resp_json):
        ''' Callback: the response must contain a non-empty hits list. '''
        # assertIn shows the response content if the key is missing.
        self.assertIn('hits', resp_json, 'scan and scroll hits')
        self.assertGreaterEqual(len(resp_json['hits']['hits']), 1)

    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('DEFAULT'), call_fun=check_hits)
    ScanAndScroll.scan_and_scroll(
        ElasticSettings.idx('DEFAULT'),
        call_fun=check_hits,
        query=ElasticQuery.query_string("rs2476601", fields=["id"]),
    )
def test_pub_ini_file2(self):
    ''' Test publication pipeline with a list of PMIDs.

    Runs the 'load' step of the publications command for the
    DISEASE::TEST section, then expects more than one document whose
    "tags.disease" field matches "test".
    '''
    captured = StringIO()
    call_command(
        'publications', '--dir', TEST_DATA_DIR, '--steps', 'load',
        sections='DISEASE::TEST', ini=MY_PUB_INI_FILE, stdout=captured,
    )

    # The index name comes from the DISEASE section of the same ini file.
    ini_config = IniParser().read_ini(MY_PUB_INI_FILE)
    disease_idx = ini_config['DISEASE']['index']
    Search.index_refresh(disease_idx)

    disease_query = ElasticQuery.query_string("test", fields=["tags.disease"])
    hits = Search(disease_query, idx=disease_idx).search().docs
    self.assertGreater(len(hits), 1)
def test_gene_pipeline(self):
    """ Test gene pipeline.

    Runs the pipeline command one ini section at a time against the test
    data. After each step the index is refreshed and the gene documents
    are checked for the fields that step is expected to add.
    """
    ini_config = IniParser().read_ini(MY_INI_FILE)
    idx = ini_config["ENSEMBL_GENE_GTF"]["index"]
    idx_type = ini_config["ENSEMBL_GENE_GTF"]["index_type"]

    def run_pipeline(section, *steps):
        """ Run the given pipeline steps for one section, then refresh the index. """
        call_command("pipeline", "--steps", *steps, sections=section,
                     dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)

    def docs_for_symbol(symbol):
        """ Return the documents whose "symbol" field matches the given symbol. """
        symbol_query = ElasticQuery.query_string(symbol, fields=["symbol"])
        return Search(symbol_query, idx=idx).search().docs

    # 1. Test ensembl GTF loading.
    run_pipeline("ENSEMBL_GENE_GTF", "stage", "load")
    elastic = Search(idx=idx, idx_type=idx_type)
    self.assertGreaterEqual(elastic.get_count()["count"], 1, "Count documents in the index")
    map1_props = Gene.gene_mapping(idx, idx_type, test_mode=True).mapping_properties
    map2_props = elastic.get_mapping()
    if idx not in map2_props:
        logger.error("MAPPING ERROR: " + json.dumps(map2_props))
    # Fail with a readable assertion rather than a bare KeyError below.
    self.assertIn(idx, map2_props, "index missing from mapping response")
    self._cmpMappings(map2_props[idx]["mappings"], map1_props, idx_type)

    # 2. Test adding entrez ID to documents.
    run_pipeline("GENE2ENSEMBL", "load")
    docs = docs_for_symbol("PTPN22")
    self.assertEqual(len(docs), 1)
    self.assertIn("entrez", docs[0].dbxrefs)
    self.assertEqual(docs[0].dbxrefs["entrez"], "26191")

    # 3. Add uniprot and fill in missing entrez fields.
    run_pipeline("ENSMART_GENE", "download", "load")
    docs = docs_for_symbol("DNMT3L")
    self.assertIn("entrez", docs[0].dbxrefs)
    self.assertIn("swissprot", docs[0].dbxrefs)

    # 4. Add gene synonyms and dbxrefs.
    run_pipeline("GENE_INFO", "load")
    docs = docs_for_symbol("PTPN22")
    self.assertIn("PTPN8", docs[0].synonyms)

    # 5. Add PMIDs to gene docs.
    run_pipeline("GENE_PUBS", "load")
    docs = docs_for_symbol("PTPN22")
    self.assertGreater(len(docs[0].pmids), 0)

    # 6. Add ortholog data.
    run_pipeline("ENSMART_HOMOLOG", "load")
    docs = docs_for_symbol("PTPN22")
    dbxrefs = docs[0].dbxrefs
    self.assertIn("orthologs", dbxrefs, dbxrefs)
    self.assertIn("mmusculus", dbxrefs["orthologs"], dbxrefs)
    self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])

    # Look the gene up by its mouse ortholog Ensembl ID.
    ortholog_query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", ["ENSMUSG00000027843"]),
    )
    docs = Search(ortholog_query, idx=idx, size=1).search().docs
    self.assertEqual(len(docs), 1)

    # 7. Add mouse ortholog link to MGI.
    run_pipeline("ENSEMBL2MGI", "load")
    # Same ortholog filter as step 6, re-run after the MGI load.
    docs = Search(ortholog_query, idx=idx, size=1).search().docs
    dbxrefs = docs[0].dbxrefs
    self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])
    self.assertEqual("107170", dbxrefs["orthologs"]["mmusculus"]["MGI"])
def test_string_query_with_wildcard_and_highlight(self):
    ''' Test a wildcard string query with highlighting.

    Queries the "id" field of the DEFAULT index for "rs*" with a
    Highlight on "id" using <strong> tags, and expects more than one
    document among the (at most 5) requested.
    '''
    highlight = Highlight("id", pre_tags="<strong>", post_tags="</strong>")
    query = ElasticQuery.query_string("rs*", fields=["id"], highlight=highlight)
    search = Search(query, idx=ElasticSettings.idx('DEFAULT'), size=5)
    # assertGreater reports the actual document count when the check fails.
    self.assertGreater(len(search.search().docs), 1,
                       "Elastic string query retrieved marker (rs*)")
def test_string_query_with_wildcard(self):
    ''' Test a wildcard string query.

    Queries the "id" field of the DEFAULT index for "rs*" and expects
    the result's hits_total to be greater than one.
    '''
    query = ElasticQuery.query_string("rs*", fields=["id"])
    elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'), size=5)
    result = elastic.search()
    # assertGreater reports the actual hit total when the check fails.
    self.assertGreater(result.hits_total, 1,
                       "Elastic string query retrieved marker (rs*)")