def get(self, term):
    """
    Returns list of matching concepts or entities using lexical search
    """
    parsed = simple_parser.parse_args()
    query = GolrSearchQuery(term, user_agent=USER_AGENT, **parsed)
    return query.search()
def test_search_go_ontol():
    """Wildcard search against the GO ontology_class category should return hits."""
    query = GolrSearchQuery("transport*", category='ontology_class', is_go=True)
    print("Q={}".format(query))
    print("PARAMS={}".format(query.solr_params()))
    results = query.search()
    print("RESULTS={}".format(results))
    hits = results.docs
    for hit in hits:
        print(str(hit))
    assert len(hits) > 0
def test_search():
    """A plain term search should return at least one document."""
    query = GolrSearchQuery("abnormal")
    print("Q={}".format(query))
    print("PARAMS={}".format(query.solr_params()))
    results = query.search()
    print("RESULTS={}".format(results))
    hits = results.docs
    for hit in hits:
        print(str(hit))
    assert len(hits) > 0
def test_search_go_all():
    """Wildcard GO search across all categories should return hits and facet counts."""
    query = GolrSearchQuery("transport*", is_go=True)
    print("Q={}".format(query))
    print("PARAMS={}".format(query.solr_params()))
    results = query.search()
    print("RESULTS={}".format(results))
    hits = results.docs
    for hit in hits:
        print(str(hit))
    assert len(hits) > 0
    print(str(results.facet_counts))
def test_cursor():
    """
    Tests rows and start parameters.

    First fetch 100 docs, then issue the same query paging with start/rows
    in increments of ten. The two sets of IDs returned should be identical.
    """
    bulk_docs = GolrSearchQuery("abnormal", rows=100).search().docs
    ids = {doc['id'] for doc in bulk_docs}
    print('Init ids={}'.format(ids))
    assert len(ids) == 100
    matches = set()
    for offset in range(0, 100, 10):
        page = GolrSearchQuery("abnormal", start=offset, rows=10).search().docs
        page_ids = [doc['id'] for doc in page]
        assert len(page_ids) == 10
        print('Next ids (from {}) = {}'.format(offset, page_ids))
        matches.update(page_ids)
    assert len(matches) == 100
    assert len(matches.intersection(ids)) == 100
def clean_feature_ids(id: str) -> str:
    """
    MME queries often need to be sanitized before going into owlsim, for example:
    MIM:610536 -> OMIM:610536
    SHH -> HGNC:10848

    :param id: a CURIE (``prefix:reference``) or a bare label
                (note: parameter name shadows the builtin ``id`` but is
                kept for backward compatibility with existing callers)
    :return: the sanitized identifier; returned unchanged when no rewrite
             or lookup match applies
    """
    if ':' in id:
        # Split on the first colon only: a plain split(':') raises
        # ValueError for ids whose reference part itself contains a
        # colon (e.g. 'A:B:C' yields three fields).
        prefix, reference = id.split(':', 1)
        if prefix == 'MIM':
            id = 'OMIM:' + reference
    else:
        # Assume it's a label and look it up
        # Assume it's human, and make sure it's an exact match
        query = GolrSearchQuery(id, taxon=['NCBITaxon:9606'], min_match="100%")
        results = query.search()
        if results.docs:
            id = results.docs[0]['id']
    return id
class TestGolrSearchQuery():
    """
    Tests GolrSearchQuery result processing against a mocked pysolr backend.

    setup_class replaces the manager's solr.search with a MagicMock that
    returns canned docs loaded from resources/solr/solr-docs.json, so no
    network access happens in these tests.
    """

    @classmethod
    def setup_class(self):
        self.manager = GolrSearchQuery()
        # Mock the PySolr search function to
        # return our test docs
        input_fh = os.path.join(os.path.dirname(__file__),
                                'resources/solr/solr-docs.json')
        input_docs = json.load(open(input_fh))
        self.test_results = pysolr.Results(input_docs)
        self.manager.solr.search = MagicMock(return_value=self.test_results)

    @classmethod
    def teardown_class(self):
        # Drop the shared manager so state cannot leak between test classes
        self.manager = None

    def test_longest_hl(self):
        # The longest highlight string should be selected from the candidates
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting"
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_longest_hl_ambiguous(self):
        # Two candidates of similar length: the selection must be deterministic
        # and pick the expected one
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting",
            "<em>Muscle</em> <em>atrophy</em>, not generalized",
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_hl_to_string(self):
        # <em> markup should be stripped, leaving the plain text
        test_data = "Foo <em>Muscle</em> bar <em>atrophy</em>, generalized"
        expected = "Foo Muscle bar atrophy, generalized"
        results = self.manager._hl_as_string(test_data)
        assert expected == results

    def test_invalid_xml(self):
        # An unclosed tag makes the highlight unparseable; expect ParseError
        test_data = "Foo<Foo> <em>Muscle</em> bar <em>atrophy</em>, generalized"
        pytest.raises(ET.ParseError, self.manager._hl_as_string, test_data)

    def test_autocomplete_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object
        test that _process_autocomplete_results returns the
        expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager._process_autocomplete_results(self.test_results)
        # Compare via canonical JSON; default= serializes project objects
        # through their __dict__ (falling back to str)
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_search_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object
        test that _process_autocomplete_results returns the
        expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/search-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager._process_search_results(self.test_results)
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_search(self):
        """
        Given a mock PySolr.search method
        test that search() returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/search-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.search()
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_autocomplete(self):
        """
        Given a mock PySolr.search method
        test that autocomplete() returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.autocomplete()
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_autocomplete_no_category(self):
        """
        Test for document without a category
        """
        # Provide a new mock file
        input_fh = os.path.join(os.path.dirname(__file__),
                                'resources/solr/autocomplete-nocat.json')
        input_docs = json.load(open(input_fh))
        self.test_results = pysolr.Results(input_docs)
        self.manager.solr.search = MagicMock(return_value=self.test_results)
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-nocat-expect.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.autocomplete()
        print(json.dumps(output_docs,
                         default=lambda obj: getattr(obj, '__dict__', str(obj)),
                         sort_keys=True))
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)
def search(term, args):
    """
    Convenience wrapper: run a lexical GOlr search for *term*.

    :param term: the search string
    :param args: dict of extra keyword parameters for GolrSearchQuery
    :return: the processed search results from GolrSearchQuery.search()
    """
    # Unpack args as keyword arguments, consistent with the other
    # GolrSearchQuery call sites in this file; passing the dict
    # positionally binds it to the wrong constructor parameter.
    q = GolrSearchQuery(term, **args)
    return q.search()