def test_generates_vids_of_the_found_docs(self):
    """search_partitions yields the vids of every document the index finds."""

    class MyDict(dict):
        # dict subclass only so a .score attribute can be attached to results.
        pass

    class FakeSearcher(object):
        def search(self, query, limit=20):
            # Return results shaped like those consumed by search_datasets.
            first = MyDict({'vid': 'vid1', 'bvid': 'bvid1', 'type': 'type1'})
            first.score = 0.5
            second = MyDict({'vid': 'vid2', 'bvid': 'bvid2', 'type': 'b'})
            second.score = 0.6
            return [first, second]

        def __enter__(self, *args, **kwargs):
            return self

        def __exit__(self, *args, **kwargs):
            pass

    class FakeIdentifierIndex(object):
        schema = '?'

        def searcher(*args, **kwargs):
            return FakeSearcher()

    search = Search(self.lib)
    search._dataset_index = FakeIdentifierIndex()

    ret = search.search_partitions('about me')
    self.assertTrue(hasattr(ret, 'next'))  # it is a generator (py2)
    found_vids = [x for x in ret]
    self.assertIn('vid1', found_vids)
    self.assertIn('vid2', found_vids)
def test_add_document_to_writer_for_each_given_identifier(self):
    """index_identifiers adds a document to the writer for each identifier."""
    # prepare state
    # TODO: It is so complicated. Find another way to mock indexer.
    fake_writer = fudge.Fake()\
        .expects('add_document')\
        .expects('commit')

    class FakeSearcher(object):
        pass

    FakeSearcher.documents = fudge.Fake()\
        .expects_call()\
        .returns([])

    search = Search(self.lib)
    search._identifier_index = fudge.Fake('Index')\
        .provides('writer')\
        .returns(fake_writer)\
        .provides('searcher')\
        .returns(FakeSearcher())

    # testing
    identifiers = [
        {'identifier': 'ident1', 'type': 'type1', 'name': 'name1'},
        {'identifier': 'ident2', 'type': 'type2', 'name': 'name2'}]
    search.index_identifiers(identifiers)
    fudge.verify()
def test_returns_given_terms_if_place_vids_do_not_exist(self):
    """expand_place_ids returns the original terms when no place vids match."""
    # prepare state
    search = Search(self.lib)

    # testing
    ret = search.expand_place_ids('California')
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(ret, 'California')
def test_uses_cached_identifier(self):
    """identifier_index returns the cached index without rebuilding it."""
    # prepare state
    search = Search(self.lib)
    # Use a string as index; it is not a valid index, but it does not matter here.
    search._identifier_index = 'INDEX'
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(search.identifier_index, 'INDEX')
def test_removes_index_dir(self, fake_exists, fake_rmtree):
    """reset removes the dataset index directory and drops the cached index."""
    # prepare state.
    search = Search(self.lib)
    fake_exists.expects_call().with_args(search.d_index_dir).returns(True)
    fake_rmtree.expects_call().with_args(search.d_index_dir)

    # testing
    search.reset()
    self.assertIsNone(search._dataset_index)
def test_returns_years_range(self):
    """from_to_as_term formats two valid years as a range term."""
    # prepare state
    search = Search(self.lib)

    # testing
    from_year = 1995
    to_year = 1996
    ret = search.from_to_as_term(from_year, to_year)
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(ret, '[1995 TO 1996]')
def test_returns_second_year_if_wrong_first_given(self):
    """from_to_as_term drops an unparseable bound and keeps the valid one."""
    # NOTE(review): the test name says "second year if wrong first", but the
    # inputs make the FIRST year valid and the SECOND invalid — consider
    # renaming for clarity.
    # prepare state
    search = Search(self.lib)

    # testing
    from_year = '1996'
    to_year = 'not-year'
    ret = search.from_to_as_term(from_year, to_year)
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(ret, '[1996 TO]')
def test_returns_None_if_both_are_wrong(self):
    """from_to_as_term yields None when neither bound parses as a year."""
    # prepare state
    search = Search(self.lib)

    # testing
    ret = search.from_to_as_term('not-year', 'not-year')
    self.assertIsNone(ret)
def test_generates_vids_found_by_searcher(self):
    """datasets yields the dataset vids found through the fake index."""
    # prepare state.
    search = Search(self.lib)
    search._dataset_index = self._get_fake_identifier()

    # testing
    datasets_gen = search.datasets
    self.assertTrue(hasattr(datasets_gen, 'next'))  # it is a generator (py2)
    datasets = [x for x in datasets_gen]
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(datasets, ['vid2'])
def test_indexes_library_datasets(self):
    """index_datasets invokes index_dataset while walking library datasets."""
    # prepare state.
    DatasetFactory()
    DatasetFactory()
    search = Search(self.lib)
    search.index_dataset = fudge.Fake().expects_call()

    # testing
    with fudge.patched_context(search, 'all_datasets', []):
        search.index_datasets()
def test_uses_library_driver_backend(self):
    """The backend class follows the database driver when no search service is set."""
    self._my_library.config.services.search = None

    # switch to sqlite.
    self._my_library.database.driver = 'sqlite'
    self.assertIsInstance(
        Search(self._my_library).backend, SQLiteSearchBackend)

    # switch to postgres.
    self._my_library.database.driver = 'postgres'
    self.assertIsInstance(
        Search(self._my_library).backend, PostgreSQLSearchBackend)
def test_contains_generator_with_documents_found_by_searcher(self):
    """identifiers yields every document found in the identifier index."""
    # prepare state
    search = Search(self.lib)
    search._identifier_index = self._get_fake_identifier()

    # testing
    identifiers_gen = search.identifiers
    self.assertTrue(hasattr(identifiers_gen, 'next'))  # it is a generator (py2)
    identifiers = [x for x in identifiers_gen]
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(len(identifiers), 2)
    self.assertIn('identifier', identifiers[0])
    self.assertIn('identifier', identifiers[1])
def test_generates_vids_of_the_partitions_found_by_searcher(self):
    """partitions yields the partition vids found through the fake index."""
    # prepare state.
    search = Search(self.lib)
    search._dataset_index = self._get_fake_identifier()

    # testing
    partitions_gen = search.partitions  # it returns generator.
    self.assertTrue(hasattr(partitions_gen, 'next'))
    partitions = [x for x in partitions_gen]
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(partitions, ['vid1'])
def test_generates_results_found_by_searcher(self):
    """search_identifiers yields (score, bvid, type, name) tuples from the index."""
    # prepare state
    search = Search(self.lib)
    search._identifier_index = self._get_fake_identifier()

    # testing
    ret = search.search_identifiers('about me')
    self.assertTrue(hasattr(ret, 'next'))  # it is a generator (py2)
    expected_result = [(0.5, 'bvid1', 'p', False), (0.6, 'bvid2', 'b', False)]
    result = [x for x in ret]
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(result, expected_result)
def test_indexes_library_datasets(self):
    """index_library_datasets indexes each dataset in the library exactly once."""
    mock_datasets = [MagicMock(spec=Dataset) for _ in range(3)]
    self._my_library.datasets = mock_datasets

    fake_backend = MagicMock(spec=SQLiteSearchBackend)
    fake_backend.dataset_index = Mock()
    fake_backend.partition_index = Mock()
    fake_backend.identifier_index = Mock()

    search = Search(self._my_library, backend=fake_backend)
    search.index_library_datasets()

    self.assertEqual(
        len(fake_backend.dataset_index.index_one.mock_calls), 3)
def test_tick_fn_gets_each_vid(self):
    """index_datasets reports progress to tick_f after each dataset."""
    # prepare state.
    DatasetFactory()
    DatasetFactory()
    search = Search(self.lib)
    search.index_dataset = fudge.Fake().expects_call()
    tick_f = fudge.Fake()\
        .expects_call().with_args('datasets: 1 partitions: 0')\
        .next_call().with_args('datasets: 2 partitions: 0')

    # testing
    with fudge.patched_context(search, 'all_datasets', []):
        search.index_datasets(tick_f=tick_f)
def test_logs_error_to_library_logger(self, fake_exists):
    """Errors raised while opening the index go to the library logger."""
    # prepare state.
    SCHEMA = 'schema'
    # Create the Search instance before mocking because __init__ uses os modules.
    search = Search(self.lib)
    fake_exists.expects_call().raises(Exception('My fake exception.'))
    fake_error = fudge.Fake('error').expects_call()

    # testing
    with fudge.patched_context(self.sqlite_db.logger, 'error', fake_error):
        with self.assertRaises(Exception):
            search.get_or_new_index(SCHEMA, search.d_index_dir)
def test_uses_backend_from_config(self, fake_init):
    """The services.search config value selects the whoosh backend."""
    # Disable backend initialization to reduce the amount of mocks.
    fake_init.return_value = None
    self._my_library.config.services.search = 'whoosh'

    search = Search(self._my_library)
    self.assertIsInstance(search.backend, WhooshSearchBackend)
def test_opens_existing_index_if_path_exists(self, fake_exists):
    """get_or_new_index opens, rather than creates, an index whose dir exists."""
    # First assert signatures of the functions we are going to mock did not change.
    assert_spec(index.open_dir, ['dirname', 'indexname', 'readonly', 'schema'])

    # prepare state.
    SCHEMA = 'schema'
    # Create the Search instance before mocking because __init__ uses os modules.
    search = Search(self.lib)
    fake_exists.expects_call().with_args(search.d_index_dir).returns(True)
    fake_open_dir = fudge.Fake().expects_call().with_args(search.d_index_dir)

    # testing
    with fudge.patched_context(index, 'open_dir', fake_open_dir):
        search.get_or_new_index(SCHEMA, search.d_index_dir)
def test_raises_missing_backend_exception_if_config_contains_invalid_backend(
        self):
    """An unknown services.search backend raises a 'Missing backend' error."""
    # services.search
    try:
        Search(self._my_library)
    except Exception as exc:
        self.assertIn('Missing backend', str(exc))
    else:
        # Previously the test passed silently when no exception was raised;
        # fail explicitly instead.
        self.fail('Search() did not raise for an invalid backend.')
def test_returns_dict_with_datasets_found_by_searcher(self):
    """search_datasets returns a dict keyed by bvid with per-type scores."""
    search = Search(self.lib)
    search._dataset_index = self._get_fake_identifier()

    ret = search.search_datasets('about me')
    self.assertIsInstance(ret, dict)
    self.assertIn('bvid1', ret)
    self.assertIn('bvid2', ret)

    # scores copied properly
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(ret['bvid1'].p_score, 0.5)
    self.assertEqual(ret['bvid1'].b_score, 0)
    self.assertEqual(ret['bvid2'].p_score, 0)
    self.assertEqual(ret['bvid2'].b_score, 0.6)
def test_uses_default_backend_if_library_database_search_is_not_implemented(
        self, fake_init):
    """Whoosh is the fallback backend for drivers without search support."""
    # Disable backend initialization to reduce the amount of mocks.
    fake_init.return_value = None
    self._my_library.config.services.search = None

    with patch.object(self._my_library.database, 'driver', 'mysql'):
        search = Search(self._my_library)
        self.assertIsInstance(search.backend, WhooshSearchBackend)
def test_creates_new_index_if_path_does_not_exist(self, fake_exists, fake_makedirs):
    """get_or_new_index creates the directory and a new index when missing."""
    # First assert signatures of the functions we are going to mock did not change.
    assert_spec(index.create_in, ['dirname', 'schema', 'indexname'])

    # prepare state.
    SCHEMA = 'schema'
    DIR = 'the-dir'
    # Create the Search instance before mocking because __init__ uses os modules.
    search = Search(self.lib)
    fake_exists.expects_call().with_args(DIR).returns(False)
    fake_makedirs.expects_call().with_args(DIR)
    fake_create_in = fudge.Fake().expects_call().with_args(DIR, SCHEMA)

    # testing
    with fudge.patched_context(index, 'create_in', fake_create_in):
        search.get_or_new_index(SCHEMA, DIR)
def test_returns_place_vids(self):
    """expand_place_ids maps matched place identifiers to their vids."""
    # First assert signatures of the functions we are going to mock did not change.
    assert_spec(Search.search_identifiers, ['self', 'search_phrase', 'limit'])
    assert_spec(GVid.parse, ['cls', 'gvid'])

    # prepare state
    search = Search(self.lib)
    score = 1
    vid = 'vid-1'
    t = 'type'
    name = 'California1'
    fake_search = fudge.Fake().expects_call().returns([(score, vid, t, name)])
    fake_parse = fudge.Fake().expects_call().returns([])

    # testing
    with fudge.patched_context(Search, 'search_identifiers', fake_search):
        with fudge.patched_context(GVid, 'parse', fake_parse):
            ret = search.expand_place_ids('California')
            # assertEquals is a deprecated alias; use assertEqual.
            self.assertEqual(ret, [vid])
def test_feeds_tick_function_with_indexed_dataset(self):
    """index_library_datasets reports indexing progress through tick_f."""
    # prepare mocks
    fake_backend = MagicMock(spec=SQLiteSearchBackend)
    fake_backend.dataset_index = Mock()
    fake_backend.partition_index = Mock()
    fake_backend.identifier_index = Mock()
    tick_f = Mock()

    fake_library = MagicMock(spec=Library)
    fake_library.datasets = [MagicMock(spec=Dataset)]

    # run
    search = Search(fake_library, backend=fake_backend)
    search.index_library_datasets(tick_f=tick_f)

    # test
    tick_f.assert_called_once_with('datasets: 1 partitions: 0')
def test_postgres_query(self): self._my_library.config.services.search = None # switch to postgres. self._my_library.database.driver = 'postgres' search = Search(self._my_library) self.assertIsInstance(search.backend, PostgreSQLSearchBackend) be = search.backend pg = be._get_dataset_index() q = pg._make_query_from_terms( 'source healthindicators.gov diabetes asthma') print str(q[0]) print q[1]
def __init__(self, config=None, search=None, echo=None, read_only=False):
    """Open the library database and set up its supporting services.

    :param config: run configuration; falls back to get_runconfig() when None.
    :param search: search backend name; when given, a Search is built eagerly.
    :param echo: passed to the Database for SQL echoing.
    :param read_only: allow optimizations that assume we aren't building bundles.
    :raises DatabaseMissingError: if the database cannot be opened.
    """
    from sqlalchemy.exc import OperationalError
    from ambry.orm.exc import DatabaseMissingError

    if config:
        self._config = config
    else:
        self._config = get_runconfig()

    self.logger = logger
    self.read_only = read_only
    self._echo = echo

    # Use the resolved config so the filesystem sees the runconfig fallback;
    # previously a caller-supplied None was passed straight through.
    self._fs = LibraryFilesystem(self._config)

    self._db = Database(self._fs.database_dsn, echo=echo)
    self._account_password = self.config.accounts.password

    self._warehouse = None  # Will be populated in the warehouse property.

    try:
        self._db.open()
    except OperationalError as e:
        raise DatabaseMissingError(
            "Failed to open database '{}': {} ".format(self._db.dsn, e))

    self.processes = None  # Number of multiprocessing processors. Default to all of them.

    if search:
        self._search = Search(self, search)
    else:
        self._search = None
def test_converts_by_to_terms(self):
    """make_query_from_terms maps 'by' onto a partition keywords clause."""
    search = Search(self.lib)
    cterms = search.make_query_from_terms({'by': 'Beslan'})
    expected = '( type:p AND keywords:(Beslan) )'
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(cterms, expected)
def test_converts_source_to_terms(self):
    """make_query_from_terms maps 'source' onto a bundle keywords clause."""
    search = Search(self.lib)
    cterms = search.make_query_from_terms({'source': 'Beslan'})
    expected = ' (type:b AND keywords:Beslan ) AND '
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(cterms, expected)
def test_converts_with_to_terms(self):
    """make_query_from_terms maps 'with' onto a partition doc clause."""
    search = Search(self.lib)
    cterms = search.make_query_from_terms({'with': 'Beslan'})
    expected = '( type:p AND doc:(Beslan) )'
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(cterms, expected)
def test_joins_terms_with_or(self):
    """make_query_from_terms joins the bundle and partition clauses with OR."""
    search = Search(self.lib)
    cterms = search.make_query_from_terms({'by': 'Beslan', 'about': 'Beslan'})
    expected = '( type:b AND doc:(Beslan) ) OR ( type:p AND keywords:(Beslan) AND doc:(Beslan) )'
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(cterms, expected)
def search(self):
    """Return the library's Search object, creating it on first access."""
    if not self._search:
        # Built lazily and cached for subsequent calls.
        self._search = Search(self)
    return self._search
def test_converts_string_to_terms(self):
    """make_query_from_terms parses a plain string into bundle/partition clauses."""
    search = Search(self.lib)
    cterms = search.make_query_from_terms('about Beslan')
    expected = '( type:b AND doc:(beslan) ) OR ( type:p AND doc:(beslan) )'
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(cterms, expected)