def test_alias_queue(self): self.d.create_new_db_and_connect(self.testing_db_name) providers = ProviderFactory.get_providers(self.app.config["PROVIDERS"]) response = self.client.post('/item/doi/' + quote_plus(TEST_DRYAD_DOI)) tiid = json.loads(response.data) # now get it back out response = self.client.get('/item/' + tiid) print tiid assert_equals(response.status_code, 200) resp_dict = json.loads(response.data) assert_equals( set(resp_dict.keys()), set([u'tiid', u'created', u'last_requested', u'metrics', u'last_modified', u'biblio', u'id', u'aliases']) ) assert_equals(unicode(TEST_DRYAD_DOI), resp_dict["aliases"]["doi"][0]) # test the view works res = self.d.view("aliases") assert len(res["rows"]) == 1, res assert_equals(TEST_DRYAD_DOI, res["rows"][0]["value"]["aliases"]["doi"][0]) # see if the item is on the queue my_alias_queue = AliasQueue(self.d) assert isinstance(my_alias_queue.queue, list) assert_equals(len(my_alias_queue.queue), 1) # get our item from the queue my_item = my_alias_queue.first() assert_equals(my_item.aliases.doi[0], TEST_DRYAD_DOI) # do the update using the backend alias_thread = ProvidersAliasThread(providers, self.d) alias_thread.run(run_only_once=True) # get the item back out again and bask in the awesome response = self.client.get('/item/' + tiid) resp_dict = json.loads(response.data) print tiid print response.data assert_equals( resp_dict["aliases"]["title"][0], "data from: can clone size serve as a proxy for clone age? an exploration using microsatellite divergence in populus tremuloides" ) print resp_dict assert_equals(resp_dict["biblio"]["data"]["year"], "2010")
def _spawn_threads(self):
    """Start the backend worker threads.

    Spawns one ProviderMetricsThread per provider that supplies metrics,
    plus a single ProvidersAliasThread covering all providers; every
    started thread is appended to self.threads so it can be joined later.
    """
    for provider in self.providers:
        # only metrics-capable providers get a metrics worker
        if provider.provides_metrics:
            logger.info("Spawning thread for provider " + str(provider.provider_name))
            # create and start the metrics threads
            metrics_worker = ProviderMetricsThread(provider, self.dao)
            metrics_worker.start()
            self.threads.append(metrics_worker)

    logger.info("Spawning thread for aliases")
    alias_worker = ProvidersAliasThread(self.providers, self.dao)
    alias_worker.start()
    self.threads.append(alias_worker)
def test_09_alias_stopped(self):
    """The alias thread must shut down cleanly when stopped immediately.

    Relies on the Queue.first mock as per setUp. There is nothing to
    assert beyond the sequence completing without raising.
    """
    worker = ProvidersAliasThread([ProviderMock()], self.config)
    worker.queue = QueueMock()
    worker.start()
    worker.stop()
    worker.join()
    # completing without error is the whole test
    assert True
def test_18_alias_exceptions(self):
    """
    test_18_alias_exceptions

    Set up the ProvidersAliasThread with a two mock providers, which
    simulate errors on processing aliases for various items. Check
    that we handle retries correctly.
    """
    # NOTE(review): aliases_exceptions appears to map item number -> list of
    # exceptions the mock raises on successive alias calls for that item
    # (confirm against ProviderMock). Item 1 hits three retryable rate-limit
    # errors on both providers; items 2/3 hit four timeouts on one provider
    # each; item 4 hits a single timeout on both.
    mock_provider1 = ProviderMock(
        aliases_exceptions={
            1:[ProviderRateLimitError,ProviderRateLimitError,ProviderRateLimitError],
            2:[ProviderTimeout,ProviderTimeout,ProviderTimeout,ProviderTimeout],
            4:[ProviderTimeout],
        }
    )
    mock_provider1.name = 'mock1'
    mock_provider2 = ProviderMock(
        aliases_exceptions={
            1:[ProviderRateLimitError,ProviderRateLimitError,ProviderRateLimitError],
            3:[ProviderTimeout,ProviderTimeout,ProviderTimeout,ProviderTimeout],
            4:[ProviderTimeout],
        }
    )
    mock_provider2.name = 'mock2'

    pmt = ProvidersAliasThread([mock_provider1,mock_provider2], self.config)
    pmt.queue = QueueMock(max_items=4)
    pmt.start()

    # busy-wait until the thread has worked through all four queued items
    while (pmt.queue.current_item <= 4):
        time.sleep(1)
    pmt.stop()
    pmt.join()

    # Check that item 1 was processed correctly, after a retry
    self.assertTrue(mock_provider1.aliases_processed.has_key(1))
    self.assertTrue(mock_provider2.aliases_processed.has_key(1))
    # collect just the namespaces from the (namespace, value) alias pairs
    ns_list = [k for (k,v) in pmt.queue.items[1].aliases.get_aliases_list()]
    self.assertEqual(set(ns_list),set(['mock','doi']))

    # Check that item 2 failed on the first provider
    ## FIXME re-enable this test after queue refactor in sprint 6
    ## self.assertFalse(mock_provider1.aliases_processed.has_key(2))
    ## self.assertFalse(mock_provider2.aliases_processed.has_key(2))
    ## self.assertEqual(pmt.queue.items[2].aliases.get_aliases_list(),[])

    # Check that item 3 failed on the second provider
    # (provider 1 has no exception script for item 3, so it processed it)
    self.assertTrue(mock_provider1.aliases_processed.has_key(3))
def test_11_alias_provider_not_implemented(self):
    """A NotImplemented provider must not derail the alias thread.

    Relies on the Queue.first mock as per setUp. The thread is left
    running for two seconds, then stopped; the elapsed wall-clock time
    shows it neither died early nor hung on shutdown.
    """
    worker = ProvidersAliasThread([ProviderNotImplemented()], self.d)
    worker.queue = QueueMock()

    started_at = time.time()
    worker.start()
    time.sleep(2)
    worker.stop()
    worker.join()
    elapsed = time.time() - started_at

    # The NotImplementedErrors should not derail the thread
    assert 2.0 < elapsed < 2.5
def test_10_alias_running(self):
    """The alias thread keeps running until stopped, then exits promptly.

    Relies on the Queue.first mock as per setUp. No assertions beyond the
    timing window: the test must complete without error in ~2 seconds.
    """
    worker = ProvidersAliasThread([ProviderMock()], self.config)
    worker.queue = QueueMock()

    started_at = time.time()
    worker.start()
    time.sleep(2)
    worker.stop()
    worker.join()
    elapsed = time.time() - started_at

    # ran for the full sleep, and shutdown added no noticeable delay
    assert 2.0 < elapsed < 2.5
def test_17_alias_thread(self):
    """
    test_17_alias_thread

    Run the ProvidersAliasThread with a single mock provider over a
    one-item queue and check the item gets processed.
    """
    mock_provider = ProviderMock()
    worker = ProvidersAliasThread([mock_provider], self.config)
    worker.queue = QueueMock(max_items=1)
    worker.start()

    # poll until the single queued item has been consumed
    while worker.queue.current_item <= 1:
        time.sleep(1)
    worker.stop()
    worker.join()

    # the mock provider recorded item 1 as processed
    self.assertTrue(mock_provider.aliases_processed.has_key(1))
def main(logfile=None): logger = logging.getLogger() mydao = dao.Dao( app.config["DB_NAME"], app.config["DB_URL"], app.config["DB_USERNAME"], app.config["DB_PASSWORD"] ) # Adding this by handle. fileConfig doesn't allow filters to be added from totalimpact.backend import ctxfilter handler = logging.handlers.RotatingFileHandler(logfile) handler.level = logging.DEBUG formatter = logging.Formatter("%(asctime)s %(levelname)8s %(item)8s %(thread)s%(provider)s - %(message)s")#,"%H:%M:%S,%f") handler.formatter = formatter handler.addFilter(ctxfilter) logger.addHandler(handler) ctxfilter.threadInit() logger.debug("test") from totalimpact.backend import TotalImpactBackend, ProviderMetricsThread, ProvidersAliasThread, StoppableThread, QueueConsumer from totalimpact.providers.provider import Provider, ProviderFactory # Start all of the backend processes print "Starting alias retrieval thread" providers = ProviderFactory.get_providers(app.config["PROVIDERS"]) alias_threads = [] thread_count = app.config["ALIASES"]["workers"] for idx in range(thread_count): at = ProvidersAliasThread(providers, mydao, idx) at.thread_id = 'AliasThread(%i)' % idx at.start() alias_threads.append(at) print "Starting metric retrieval threads..." # Start each of the metric providers metrics_threads = [] for provider in providers: providers = ProviderFactory.get_providers(app.config["PROVIDERS"]) thread_count = app.config["PROVIDERS"][provider.provider_name]["workers"] print " ", provider.provider_name for idx in range(thread_count): thread = ProviderMetricsThread(provider, mydao) metrics_threads.append(thread) thread.thread_id = thread.thread_id + '(%i)' % idx thread.start() # Install a signal handler so we'll break out of the main loop # on receipt of relevant signals class ExitSignal(Exception): pass def kill_handler(signum, frame): raise ExitSignal() import signal signal.signal(signal.SIGTERM, kill_handler) try: while True: time.sleep(1) except (KeyboardInterrupt, ExitSignal), e: pass
def test_metrics_queue(self):
    """
    Test that the metrics queue works

    This test isn't correct just now. We'd need to simulate the
    item getting it's aliases completed.
    """
    # NOTE(review): everything after this SkipTest is dead code, kept so the
    # test can be repaired and re-enabled later.
    raise SkipTest
    self.d.create_new_db_and_connect(self.testing_db_name)
    number_of_item_api_calls = 0

    # create new dryad item
    dryad_resp = self.client.post('/item/doi/' + quote_plus(DRYAD_TEST_DOI))
    number_of_item_api_calls += 1
    dryad_tiid = dryad_resp.data

    # test the metrics view works
    res = self.d.view("metrics")
    assert_equals(
        len(res["rows"]),
        number_of_item_api_calls*len(self.providers)
    )  # three IDs above, three providers
    assert_equals(
        res["rows"][0]["value"]["metrics"]["dryad:package_views"]["values"],
        {})

    # see if the item is on the queue
    all_metrics_queue = MetricsQueue(self.d)
    assert isinstance(all_metrics_queue.queue, list)
    assert_equals(
        len(all_metrics_queue.queue),
        number_of_item_api_calls*len(self.providers)
    )

    # get our item from the queue; it should be fresh (created < 30s ago)
    my_item = all_metrics_queue.first()
    assert_equals(my_item.metrics["dryad:package_views"]['values'], {})
    assert(my_item.created - time.time() < 30)

    # create new plos item
    plos_resp = self.client.post('/item/doi/' + quote_plus(PLOS_TEST_DOI))
    number_of_item_api_calls += 1
    plos_tiid = json.loads(plos_resp.data)

    # create new github item
    github_resp = self.client.post('/item/github/' + quote_plus(GITHUB_TEST_ID))
    number_of_item_api_calls += 1
    github_tiid = json.loads(github_resp.data)

    all_metrics_queue = MetricsQueue(self.d)
    #assert_equals(len(all_metrics_queue.queue),
    #    number_of_item_api_calls*len(self.providers))

    # per-provider queues: one entry per API call each
    dryad_metrics_queue = MetricsQueue(self.d, "dryad")
    assert_equals(len(dryad_metrics_queue.queue), number_of_item_api_calls)
    github_metrics_queue = MetricsQueue(self.d, "github")
    assert_equals(len(github_metrics_queue.queue), number_of_item_api_calls)

    # complete aliases first so metrics collection can proceed
    alias_thread = ProvidersAliasThread(self.providers, self.d)
    alias_thread.run(run_only_once=True)

    # now run just the dryad metrics thread.
    metrics_thread = ProviderMetricsThread(self.providers[0], self.d)
    metrics_thread.run(run_only_once=True)
    metrics_thread.run(run_only_once=True)
    metrics_thread.run(run_only_once=True)

    # test the dryad doi
    dryad_resp = self.client.get('/item/' + dryad_tiid.replace('"', ''))
    resp_dict = json.loads(dryad_resp.data)
    print json.dumps(resp_dict, sort_keys=True, indent=4)
    assert_equals(resp_dict['metrics']['dryad:total_downloads']['values'].values()[0], 169)