def test_alias_queue(self):
        """Integration test: POSTing a DOI creates an item, the item appears in
        the alias view/queue, and one pass of the alias thread fills in the
        title alias and biblio data.

        NOTE(review): hits the live test client and CouchDB dao configured in
        setUp; statement order matters throughout.
        """
        self.d.create_new_db_and_connect(self.testing_db_name)

        providers = ProviderFactory.get_providers(self.app.config["PROVIDERS"])

        # Create an item from the Dryad DOI; the response body is the new tiid.
        response = self.client.post('/item/doi/' + quote_plus(TEST_DRYAD_DOI))
        tiid = json.loads(response.data)


        # now get it back out
        response = self.client.get('/item/' + tiid)
        print tiid
        assert_equals(response.status_code, 200)
        
        resp_dict = json.loads(response.data)
        # A freshly created item must carry exactly this set of top-level keys.
        assert_equals(
            set(resp_dict.keys()),
            set([u'tiid', u'created', u'last_requested', u'metrics', 
                u'last_modified', u'biblio', u'id', u'aliases'])
            )
        assert_equals(unicode(TEST_DRYAD_DOI), resp_dict["aliases"]["doi"][0])

        # test the view works
        res = self.d.view("aliases")
        assert len(res["rows"]) == 1, res
        assert_equals(TEST_DRYAD_DOI, res["rows"][0]["value"]["aliases"]["doi"][0])

        # see if the item is on the queue
        my_alias_queue = AliasQueue(self.d)
        assert isinstance(my_alias_queue.queue, list)
        assert_equals(len(my_alias_queue.queue), 1)
        
        # get our item from the queue
        my_item = my_alias_queue.first()
        assert_equals(my_item.aliases.doi[0], TEST_DRYAD_DOI)

        # do the update using the backend
        alias_thread = ProvidersAliasThread(providers, self.d)
        alias_thread.run(run_only_once=True)

        # get the item back out again and bask in the awesome
        response = self.client.get('/item/' + tiid)
        resp_dict = json.loads(response.data)
        print tiid
        print response.data
        # Title presumably supplied by the Dryad provider — TODO confirm.
        assert_equals(
            resp_dict["aliases"]["title"][0],
            "data from: can clone size serve as a proxy for clone age? an exploration using microsatellite divergence in populus tremuloides"
            )
        print resp_dict
        assert_equals(resp_dict["biblio"]["data"]["year"], "2010")
# Example #2
 def _spawn_threads(self):
     """Start one ProviderMetricsThread per metrics-capable provider, then a
     single ProvidersAliasThread; every started thread is appended to
     self.threads so the caller can manage shutdown."""
     metric_capable = [p for p in self.providers if p.provides_metrics]
     for prov in metric_capable:
         logger.info("Spawning thread for provider " + str(prov.provider_name))
         # create and start a metrics worker for this provider
         worker = ProviderMetricsThread(prov, self.dao)
         worker.start()
         self.threads.append(worker)
     
     logger.info("Spawning thread for aliases")
     alias_worker = ProvidersAliasThread(self.providers, self.dao)
     alias_worker.start()
     self.threads.append(alias_worker)
 def test_09_alias_stopped(self):
     """The alias thread must start, stop, and join without raising.

     Relies on the Queue.first mock installed in setUp."""
     thread = ProvidersAliasThread([ProviderMock()], self.config)
     thread.queue = QueueMock()
     
     thread.start()
     thread.stop()
     thread.join()
     
     # Reaching this point without an exception is the entire test.
     assert True
    def test_18_alias_exceptions(self):
        """ test_18_alias_exceptions

            Set up the ProvidersAliasThread with a two mock providers, which 
            simulate errors on processing aliases for various items. Check that
            we handle retries correctly.
        """
        # Provider 1: item 1 rate-limited 3x, item 2 times out 4x, item 4 once.
        mock_provider1 = ProviderMock(
            aliases_exceptions={
                1:[ProviderRateLimitError,ProviderRateLimitError,ProviderRateLimitError],
                2:[ProviderTimeout,ProviderTimeout,ProviderTimeout,ProviderTimeout],
                4:[ProviderTimeout],
            }
        )
        mock_provider1.name = 'mock1'

        # Provider 2: same schedule for item 1, but item 3 (not 2) times out 4x.
        mock_provider2 = ProviderMock(
            aliases_exceptions={
                1:[ProviderRateLimitError,ProviderRateLimitError,ProviderRateLimitError],
                3:[ProviderTimeout,ProviderTimeout,ProviderTimeout,ProviderTimeout],
                4:[ProviderTimeout],
            }
        )
        mock_provider2.name = 'mock2'

        pmt = ProvidersAliasThread([mock_provider1,mock_provider2], self.config)
        pmt.queue = QueueMock(max_items=4)
        
        pmt.start()
        # Poll until the mock queue reports that all 4 items were consumed.
        while (pmt.queue.current_item <= 4): 
            time.sleep(1)
        pmt.stop()
        pmt.join()

        # Check that item 1 was processed correctly, after a retry
        self.assertTrue(mock_provider1.aliases_processed.has_key(1))
        self.assertTrue(mock_provider2.aliases_processed.has_key(1))
        ns_list = [k for (k,v) in pmt.queue.items[1].aliases.get_aliases_list()]
        self.assertEqual(set(ns_list),set(['mock','doi']))

        # Check that item 2 failed on the first provider
        ## FIXME re-enable this test after queue refactor in sprint 6        
        ## self.assertFalse(mock_provider1.aliases_processed.has_key(2))
        ## self.assertFalse(mock_provider2.aliases_processed.has_key(2))
        ## self.assertEqual(pmt.queue.items[2].aliases.get_aliases_list(),[])

        # Check that item 3 failed on the second provider
        self.assertTrue(mock_provider1.aliases_processed.has_key(3))
 def test_11_alias_provider_not_implemented(self):
     """A provider raising NotImplementedError must not derail the alias
     thread; it should keep running for the full 2 seconds and stop cleanly.

     Relies on the Queue.first mock installed in setUp."""
     thread = ProvidersAliasThread([ProviderNotImplemented()], self.d)
     thread.queue = QueueMock()
     
     started = time.time()
     thread.start()
     time.sleep(2)
     
     thread.stop()
     thread.join()
     elapsed = time.time() - started
     
     # The NotImplementedErrors should not derail the thread
     assert 2.0 < elapsed < 2.5
 def test_10_alias_running(self):
     """The alias thread runs against a mocked queue for ~2 seconds and then
     shuts down cleanly within the expected time window.

     Relies on the Queue.first mock installed in setUp."""
     thread = ProvidersAliasThread([ProviderMock()], self.config)
     thread.queue = QueueMock()
     
     started = time.time()
     thread.start()
     time.sleep(2)
     
     thread.stop()
     thread.join()
     elapsed = time.time() - started
     
     # No assertions on results — only that shutdown completes promptly
     # and without error.
     assert 2.0 < elapsed < 2.5
    def test_17_alias_thread(self):
        """ test_17_alias_thread

            Run the ProvidersAliasThread with one mock provider and a
            single-item queue; verify the item gets processed.
        """
        provider = ProviderMock()
        thread = ProvidersAliasThread([provider], self.config)
        thread.queue = QueueMock(max_items=1)
        
        thread.start()
        # Poll until the mock queue reports the item has been consumed.
        while thread.queue.current_item <= 1:
            time.sleep(1)
        thread.stop()
        thread.join()

        # The provider must have recorded item 1 as processed.
        self.assertTrue(provider.aliases_processed.has_key(1))
# Example #8
def main(logfile=None):

    logger = logging.getLogger()

    mydao = dao.Dao(
        app.config["DB_NAME"],
        app.config["DB_URL"],
        app.config["DB_USERNAME"],
        app.config["DB_PASSWORD"]
    ) 

    # Adding this by handle. fileConfig doesn't allow filters to be added
    from totalimpact.backend import ctxfilter
    handler = logging.handlers.RotatingFileHandler(logfile)
    handler.level = logging.DEBUG
    formatter = logging.Formatter("%(asctime)s %(levelname)8s %(item)8s %(thread)s%(provider)s - %(message)s")#,"%H:%M:%S,%f")
    handler.formatter = formatter
    handler.addFilter(ctxfilter)
    logger.addHandler(handler)
    ctxfilter.threadInit()

    logger.debug("test")

    from totalimpact.backend import TotalImpactBackend, ProviderMetricsThread, ProvidersAliasThread, StoppableThread, QueueConsumer
    from totalimpact.providers.provider import Provider, ProviderFactory

    # Start all of the backend processes
    print "Starting alias retrieval thread"
    providers = ProviderFactory.get_providers(app.config["PROVIDERS"])

    alias_threads = []
    thread_count = app.config["ALIASES"]["workers"]
    for idx in range(thread_count):
        at = ProvidersAliasThread(providers, mydao, idx)
        at.thread_id = 'AliasThread(%i)' % idx
        at.start()
        alias_threads.append(at)

    print "Starting metric retrieval threads..."
    # Start each of the metric providers
    metrics_threads = []
    for provider in providers:
        providers = ProviderFactory.get_providers(app.config["PROVIDERS"])
        thread_count = app.config["PROVIDERS"][provider.provider_name]["workers"]
        print "  ", provider.provider_name
        for idx in range(thread_count):
            thread = ProviderMetricsThread(provider, mydao)
            metrics_threads.append(thread)
            thread.thread_id = thread.thread_id + '(%i)' % idx
            thread.start()

    # Install a signal handler so we'll break out of the main loop
    # on receipt of relevant signals
    class ExitSignal(Exception):
        pass
 
    def kill_handler(signum, frame):
        raise ExitSignal()

    import signal
    signal.signal(signal.SIGTERM, kill_handler)

    try:
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, ExitSignal), e:
        pass
    def test_metrics_queue(self):
        """ Test that the metrics queue works

            This test isn't correct just now. We'd need to simulate
            the item getting it's aliases completed.
        """
        raise SkipTest
        # NOTE(review): everything below the SkipTest is currently unreachable;
        # it documents the intended flow for when the skip is removed.
        self.d.create_new_db_and_connect(self.testing_db_name)
        number_of_item_api_calls = 0

        # create new dryad item 
        dryad_resp = self.client.post('/item/doi/' + 
                quote_plus(DRYAD_TEST_DOI))
        number_of_item_api_calls += 1
        dryad_tiid = dryad_resp.data

        # test the metrics view works
        res = self.d.view("metrics")
        assert_equals(
            len(res["rows"]),
             number_of_item_api_calls*len(self.providers)
            )  # three IDs above, three providers
        assert_equals(
            res["rows"][0]["value"]["metrics"]["dryad:package_views"]["values"],
            {})

        # see if the item is on the queue
        all_metrics_queue = MetricsQueue(self.d) 
        assert isinstance(all_metrics_queue.queue, list)
        assert_equals(
            len(all_metrics_queue.queue),
            number_of_item_api_calls*len(self.providers)
            )
        
        # get our item from the queue
        my_item = all_metrics_queue.first() 
        assert_equals(my_item.metrics["dryad:package_views"]['values'], {})
        # Item should have been created within the last 30 seconds.
        assert(my_item.created - time.time() < 30)


        # create new plos item 
        plos_resp = self.client.post('/item/doi/' + quote_plus(PLOS_TEST_DOI))
        number_of_item_api_calls += 1        
        plos_tiid = json.loads(plos_resp.data)

        # create new github item 
        github_resp = self.client.post('/item/github/' + quote_plus(GITHUB_TEST_ID))
        number_of_item_api_calls += 1        
        github_tiid = json.loads(github_resp.data)

        all_metrics_queue = MetricsQueue(self.d)
        #assert_equals(len(all_metrics_queue.queue), 
        #        number_of_item_api_calls*len(self.providers)) 

        # Per-provider queues should each see one entry per item created.
        dryad_metrics_queue = MetricsQueue(self.d, "dryad")
        assert_equals(len(dryad_metrics_queue.queue), 
                number_of_item_api_calls) 

        github_metrics_queue = MetricsQueue(self.d, "github")
        assert_equals(len(github_metrics_queue.queue), 
                number_of_item_api_calls) 


        # Resolve aliases first so the metrics threads have IDs to work with.
        alias_thread = ProvidersAliasThread(self.providers, self.d)
        alias_thread.run(run_only_once=True)

        # now run just the dryad metrics thread.
        metrics_thread = ProviderMetricsThread(self.providers[0], self.d)
        metrics_thread.run(run_only_once=True)  
        metrics_thread.run(run_only_once=True)
        metrics_thread.run(run_only_once=True)

        # test the dryad doi
        dryad_resp = self.client.get('/item/' + dryad_tiid.replace('"', ''))

        resp_dict = json.loads(dryad_resp.data)
        print json.dumps(resp_dict, sort_keys=True, indent=4) 

        # Expected download count for the known Dryad test package —
        # presumably fixed for this DOI; TODO confirm against live data.
        assert_equals(resp_dict['metrics']['dryad:total_downloads']['values'].values()[0],
            169)