def test_deprecation_warning(self):
    """
    Make sure deprecation warnings are logged whenever BaseItem is used,
    either instantiated or in an isinstance check.
    """
    def assert_deprecated_once(caught):
        # exactly one ScrapyDeprecationWarning must have been recorded
        self.assertEqual(len(caught), 1)
        self.assertEqual(caught[0].category, ScrapyDeprecationWarning)

    # direct instantiation warns
    with catch_warnings(record=True) as caught:
        BaseItem()
        assert_deprecated_once(caught)

    # instantiating a subclass warns as well
    with catch_warnings(record=True) as caught:
        class SubclassedBaseItem(BaseItem):
            pass
        SubclassedBaseItem()
        assert_deprecated_once(caught)

    # isinstance checks warn, whatever their outcome
    with catch_warnings(record=True) as caught:
        self.assertFalse(isinstance("foo", BaseItem))
        assert_deprecated_once(caught)

    with catch_warnings(record=True) as caught:
        self.assertTrue(isinstance(BaseItem(), BaseItem))
        assert_deprecated_once(caught)
def test_process_spider_output_stats_legacy(self):
    # A legacy subclass that does not accept/forward a ``stats`` argument
    # must still work at runtime: trying to update stats is silently
    # skipped instead of raising.
    class LegacyDeltaFetchSubClass(self.mwcls):
        def __init__(self, dir, reset=False, *args, **kwargs):
            super(LegacyDeltaFetchSubClass, self).__init__(dir=dir, reset=reset)
            self.something = True

    self._create_test_db()
    mw = LegacyDeltaFetchSubClass(self.temp_dir, reset=False)
    mw.spider_opened(self.spider)
    response = mock.Mock()
    response.request = Request('http://url', meta={'deltafetch_key': 'key'})

    # empty input passes through unchanged, and no stats are touched
    self.assertEqual(
        list(mw.process_spider_output(response, [], self.spider)),
        [])
    self.assertEqual(self.stats.get_stats(), {})

    requests = [
        Request('http://url', meta={'deltafetch_key': 'key'}),
        Request('http://url1', meta={'deltafetch_key': 'test_key_1'})
    ]
    # the second request is filtered out, but the 'skipped' stat
    # must NOT be updated by the legacy subclass
    self.assertEqual(
        list(mw.process_spider_output(response, requests, self.spider)),
        [requests[0]])
    self.assertEqual(self.stats.get_value('deltafetch/skipped'), None)

    # items pass through, but the 'stored' stat must NOT be updated either
    items = [BaseItem(), "not a base item"]
    self.assertEqual(
        list(mw.process_spider_output(response, items, self.spider)),
        items)
    self.assertEqual(self.stats.get_value('deltafetch/stored'), None)
def test_process_spider_output(self):
    """
    Requests whose deltafetch_key is already in the db are dropped;
    yielding items marks the originating request's key as seen.
    """
    self._create_test_db()
    mw = self.mwcls(self.temp_dir, reset=False)
    mw.spider_opened(self.spider)
    response = mock.Mock()
    response.request = Request('http://url', meta={'deltafetch_key': 'key'})

    # an empty result list passes through untouched
    result = []
    self.assertEqual(
        list(mw.process_spider_output(response, result, self.spider)),
        [])

    result = [
        # 'key1' is not in the db --> processed
        Request('http://url', meta={'deltafetch_key': 'key1'}),
        # 'test_key_1' is already in the test db --> skipped
        Request('http://url1', meta={'deltafetch_key': 'test_key_1'})
    ]
    self.assertEqual(
        list(mw.process_spider_output(response, result, self.spider)),
        [result[0]])

    # items pass through and mark the response request's key as seen
    result = [BaseItem(), "not a base item"]
    self.assertEqual(
        list(mw.process_spider_output(response, result, self.spider)),
        result)
    # key order in the backing db is not guaranteed, so compare as a set
    # (consistent with the other process_spider_output test)
    self.assertEqual(set(mw.db.keys()),
                     set(['test_key_1', 'key', 'test_key_2']))
    assert mw.db['key']
def test_isinstance_check(self):
    # Instances of BaseItem, Item and any of their subclasses must be
    # recognized both as BaseItem and as the private _BaseItem class
    # that internal checks rely on.
    class SubclassedBaseItem(BaseItem):
        pass

    class SubclassedItem(Item):
        pass

    instances = (BaseItem(), SubclassedBaseItem(), Item(), SubclassedItem())
    for obj in instances:
        self.assertTrue(isinstance(obj, BaseItem))
    # make sure internal checks using private _BaseItem class succeed
    for obj in instances:
        self.assertTrue(isinstance(obj, _BaseItem))
def test_iterate_spider_output(self):
    # a single object (item, request or anything else) is wrapped
    # into a one-element iterable...
    item = BaseItem()
    request = Request('http://scrapytest.org')
    plain = object()
    for value in (item, request, plain):
        self.assertEqual(list(iterate_spider_output(value)), [value])
    # ...while an iterable is passed through as-is
    self.assertEqual(
        list(iterate_spider_output([request, item, plain])),
        [request, item, plain])
def test_isinstance_check(self):
    # Instances of BaseItem, Item and any of their subclasses must be
    # recognized both as BaseItem and as the private _BaseItem class.
    class SubclassedBaseItem(BaseItem):
        pass

    class SubclassedItem(Item):
        pass

    # using BaseItem triggers deprecation warnings; they are asserted
    # on elsewhere, so just silence them here
    with catch_warnings():
        filterwarnings("ignore", category=ScrapyDeprecationWarning)
        instances = (
            BaseItem(), SubclassedBaseItem(), Item(), SubclassedItem())
        for obj in instances:
            self.assertTrue(isinstance(obj, BaseItem))
        # make sure internal checks using private _BaseItem class succeed
        for obj in instances:
            self.assertTrue(isinstance(obj, _BaseItem))
def test_process_spider_output_stats(self):
    # With a stats collector wired in, skipped requests and stored
    # items are both counted.
    self._create_test_db()
    mw = self.mwcls(self.temp_dir, reset=False, stats=self.stats)
    mw.spider_opened(self.spider)
    response = mock.Mock()
    response.request = Request('http://url', meta={'deltafetch_key': 'key'})

    # empty input: nothing yielded, no stats touched
    self.assertEqual(
        list(mw.process_spider_output(response, [], self.spider)),
        [])
    self.assertEqual(self.stats.get_stats(), {})

    requests = [
        Request('http://url', meta={'deltafetch_key': 'key'}),
        Request('http://url1', meta={'deltafetch_key': 'test_key_1'})
    ]
    # 'test_key_1' is already in the db: it is dropped and counted
    self.assertEqual(
        list(mw.process_spider_output(response, requests, self.spider)),
        [requests[0]])
    self.assertEqual(self.stats.get_value('deltafetch/skipped'), 1)

    # items pass through and are counted as stored
    items = [BaseItem(), "not a base item"]
    self.assertEqual(
        list(mw.process_spider_output(response, items, self.spider)),
        items)
    self.assertEqual(self.stats.get_value('deltafetch/stored'), 1)
def test_process_spider_output(self):
    self._create_test_db()
    mw = self.mwcls(self.temp_dir, reset=False, stats=self.stats)
    mw.spider_opened(self.spider)
    response = mock.Mock()
    response.request = Request('http://url', meta={'deltafetch_key': 'key'})

    # nothing in, nothing out
    self.assertEqual(
        list(mw.process_spider_output(response, [], self.spider)),
        [])

    requests = [
        # same URL but with new key --> it should be processed
        Request('http://url', meta={'deltafetch_key': 'key1'}),
        # 'test_key_1' is already in the test db --> it should be skipped
        Request('http://url1', meta={'deltafetch_key': 'test_key_1'})
    ]
    # so only the 1 request should go through
    self.assertEqual(
        list(mw.process_spider_output(response, requests, self.spider)),
        [requests[0]])
    # the skipped "http://url1" should be counted in stats
    self.assertEqual(self.stats.get_stats(), {'deltafetch/skipped': 1})
    # b'key' should not be in the db yet as no item was collected yet
    self.assertEqual(set(mw.db.keys()), {b'test_key_1', b'test_key_2'})

    # if the spider returns items, the request's key is added in db
    items = [BaseItem(), "not a base item"]
    self.assertEqual(
        list(mw.process_spider_output(response, items, self.spider)),
        items)
    self.assertEqual(set(mw.db.keys()),
                     {b'key', b'test_key_1', b'test_key_2'})
    assert mw.db[b'key']