Example #1
0
    def test_deprecation_warning(self):
        """
        Check that exactly one ScrapyDeprecationWarning is recorded each time
        BaseItem is used: instantiated directly, instantiated through a
        subclass, or passed as the class argument of an isinstance check
        """
        # Direct instantiation.
        with catch_warnings(record=True) as caught:
            BaseItem()
        self.assertEqual(len(caught), 1)
        self.assertEqual(caught[0].category, ScrapyDeprecationWarning)

        # Instantiation of a subclass declared inside the recording context.
        with catch_warnings(record=True) as caught:

            class BaseItemChild(BaseItem):
                pass

            BaseItemChild()
        self.assertEqual(len(caught), 1)
        self.assertEqual(caught[0].category, ScrapyDeprecationWarning)

        # isinstance check on an unrelated object.
        with catch_warnings(record=True) as caught:
            self.assertFalse(isinstance("foo", BaseItem))
        self.assertEqual(len(caught), 1)
        self.assertEqual(caught[0].category, ScrapyDeprecationWarning)

        # Instantiation and isinstance check in a single expression; a single
        # recorded warning is still expected.
        # NOTE(review): keep this call on one line -- presumably the count of
        # one relies on warnings being de-duplicated per source location;
        # confirm before reformatting.
        with catch_warnings(record=True) as caught:
            self.assertTrue(isinstance(BaseItem(), BaseItem))
        self.assertEqual(len(caught), 1)
        self.assertEqual(caught[0].category, ScrapyDeprecationWarning)
Example #2
0
    def test_process_spider_output_stats_legacy(self):
        # A subclass that forwards only dir/reset to the parent constructor
        # (so it never handles stats itself) must still work at runtime:
        # processing spider output must not raise, and no stats get recorded.
        class LegacyDeltaFetchSubClass(self.mwcls):
            def __init__(self, dir, reset=False, *args, **kwargs):
                # extra positional/keyword arguments are accepted but dropped
                parent = super(LegacyDeltaFetchSubClass, self)
                parent.__init__(dir=dir, reset=reset)
                self.something = True

        self._create_test_db()
        middleware = LegacyDeltaFetchSubClass(self.temp_dir, reset=False)
        middleware.spider_opened(self.spider)

        fake_response = mock.Mock()
        fake_response.request = Request(
            'http://url', meta={'deltafetch_key': 'key'})

        def collect(spider_output):
            # run the middleware over the given output and materialise it
            return list(middleware.process_spider_output(
                fake_response, spider_output, self.spider))

        # empty output: nothing comes back and the stats stay empty
        self.assertEqual(collect([]), [])
        self.assertEqual(self.stats.get_stats(), {})

        requests = [
            Request('http://url', meta={'deltafetch_key': 'key'}),
            Request('http://url1', meta={'deltafetch_key': 'test_key_1'}),
        ]

        # only the first request is expected back, and the "skipped"
        # counter must remain unset
        self.assertEqual(collect(requests), [requests[0]])
        self.assertEqual(self.stats.get_value('deltafetch/skipped'), None)

        # items and arbitrary objects pass through untouched, and the
        # "stored" counter must remain unset as well
        scraped = [BaseItem(), "not a base item"]
        self.assertEqual(collect(scraped), scraped)
        self.assertEqual(self.stats.get_value('deltafetch/stored'), None)
Example #3
0
    def test_process_spider_output(self):
        # Runs process_spider_output over empty output, over requests and
        # over items, then checks which keys ended up in the database.
        self._create_test_db()
        mw = self.mwcls(self.temp_dir, reset=False)
        mw.spider_opened(self.spider)
        response = mock.Mock()
        response.request = Request('http://url',
                                   meta={'deltafetch_key': 'key'})

        # nothing in, nothing out
        result = []
        self.assertEqual(
            list(mw.process_spider_output(response, result, self.spider)), [])

        # only the first request is expected back; the one keyed
        # 'test_key_1' is dropped (presumably because _create_test_db
        # already stored that key -- verify against that helper)
        result = [
            Request('http://url', meta={'deltafetch_key': 'key1'}),
            Request('http://url1', meta={'deltafetch_key': 'test_key_1'})
        ]
        self.assertEqual(
            list(mw.process_spider_output(response, result, self.spider)),
            [result[0]])

        # items and arbitrary objects pass through unchanged
        result = [BaseItem(), "not a base item"]
        self.assertEqual(
            list(mw.process_spider_output(response, result, self.spider)),
            result)

        # After items were returned, the key of the response's request
        # ('key') is expected in the database next to the existing ones.
        # Compare as sets: the order in which the database hands back its
        # keys is a backend detail this test should not depend on.
        self.assertEqual(set(mw.db.keys()),
                         set(['test_key_1', 'key', 'test_key_2']))
        # use a unittest assertion so the check is not stripped under -O
        self.assertTrue(mw.db['key'])
Example #4
0
    def test_isinstance_check(self):
        # Instances of BaseItem, of Item and of a trivial subclass of each
        # must pass isinstance checks against both the public BaseItem and
        # the private _BaseItem.
        class BaseItemChild(BaseItem):
            pass

        class ItemChild(Item):
            pass

        expect = self.assertTrue

        # checks against the public class
        expect(isinstance(BaseItem(), BaseItem))
        expect(isinstance(BaseItemChild(), BaseItem))
        expect(isinstance(Item(), BaseItem))
        expect(isinstance(ItemChild(), BaseItem))

        # same checks against the private _BaseItem used internally
        expect(isinstance(BaseItem(), _BaseItem))
        expect(isinstance(BaseItemChild(), _BaseItem))
        expect(isinstance(Item(), _BaseItem))
        expect(isinstance(ItemChild(), _BaseItem))
Example #5
0
    def test_iterate_spider_output(self):
        # A lone value must come back as a one-element sequence holding that
        # value; a list must come back element by element, in order.
        item = BaseItem()
        request = Request('http://scrapytest.org')
        plain = object()

        for single in (item, request, plain):
            self.assertEqual(list(iterate_spider_output(single)), [single])

        mixed = [request, item, plain]
        self.assertEqual(list(iterate_spider_output(mixed)),
                         [request, item, plain])
Example #6
0
    def test_isinstance_check(self):
        # Instances of BaseItem, of Item and of a trivial subclass of each
        # must pass isinstance checks against both the public BaseItem and
        # the private _BaseItem. ScrapyDeprecationWarning is silenced for
        # the duration of the checks.
        class BaseItemChild(BaseItem):
            pass

        class ItemChild(Item):
            pass

        expect = self.assertTrue

        with catch_warnings():
            filterwarnings("ignore", category=ScrapyDeprecationWarning)

            # checks against the public class
            expect(isinstance(BaseItem(), BaseItem))
            expect(isinstance(BaseItemChild(), BaseItem))
            expect(isinstance(Item(), BaseItem))
            expect(isinstance(ItemChild(), BaseItem))

            # same checks against the private _BaseItem used internally
            expect(isinstance(BaseItem(), _BaseItem))
            expect(isinstance(BaseItemChild(), _BaseItem))
            expect(isinstance(Item(), _BaseItem))
            expect(isinstance(ItemChild(), _BaseItem))
Example #7
0
 def test_process_spider_output_stats(self):
     # With a stats collector handed to the middleware, dropping a request
     # and returning items must each bump the matching counter to 1.
     self._create_test_db()
     middleware = self.mwcls(self.temp_dir, reset=False, stats=self.stats)
     middleware.spider_opened(self.spider)

     fake_response = mock.Mock()
     fake_response.request = Request(
         'http://url', meta={'deltafetch_key': 'key'})

     def collect(spider_output):
         # run the middleware over the given output and materialise it
         return list(middleware.process_spider_output(
             fake_response, spider_output, self.spider))

     # empty output: nothing comes back and no stats are recorded
     self.assertEqual(collect([]), [])
     self.assertEqual(self.stats.get_stats(), {})

     # only the first request is expected back; one skip gets counted
     requests = [
         Request('http://url', meta={'deltafetch_key': 'key'}),
         Request('http://url1', meta={'deltafetch_key': 'test_key_1'}),
     ]
     self.assertEqual(collect(requests), [requests[0]])
     self.assertEqual(self.stats.get_value('deltafetch/skipped'), 1)

     # items and arbitrary objects pass through; one store gets counted
     scraped = [BaseItem(), "not a base item"]
     self.assertEqual(collect(scraped), scraped)
     self.assertEqual(self.stats.get_value('deltafetch/stored'), 1)
Example #8
0
    def test_process_spider_output(self):
        # Walks process_spider_output through empty output, requests and
        # items, checking the returned values, the stats and the database
        # keys after each step.
        self._create_test_db()
        middleware = self.mwcls(self.temp_dir, reset=False, stats=self.stats)
        middleware.spider_opened(self.spider)

        fake_response = mock.Mock()
        fake_response.request = Request(
            'http://url', meta={'deltafetch_key': 'key'})

        def collect(spider_output):
            # run the middleware over the given output and materialise it
            return list(middleware.process_spider_output(
                fake_response, spider_output, self.spider))

        # empty output yields nothing
        self.assertEqual(collect([]), [])

        # The first request reuses the URL but carries a fresh key, so it
        # must go through; the second carries 'test_key_1', which the test
        # db already holds, so it must be dropped.
        fresh_request = Request('http://url', meta={'deltafetch_key': 'key1'})
        known_request = Request(
            'http://url1', meta={'deltafetch_key': 'test_key_1'})
        self.assertEqual(collect([fresh_request, known_request]),
                         [fresh_request])

        # the dropped request is the only thing recorded in the stats
        self.assertEqual(self.stats.get_stats(), {'deltafetch/skipped': 1})

        # no item has been returned so far, hence b'key' is not stored yet
        self.assertEqual(set(middleware.db.keys()),
                         {b'test_key_1', b'test_key_2'})

        # once the spider returns items, the key of the response's request
        # is added to the db; the output itself passes through unchanged
        scraped = [BaseItem(), "not a base item"]
        self.assertEqual(collect(scraped), scraped)
        self.assertEqual(set(middleware.db.keys()),
                         {b'key', b'test_key_1', b'test_key_2'})
        assert middleware.db[b'key']