def test_rules_manager_callback_with_arguments(self):
    """Check how keyword arguments given to a Rule reach its callback.

    A rule built without extra keyword arguments is expected to expose
    the callback unchanged; a rule built with keyword arguments is
    expected to expose a ``partial`` that, when called with no arguments,
    passes those keyword arguments to the callback.
    """
    spider = BaseSpider('foo')
    response = HtmlResponse('http://example.org')

    kwargs = {'a': 1}

    def myfunc(**mykwargs):
        # Echo the received keyword arguments so the test can inspect them.
        return mykwargs

    # verify return validation: myfunc hands back exactly what it received
    self.assertEqual(kwargs, myfunc(**kwargs))

    # test callback w/o arguments
    rulesman = RulesManager([
        Rule(BaseMatcher(), myfunc),
    ], spider)
    rule = rulesman.get_rule_from_response(response)

    # without arguments should return same callback
    self.assertEqual(rule.callback, myfunc)

    # test callback w/ arguments
    rulesman = RulesManager([
        Rule(BaseMatcher(), myfunc, **kwargs),
    ], spider)
    rule = rulesman.get_rule_from_response(response)

    # with argument should return partial applied callback
    self.assertTrue(isinstance(rule.callback, partial))
    self.assertEqual(kwargs, rule.callback())
def test_rules_manager_callbacks(self):
    """Check callback resolution for callables and spider attribute names.

    A callable is expected to be kept as the rule's callback, a string is
    expected to resolve to the spider attribute of that name, and an
    unknown or non-callable attribute name is expected to raise
    ``AttributeError`` when the ``RulesManager`` is constructed.
    """
    # NOTE(review): a second definition with this exact name follows this
    # one in the file; if both live in the same class the later one wins
    # and this one never runs -- confirm and drop one of them.

    def mycallback():
        return True

    spider = BaseSpider('foo')
    spider.parse_item = lambda: True

    response1 = HtmlResponse('http://example.org')
    response2 = HtmlResponse('http://othersite.org')

    rulesman = RulesManager([
        Rule('example', mycallback),
        Rule('othersite', 'parse_item'),
    ], spider, default_matcher=UrlRegexMatcher)

    rule1 = rulesman.get_rule_from_response(response1)
    rule2 = rulesman.get_rule_from_response(response2)

    self.assertEqual(rule1.callback, mycallback)
    self.assertEqual(rule2.callback, spider.parse_item)

    # fail unknown callback: the spider has no 'mycallback' attribute
    self.assertRaises(AttributeError, RulesManager,
                      [Rule(BaseMatcher(), 'mycallback')], spider)

    # fail not callable
    spider.not_callable = True
    self.assertRaises(AttributeError, RulesManager,
                      [Rule(BaseMatcher(), 'not_callable')], spider)
def test_rules_manager_callbacks(self):
    """Check that rule callbacks resolve from callables and from names.

    The first rule passes a function object and the second passes the
    name of a spider attribute; both are expected to come back as the
    compiled rule's ``callback``. Constructing a ``RulesManager`` with a
    name that is missing on the spider, or that refers to a non-callable
    attribute, is expected to raise ``AttributeError``.
    """
    def mycallback():
        return True

    spider = BaseSpider('foo')
    spider.parse_item = lambda: True

    response1 = HtmlResponse('http://example.org')
    response2 = HtmlResponse('http://othersite.org')

    rulesman = RulesManager([
        Rule('example', mycallback),
        Rule('othersite', 'parse_item'),
    ], spider, default_matcher=UrlRegexMatcher)

    rule1 = rulesman.get_rule_from_response(response1)
    rule2 = rulesman.get_rule_from_response(response2)

    self.assertEqual(rule1.callback, mycallback)
    self.assertEqual(rule2.callback, spider.parse_item)

    # fail unknown callback: 'mycallback' is not an attribute of the spider
    self.assertRaises(AttributeError, RulesManager, [
        Rule(BaseMatcher(), 'mycallback')
    ], spider)

    # fail not callable
    spider.not_callable = True
    self.assertRaises(AttributeError, RulesManager, [
        Rule(BaseMatcher(), 'not_callable')
    ], spider)
def test_rules_manager_empty_rule(self):
    """Check that a Rule given no matcher ends up with a ``BaseMatcher``."""
    spider = BaseSpider('foo')
    response = HtmlResponse('http://example.org')

    # The rule only sets follow=True; no matcher and no callback are given.
    rulesman = RulesManager([Rule(follow=True)], spider)
    rule = rulesman.get_rule_from_response(response)

    # default matcher if None: BaseMatcher
    self.assertTrue(isinstance(rule.matcher, BaseMatcher))
def test_rules_manager_default_matcher(self):
    """Check that ``default_matcher`` is used for a non-matcher rule value.

    The rule is given a plain URL string; with ``default_matcher`` set to
    ``UrlMatcher`` the compiled rule's matcher is expected to be a
    ``UrlMatcher`` instance.
    """
    spider = BaseSpider('foo')
    response = HtmlResponse('http://example.org')

    def callback(x):
        # Placeholder callback; never invoked by this test.
        return None

    rulesman = RulesManager([
        Rule('http://example.org', callback),
    ], spider, default_matcher=UrlMatcher)

    rule = rulesman.get_rule_from_response(response)
    self.assertTrue(isinstance(rule.matcher, UrlMatcher))
def test_rules_manager_matchers(self):
    """Check matcher validation and first-match-wins rule ordering.

    Values that cannot be resolved to a matcher (``False``, a spider
    instance) are expected to raise ``ValueError`` at construction time.
    With valid matchers, the rule returned for a response is expected to
    be the first one in the list that matches.
    """
    spider = BaseSpider('foo')
    response1 = HtmlResponse('http://example.org')
    response2 = HtmlResponse('http://othersite.org')

    urlmatcher = UrlMatcher('http://example.org')
    basematcher = BaseMatcher()

    # callback needed for Rule
    def callback(x):
        return None

    # test fail matcher resolve
    self.assertRaises(ValueError, RulesManager,
                      [Rule(False, callback)], spider)
    self.assertRaises(ValueError, RulesManager,
                      [Rule(spider, callback)], spider)

    rulesman = RulesManager([
        Rule(urlmatcher, callback),
        Rule(basematcher, callback),
    ], spider)

    # response1 matches example.org
    rule1 = rulesman.get_rule_from_response(response1)
    # response2 is caught by BaseMatcher()
    rule2 = rulesman.get_rule_from_response(response2)

    self.assertEqual(rule1.matcher, urlmatcher)
    self.assertEqual(rule2.matcher, basematcher)

    # reverse order. BaseMatcher should match all
    rulesman = RulesManager([
        Rule(basematcher, callback),
        Rule(urlmatcher, callback),
    ], spider)

    rule1 = rulesman.get_rule_from_response(response1)
    rule2 = rulesman.get_rule_from_response(response2)

    self.assertEqual(rule1.matcher, basematcher)
    self.assertEqual(rule2.matcher, basematcher)
    # Both responses must resolve to the very same compiled rule object.
    self.assertTrue(rule1 is rule2)
def test_rules_manager_basic(self):
    """Check lookups on an empty manager and on a single match-all rule.

    With no rules, the lookup is expected to return a falsy value. With a
    single ``BaseMatcher`` rule, every response is expected to map to the
    same ``CompiledRule`` carrying the rule's ``callback`` and ``follow``.
    """
    spider = BaseSpider('foo')
    response1 = HtmlResponse('http://example.org')
    response2 = HtmlResponse('http://othersite.org')

    rulesman = RulesManager([], spider)

    # should return none (asserted as falsy, like the original check)
    self.assertFalse(rulesman.get_rule_from_response(response1))
    self.assertFalse(rulesman.get_rule_from_response(response2))

    # rules manager with match-all rule
    rulesman = RulesManager([
        Rule(BaseMatcher(), follow=True),
    ], spider)

    # returns CompiledRule
    rule1 = rulesman.get_rule_from_response(response1)
    rule2 = rulesman.get_rule_from_response(response2)

    self.assertTrue(isinstance(rule1, CompiledRule))
    self.assertTrue(isinstance(rule2, CompiledRule))
    # Both responses must resolve to the very same compiled rule object.
    self.assertTrue(rule1 is rule2)

    self.assertEqual(rule1.callback, None)
    self.assertEqual(rule1.follow, True)