def test_prefixed_mapping0(self):
    """Check how ``map_datasources`` groups URLs under prefixed matchers.

    Six datasources are registered in ``self.dataset`` (catch-all, exact
    and ``*``-suffixed matchers), nine URLs are mapped, and the result is
    re-keyed by each datasource's ``_matcher`` so it can be compared with
    a plain dict. URLs mapped to a ``None`` key stay under ``None``.

    NOTE(review): another method with this same name is defined in this
    file; if both live in the same class the later one shadows the
    earlier one -- confirm and rename or drop one of them.
    """
    # Registration order is kept exactly as written, in case the dataset
    # container is order-sensitive.
    matchers = (
        u'http://google.com/*',
        u'http://google.com/test',
        u'http://google.com/test*',
        u'http://google.com/testz',
        u'http://google.com/',
        u'*',
    )
    for matcher in matchers:
        self.dataset[matcher] = NoContentDatasource('', _matcher=matcher)

    mapped = map_datasources(
        urls('http://google.com/test',
             'http://google.com/test/of/test&a=1',
             'http://google.com/test/of/testo',
             'http://google.com/quasd',
             'http://google.com/',
             'http://google.com/testz',
             'http://google.com/testz/a',
             'http://google.com',
             'http://gogle.com/'),
        self.dataset)

    # Re-key by matcher string; a ``None`` datasource key is kept as None.
    res = dict(
        (source._matcher if source is not None else None, matched)
        for source, matched in mapped.items())

    expected = {
        # Expected under None even though a u'*' matcher is registered.
        None: ['http://google.com', 'http://gogle.com/'],
        u'http://google.com/': ['http://google.com/'],
        u'http://google.com/*': ['http://google.com/quasd'],
        u'http://google.com/test': ['http://google.com/test'],
        u'http://google.com/test*': [
            'http://google.com/test/of/test&a=1',
            'http://google.com/test/of/testo',
            'http://google.com/testz/a',
        ],
        u'http://google.com/testz': ['http://google.com/testz'],
    }
    # assertEqual reports the differing entries on failure, unlike
    # assertTrue(res == expected).
    self.assertEqual(res, expected)
def take_action(self, args):
    """Crawl every datasource of the selected dataset and handle each item.

    The root logger's handlers are removed for the duration of the crawl
    and put back afterwards, including when the crawl or the handler
    raises.

    Args:
        args: Parsed arguments. ``args.dataset`` is passed to
            ``get_dataset``; ``args.handle`` is either a callable taking
            one crawled item, or a string that is resolved to such a
            callable through ``get_handler_obj``.
    """
    if isinstance(args.handle, basestring):
        # NOTE(review): ``value`` is not defined anywhere in this method;
        # unless it is a module-level name this raises NameError. Left
        # unchanged because the intended argument cannot be determined
        # from here -- confirm against get_handler_obj's signature.
        args.handle = get_handler_obj(value, args.handle)

    root_logger = logging.getLogger('')
    saved_handlers = root_logger.handlers
    root_logger.handlers = []
    try:
        dataset = get_dataset(self.app.crawler, args.dataset)
        mapping = map_datasources(non_fnmatchers(dataset), dataset)
        for datasource, datasource_urls in mapping.items():
            for item in self.crawl(datasource_urls, datasource):
                args.handle(item)
    finally:
        # Always restore the handlers so a failure cannot leave the whole
        # process with logging silenced.
        root_logger.handlers = saved_handlers
def test_prefixed_mapping0(self):
    """Verify the URL-to-datasource grouping produced by ``map_datasources``.

    Registers exact, ``*``-suffixed and catch-all matchers in
    ``self.dataset``, maps a fixed list of URLs, converts the returned
    datasource keys to their ``_matcher`` strings (``None`` keys are kept
    as ``None``) and compares the outcome with the expected grouping.

    NOTE(review): this method name is defined more than once in this
    file; if the definitions share a class only the last one runs --
    confirm and rename or remove the duplicate.
    """
    self.dataset[u'http://google.com/*'] = NoContentDatasource(
        '', _matcher=u'http://google.com/*')
    self.dataset[u'http://google.com/test'] = NoContentDatasource(
        '', _matcher=u'http://google.com/test')
    self.dataset[u'http://google.com/test*'] = NoContentDatasource(
        '', _matcher=u'http://google.com/test*')
    self.dataset[u'http://google.com/testz'] = NoContentDatasource(
        '', _matcher=u'http://google.com/testz')
    self.dataset[u'http://google.com/'] = NoContentDatasource(
        '', _matcher=u'http://google.com/')
    self.dataset[u'*'] = NoContentDatasource('', _matcher=u'*')

    test_urls = urls(
        'http://google.com/test',
        'http://google.com/test/of/test&a=1',
        'http://google.com/test/of/testo',
        'http://google.com/quasd',
        'http://google.com/',
        'http://google.com/testz',
        'http://google.com/testz/a',
        'http://google.com',
        'http://gogle.com/')

    # Swap each datasource key for its matcher string so the result can
    # be compared against plain literals.
    res = {}
    for datasource, matched in map_datasources(test_urls,
                                               self.dataset).items():
        key = datasource._matcher if datasource is not None else None
        res[key] = matched

    # assertEqual (rather than assertTrue(a == b)) shows which entries
    # differ when the test fails.
    self.assertEqual(res, {
        None: ['http://google.com', 'http://gogle.com/'],
        u'http://google.com/': ['http://google.com/'],
        u'http://google.com/*': ['http://google.com/quasd'],
        u'http://google.com/test': ['http://google.com/test'],
        u'http://google.com/test*': [
            'http://google.com/test/of/test&a=1',
            'http://google.com/test/of/testo',
            'http://google.com/testz/a',
        ],
        u'http://google.com/testz': ['http://google.com/testz'],
    })