Esempio n. 1
0
 def test_prefixed_mapping0(self):
     """Check how map_datasources groups URLs across overlapping matchers.

     Registers exact and wildcard matchers on ``self.dataset``, maps a
     fixed list of URLs through ``map_datasources`` and compares the
     grouping, keyed by each datasource's ``_matcher``, with the
     expected result spelled out below.
     """
     # Each datasource is registered under the same string it carries
     # as its ``_matcher``; the registration order is kept as-is.
     for matcher in (u'http://google.com/*',
                     u'http://google.com/test',
                     u'http://google.com/test*',
                     u'http://google.com/testz',
                     u'http://google.com/',
                     u'*'):
         self.dataset[matcher] = NoContentDatasource('', _matcher=matcher)

     mapping = map_datasources(urls('http://google.com/test',
                                    'http://google.com/test/of/test&a=1',
                                    'http://google.com/test/of/testo',
                                    'http://google.com/quasd',
                                    'http://google.com/',
                                    'http://google.com/testz',
                                    'http://google.com/testz/a',
                                    'http://google.com',
                                    'http://gogle.com/'),
                               self.dataset)

     # Re-key by matcher string; a None datasource key stays None.
     res = dict((ds._matcher if ds is not None else None, matched)
                for ds, matched in mapping.items())

     expected = {
         None: ['http://google.com', 'http://gogle.com/'],
         u'http://google.com/': ['http://google.com/'],
         u'http://google.com/*': ['http://google.com/quasd'],
         u'http://google.com/test': ['http://google.com/test'],
         u'http://google.com/test*': ['http://google.com/test/of/test&a=1',
                                      'http://google.com/test/of/testo',
                                      'http://google.com/testz/a'],
         u'http://google.com/testz': ['http://google.com/testz'],
     }
     # assertEqual reports the differing entries when the mapping is wrong.
     self.assertEqual(res, expected)
Esempio n. 2
0
    def take_action(self, args):
        """Crawl the selected dataset and feed every crawled item to the handler.

        ``args.handle`` may be a callable or a string; a string is resolved
        through ``get_handler_obj`` and stored back on ``args``.
        ``args.dataset`` is passed to ``get_dataset`` together with
        ``self.app.crawler``.

        The root logger's handlers are detached for the duration of the
        crawl and are always put back, even when the crawl or the handler
        raises.
        """
        if isinstance(args.handle, basestring):
            # NOTE(review): `value` is not defined in this method; unless it
            # is a module-level name this raises NameError -- confirm the
            # intended first argument of get_handler_obj.
            args.handle = get_handler_obj(value, args.handle)

        root_logger = logging.getLogger('')
        saved_handlers = root_logger.handlers
        root_logger.handlers = []
        try:
            dataset = get_dataset(self.app.crawler, args.dataset)
            targets = non_fnmatchers(dataset)
            for datasource, matched_urls in map_datasources(targets, dataset).items():
                for item in self.crawl(matched_urls, datasource):
                    args.handle(item)
        finally:
            # Restore logging no matter how the crawl ended.
            root_logger.handlers = saved_handlers
Esempio n. 3
0
    def take_action(self, args):
        """Run the crawl for ``args.dataset`` and hand each item to ``args.handle``.

        A string ``args.handle`` is first resolved to a handler object via
        ``get_handler_obj``. While crawling, the root logger has no
        handlers attached; the original handler list is reinstated in a
        ``finally`` clause so an exception cannot leave logging disabled.
        """
        if isinstance(args.handle, basestring):
            # NOTE(review): `value` is not bound anywhere in this method;
            # verify what get_handler_obj should receive as first argument.
            args.handle = get_handler_obj(value, args.handle)

        root_handler = logging.getLogger('')
        original_handlers = root_handler.handlers
        root_handler.handlers = []
        try:
            dataset = get_dataset(self.app.crawler, args.dataset)
            mapping = map_datasources(non_fnmatchers(dataset), dataset)
            for datasource, datasource_urls in mapping.items():
                for item in self.crawl(datasource_urls, datasource):
                    args.handle(item)
        finally:
            # Always re-attach the handlers that were removed above.
            root_handler.handlers = original_handlers
Esempio n. 4
0
 def test_prefixed_mapping0(self):
     """Verify the URL grouping produced by map_datasources.

     Exact and wildcard matchers are registered on ``self.dataset``; the
     result of ``map_datasources`` is re-keyed by each datasource's
     ``_matcher`` (``None`` for a ``None`` key) and compared with the
     expected grouping.
     """
     matchers = [
         u'http://google.com/*',
         u'http://google.com/test',
         u'http://google.com/test*',
         u'http://google.com/testz',
         u'http://google.com/',
         u'*',
     ]
     # Key and ``_matcher`` are the same string for every datasource.
     for pattern in matchers:
         self.dataset[pattern] = NoContentDatasource('', _matcher=pattern)

     candidates = urls(
         'http://google.com/test',
         'http://google.com/test/of/test&a=1',
         'http://google.com/test/of/testo',
         'http://google.com/quasd',
         'http://google.com/',
         'http://google.com/testz',
         'http://google.com/testz/a',
         'http://google.com',
         'http://gogle.com/')

     res = dict(
         (k._matcher if k is not None else None, v)
         for k, v in map_datasources(candidates, self.dataset).items())

     # assertEqual (rather than assertTrue on ==) shows the differing
     # entries when the grouping does not match.
     self.assertEqual(
         res,
         {
             None: ['http://google.com', 'http://gogle.com/'],
             u'http://google.com/': ['http://google.com/'],
             u'http://google.com/*': ['http://google.com/quasd'],
             u'http://google.com/test': ['http://google.com/test'],
             u'http://google.com/test*': [
                 'http://google.com/test/of/test&a=1',
                 'http://google.com/test/of/testo',
                 'http://google.com/testz/a'
             ],
             u'http://google.com/testz': ['http://google.com/testz']
         })