Example #1
0
 def _build(self, config):
     """Assemble this spider's components from a configuration dict.

     Registers and creates the logger, then instantiates the scraper, the
     frontier and every configured handler, forwarding each one its
     optional ``args`` through ``setargs``.

     Keys read from ``config``:
         name     -- required; stored as ``self.name`` and used for the logger.
         debug    -- optional; ``self.debug`` defaults to True when absent.
         scraper  -- required dict with ``name`` and optional ``args``.
         frontier -- required dict with ``name`` and optional ``args``.
         handlers -- required iterable of dicts, each with ``name`` and
                     optional ``args``.

     Built handlers are appended to ``self.handlers``; this method does not
     create that list, so it must already exist on the instance.

     Raises:
         PyCrawlerException: when a KeyError escapes any step of the build
             (typically a required key missing from ``config``).
     """
     try:
         self.name = config['name']
         Logger.register(self.name)
         Logger.load()
         self.logger = Logger(self.name)
         self.debug = config.get('debug', True)
         self.logger.info(self.name, 'Start building...')

         scraper_cfg = config['scraper']
         self.scraper = Scraper.get(scraper_cfg['name'])(self)
         if 'args' in scraper_cfg:
             self.scraper.setargs(scraper_cfg['args'])

         frontier_cfg = config['frontier']
         self.frontier = Frontier.get(frontier_cfg['name'])(self)
         if 'args' in frontier_cfg:
             self.frontier.setargs(frontier_cfg['args'])

         for handler_cfg in config['handlers']:
             handler = Handler.get(handler_cfg['name'])(self)
             if 'args' in handler_cfg:
                 handler.setargs(handler_cfg['args'])
             self.handlers.append(handler)

         self.logger.info(self.name, 'Build successful!')
     except KeyError as e:
         # The missing key is not guaranteed to be a string (or even to be
         # present in e.args); format it instead of concatenating so the
         # original problem is reported rather than a TypeError/IndexError.
         missing = e.args[0] if e.args else e
         raise PyCrawlerException(
             'Key \'%s\' missing in config dict' % (missing,))
Example #2
0
 def test__tmpfilename(self):
     """_tmpfilename('sample') yields './tmp/testspider/<hash>.html'.

     Also asserts that the './tmp/' directory exists afterwards.
     """
     handler_cls = Handler.get('TempHandler')
     handler = handler_cls(SpiderTest('testspider'))

     expected = ''.join(['./tmp/testspider/', str(gethash('sample')), '.html'])
     actual = handler._tmpfilename('sample')

     self.assertEqual(expected, actual)
     self.assertTrue(os.path.exists('./tmp/'))
Example #3
0
 def test_parse(self):
     """After parse(), the path given by _tmpfilename for that URL exists."""
     handler_cls = Handler.get('TempHandler')
     handler = handler_cls(SpiderTest('testspider'))

     handler.parse('conent', 'testurl1')

     stored_at = handler._tmpfilename('testurl1')
     self.assertTrue(os.path.exists(stored_at))
Example #4
0
 def test_setargs(self):
     """setargs() replaces the base path while keeping the spider subfolder.

     The handler starts with path './tmp/testspider/'; after passing
     {'path': './newpath/'} it must report './newpath/testspider/'.
     """
     handler_cls = Handler.get('TempHandler')
     handler = handler_cls(SpiderTest('testspider'))

     # Default path before any arguments are supplied.
     self.assertEqual('./tmp/testspider/', handler.args['path'])

     new_args = {'path': './newpath/'}
     handler.setargs(new_args)

     self.assertEqual('./newpath/testspider/', handler.args['path'])