def _build(self, config):
    """Configure this spider from ``config``.

    Sets ``self.name``, ``self.logger``, ``self.debug``, ``self.scraper``
    and ``self.frontier``, and appends one handler per entry of
    ``config['handlers']`` to ``self.handlers``.

    Keys read from ``config``:
      * ``'name'``     -- required; also used as the logger name.
      * ``'debug'``    -- optional, defaults to ``True``.
      * ``'scraper'``  -- required mapping with ``'name'`` and optional
        ``'args'`` (forwarded to the scraper's ``setargs``).
      * ``'frontier'`` -- required mapping, same layout as ``'scraper'``.
      * ``'handlers'`` -- required iterable of mappings, each with
        ``'name'`` and optional ``'args'``.

    Raises:
        PyCrawlerException: if any ``KeyError`` is raised while building
            (typically a required key missing from ``config``).
    """
    try:
        self.name = config['name']
        Logger.register(self.name)
        Logger.load()
        self.logger = Logger(self.name)
        self.debug = config.get('debug', True)
        self.logger.info(self.name, 'Start building...')

        scraper_conf = config['scraper']
        self.scraper = Scraper.get(scraper_conf['name'])(self)
        if 'args' in scraper_conf:
            self.scraper.setargs(scraper_conf['args'])

        frontier_conf = config['frontier']
        self.frontier = Frontier.get(frontier_conf['name'])(self)
        if 'args' in frontier_conf:
            self.frontier.setargs(frontier_conf['args'])

        for each in config['handlers']:
            handler = Handler.get(each['name'])(self)
            if 'args' in each:
                handler.setargs(each['args'])
            self.handlers.append(handler)

        self.logger.info(self.name, 'Build successful!')
    except KeyError as e:
        # The key is not guaranteed to be a string (and args may be empty),
        # so format it instead of concatenating; otherwise a TypeError or
        # IndexError raised here would hide the real configuration problem.
        missing = e.args[0] if e.args else e
        raise PyCrawlerException(
            "Key '%s' missing in config dict" % (missing,))
def test__tmpfilename(self):
    # _tmpfilename('sample') must equal './tmp/testspider/<hash>.html',
    # where <hash> is str(gethash('sample')); './tmp/' must exist afterwards.
    handler_cls = Handler.get('TempHandler')
    temp_handler = handler_cls(SpiderTest('testspider'))

    expected_path = './tmp/testspider/' + str(gethash('sample')) + '.html'
    actual_path = temp_handler._tmpfilename('sample')

    self.assertEqual(expected_path, actual_path)
    self.assertTrue(os.path.exists('./tmp/'))
def test_parse(self):
    # After parse() is called for 'testurl1', the file named by
    # _tmpfilename('testurl1') must exist on disk.
    handler_cls = Handler.get('TempHandler')
    temp_handler = handler_cls(SpiderTest('testspider'))

    temp_handler.parse('conent', 'testurl1')

    stored_file = temp_handler._tmpfilename('testurl1')
    self.assertTrue(os.path.exists(stored_file))
def test_setargs(self):
    # args['path'] starts as './tmp/testspider/'; after setargs() with a new
    # 'path' it must be './newpath/testspider/'.
    handler_cls = Handler.get('TempHandler')
    temp_handler = handler_cls(SpiderTest('testspider'))
    self.assertEqual('./tmp/testspider/', temp_handler.args['path'])

    temp_handler.setargs({'path': './newpath/'})

    self.assertEqual('./newpath/testspider/', temp_handler.args['path'])