Beispiel #1
0
    def setUp(self):
        """Build a one-node crawl job, its loader and a running RPC server.

        Creates a temporary directory (``self.dir``), a ``Job`` with a single
        start URL (``self.job``), a ``ColaRPCServer`` bound to localhost on
        the job's configured port (``self.rpc_server``) and a ``JobLoader``
        (``self.loader``) whose ``init_mq`` is called with this node as the
        only entry in the node list. The RPC server is then served from a
        background thread.
        """
        # NOTE(review): the character class [^(:|/)] also excludes the literal
        # characters '(', '|' and ')' -- confirm that this is intended.
        url_patterns = UrlPatterns(
            Url(r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$', 'wiki_item',
                FakeWikiParser))
        fake_user_conf = Config(StringIO(user_conf))

        self.dir = tempfile.mkdtemp()

        self.job = Job(
            'fake wiki crawler',
            url_patterns,
            MechanizeOpener, [
                'http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F',
            ],
            user_conf=fake_user_conf)

        # The only node in this test is the local one, addressed by the
        # port taken from the job's own configuration.
        local_node = 'localhost:%s' % self.job.context.job.port
        nodes = [
            local_node,
        ]

        self.rpc_server = ColaRPCServer(
            ('localhost', self.job.context.job.port))
        self.loader = JobLoader(self.job)
        self.loader.init_mq(self.rpc_server, nodes, local_node, self.dir)

        # Serve RPC requests in the background. The thread is marked as a
        # daemon through the ``daemon`` attribute (``setDaemon`` is
        # deprecated) so it cannot keep the interpreter alive on its own.
        thd = threading.Thread(target=self.rpc_server.serve_forever)
        thd.daemon = True
        thd.start()
Beispiel #2
0
class Test(unittest.TestCase):
    """Run a JobLoader against a single local node and check its output."""

    def setUp(self):
        """Build a one-node crawl job, its loader and a running RPC server.

        Creates a temporary directory (``self.dir``), a ``Job`` with a single
        start URL (``self.job``), a ``ColaRPCServer`` bound to localhost on
        the job's configured port (``self.rpc_server``) and a ``JobLoader``
        (``self.loader``) whose ``init_mq`` is called with this node as the
        only entry in the node list. The RPC server is then served from a
        background thread.
        """
        # NOTE(review): the character class [^(:|/)] also excludes the literal
        # characters '(', '|' and ')' -- confirm that this is intended.
        url_patterns = UrlPatterns(
            Url(r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$', 'wiki_item',
                FakeWikiParser))
        fake_user_conf = Config(StringIO(user_conf))

        self.dir = tempfile.mkdtemp()

        self.job = Job(
            'fake wiki crawler',
            url_patterns,
            MechanizeOpener, [
                'http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F',
            ],
            user_conf=fake_user_conf)

        # The only node in this test is the local one, addressed by the
        # port taken from the job's own configuration.
        local_node = 'localhost:%s' % self.job.context.job.port
        nodes = [
            local_node,
        ]

        self.rpc_server = ColaRPCServer(
            ('localhost', self.job.context.job.port))
        self.loader = JobLoader(self.job)
        self.loader.init_mq(self.rpc_server, nodes, local_node, self.dir)

        # Serve RPC requests in the background. The thread is marked as a
        # daemon through the ``daemon`` attribute (``setDaemon`` is
        # deprecated) so it cannot keep the interpreter alive on its own.
        thd = threading.Thread(target=self.rpc_server.serve_forever)
        thd.daemon = True
        thd.start()

    def tearDown(self):
        """Finish the loader, stop the RPC server and delete the temp dir.

        Each step runs even when an earlier one raises, so a failing
        ``finish()`` no longer leaves the server running; the exception
        still propagates to the test runner afterwards.
        """
        try:
            try:
                self.loader.finish()
            finally:
                # Always ask the server to stop, whatever finish() did.
                self.rpc_server.shutdown()
        finally:
            shutil.rmtree(self.dir)

    def testJobLoader(self):
        """Round-trip the start URL through the mq, then run the loader."""
        self.assertEqual(len(self.job.starts), 1)

        # put() receives the list of start URLs; get() hands back one item.
        self.loader.mq.put(self.job.starts)
        self.assertEqual(self.loader.mq.get(), self.job.starts[0])

        # put starts into mq again
        self.loader.mq.put(self.job.starts)
        self.loader.run()

        # ``f`` and ``sep`` are defined elsewhere in this file; presumably
        # ``f`` is the buffer FakeWikiParser writes to -- TODO confirm.
        # The run is expected to leave 10 ``sep``-separated entries in it.
        self.assertEqual(len(f.getvalue().strip(sep).split(sep)), 10)
Beispiel #3
0
 def setUp(self):
     """Prepare the fixture: temp dir, job, RPC server and job loader.

     Stores on the instance a fresh temporary directory (``self.dir``), a
     ``Job`` seeded with one start URL (``self.job``), a ``ColaRPCServer``
     listening on localhost at the job's configured port
     (``self.rpc_server``) and a ``JobLoader`` (``self.loader``) on which
     ``init_mq`` has been called with the local node as the sole node.
     Finally starts a background thread running the server loop.
     """
     # NOTE(review): the character class [^(:|/)] also excludes the literal
     # characters '(', '|' and ')' -- confirm that this is intended.
     url_patterns = UrlPatterns(
         Url(r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$', 'wiki_item', FakeWikiParser)
     )
     fake_user_conf = Config(StringIO(user_conf))

     self.dir = tempfile.mkdtemp()

     self.job = Job('fake wiki crawler', url_patterns, MechanizeOpener,
                    ['http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F', ],
                    user_conf=fake_user_conf)

     # Single-node setup: the node list contains only this host, using the
     # port from the job's configuration.
     local_node = 'localhost:%s' % self.job.context.job.port
     nodes = [local_node, ]

     self.rpc_server = ColaRPCServer(('localhost', self.job.context.job.port))
     self.loader = JobLoader(self.job)
     self.loader.init_mq(self.rpc_server, nodes, local_node, self.dir)

     # Run the server loop in a daemon thread. The ``daemon`` attribute
     # replaces the deprecated ``setDaemon`` call.
     thd = threading.Thread(target=self.rpc_server.serve_forever)
     thd.daemon = True
     thd.start()
Beispiel #4
0
class Test(unittest.TestCase):
    """Single-node JobLoader test using a temp dir and a local RPC server."""

    def setUp(self):
        """Prepare the fixture: temp dir, job, RPC server and job loader.

        Stores on the instance a fresh temporary directory (``self.dir``), a
        ``Job`` seeded with one start URL (``self.job``), a ``ColaRPCServer``
        listening on localhost at the job's configured port
        (``self.rpc_server``) and a ``JobLoader`` (``self.loader``) on which
        ``init_mq`` has been called with the local node as the sole node.
        Finally starts a background thread running the server loop.
        """
        # NOTE(review): the character class [^(:|/)] also excludes the literal
        # characters '(', '|' and ')' -- confirm that this is intended.
        url_patterns = UrlPatterns(
            Url(r'^http://zh.wikipedia.org/wiki/[^(:|/)]+$', 'wiki_item', FakeWikiParser)
        )
        fake_user_conf = Config(StringIO(user_conf))

        self.dir = tempfile.mkdtemp()

        self.job = Job('fake wiki crawler', url_patterns, MechanizeOpener,
                       ['http://zh.wikipedia.org/wiki/%E6%97%A0%E6%95%8C%E8%88%B0%E9%98%9F', ],
                       user_conf=fake_user_conf)

        # Single-node setup: the node list contains only this host, using
        # the port from the job's configuration.
        local_node = 'localhost:%s' % self.job.context.job.port
        nodes = [local_node, ]

        self.rpc_server = ColaRPCServer(('localhost', self.job.context.job.port))
        self.loader = JobLoader(self.job)
        self.loader.init_mq(self.rpc_server, nodes, local_node, self.dir)

        # Run the server loop in a daemon thread. The ``daemon`` attribute
        # replaces the deprecated ``setDaemon`` call.
        thd = threading.Thread(target=self.rpc_server.serve_forever)
        thd.daemon = True
        thd.start()

    def tearDown(self):
        """Release everything setUp created, step by step.

        The loader is finished, the RPC server is shut down and the
        temporary directory is removed. A failure in one step does not
        prevent the following steps from running; the error is re-raised
        once cleanup has been attempted.
        """
        try:
            try:
                self.loader.finish()
            finally:
                # The server must be stopped even if finish() raised.
                self.rpc_server.shutdown()
        finally:
            shutil.rmtree(self.dir)

    def testJobLoader(self):
        """Check the mq hands back the start URL, then run the loader."""
        self.assertEqual(len(self.job.starts), 1)

        # The whole list of starts is put; a single URL comes back from get().
        self.loader.mq.put(self.job.starts)
        self.assertEqual(self.loader.mq.get(), self.job.starts[0])

        # put starts into mq again
        self.loader.mq.put(self.job.starts)
        self.loader.run()

        # ``f`` and ``sep`` come from elsewhere in this file; presumably
        # ``f`` collects what FakeWikiParser emits -- TODO confirm.
        # After the run it should hold 10 entries separated by ``sep``.
        self.assertEqual(len(f.getvalue().strip(sep).split(sep)), 10)