Ejemplo n.º 1
0
    def setUpClass(self):
        """Bring up the services used by the fetcher tests.

        Launches ``httpbin.app.run`` through ``utils.run_in_subprocess`` on
        port 14887, builds a ``Fetcher`` on two queues created with
        ``Queue(10)``, runs its ``xmlrpc_run`` (port 24444) and ``run``
        through ``utils.run_in_thread``, and tries to spawn the phantomjs
        fetcher script with the argument ``25555``.
        """
        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
        self.httpbin = 'http://127.0.0.1:14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = '127.0.0.1:25555'
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)

        phantomjs_script = os.path.join(os.path.dirname(__file__),
                                        '../pyspider/fetcher/phantomjs_fetcher.js')
        try:
            self.phantomjs = subprocess.Popen(['phantomjs', phantomjs_script, '25555'])
        except OSError:
            # The phantomjs process could not be launched; record that as None.
            self.phantomjs = None
        # Presumably gives the background services time to come up -- confirm.
        time.sleep(0.5)
Ejemplo n.º 2
0
    def setUpClass(self):
        """Start httpbin, the fetcher, a ``pyproxy`` process and phantomjs.

        httpbin is launched via ``utils.run_in_subprocess`` on port 14887; the
        ``Fetcher`` runs its ``xmlrpc_run`` (port 24444) and ``run`` via
        ``utils.run_in_thread``; ``pyproxy`` is spawned with username/password
        options on port 14830; phantomjs is spawned with the argument
        ``25555`` when it can be launched.
        """
        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run, port=14887, passthrough_errors=False)
        self.httpbin = 'http://127.0.0.1:14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = '127.0.0.1:25555'
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)

        proxy_cmd = ['pyproxy', '--username=binux', '--password=123456',
                     '--port=14830', '--debug']
        self.proxy_thread = subprocess.Popen(proxy_cmd, close_fds=True)
        self.proxy = '127.0.0.1:14830'

        phantomjs_cmd = [
            'phantomjs',
            os.path.join(os.path.dirname(__file__),
                         '../pyspider/fetcher/phantomjs_fetcher.js'),
            '25555',
        ]
        try:
            self.phantomjs = subprocess.Popen(phantomjs_cmd)
        except OSError:
            # The phantomjs process could not be launched; record that as None.
            self.phantomjs = None
        # Presumably gives the background services time to come up -- confirm.
        time.sleep(0.5)
Ejemplo n.º 3
0
 def setUpClass(self):
     """Prepare the project DB, fetcher, processor, httpbin and proxy.

     Builds a ``ProjectDB`` from ``data_fetcher_processor_handler.py`` (located
     next to this file), a ``Fetcher`` without queues, three result queues
     and a ``Processor`` wired to them. httpbin is launched via
     ``utils.run_in_subprocess`` on port 14887 and ``pyproxy`` is spawned on
     port 14830.
     """
     self.projectdb = ProjectDB([
         os.path.join(os.path.dirname(__file__),
                      'data_fetcher_processor_handler.py')
     ])
     # ``async`` is a reserved word since Python 3.7 and no longer parses as a
     # keyword-argument name, so the flag is passed as ``async_mode``.
     # NOTE(review): requires a Fetcher that accepts ``async_mode`` -- confirm.
     self.fetcher = Fetcher(None, None, async_mode=False)
     self.status_queue = Queue()
     self.newtask_queue = Queue()
     self.result_queue = Queue()
     self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run,
                                                   port=14887,
                                                   passthrough_errors=False)
     self.httpbin = 'http://127.0.0.1:14887'
     self.proxy_thread = subprocess.Popen([
         'pyproxy', '--username=binux', '--password=123456', '--port=14830',
         '--debug'
     ], close_fds=True)
     self.proxy = '127.0.0.1:14830'
     self.processor = Processor(projectdb=self.projectdb,
                                inqueue=None,
                                status_queue=self.status_queue,
                                newtask_queue=self.newtask_queue,
                                result_queue=self.result_queue)
     self.project_name = 'data_fetcher_processor_handler'
     # Presumably gives the background services time to come up -- confirm.
     time.sleep(0.5)
Ejemplo n.º 4
0
    def setUpClass(self):
        """Start httpbin, the fetcher threads, ``pyproxy`` and phantomjs.

        Stores the resulting handles and addresses on ``self`` (``httpbin``,
        ``fetcher``, ``rpc``, ``proxy``, ``phantomjs`` and the thread/process
        objects). ``self.phantomjs`` is ``None`` when launching it raises
        ``OSError``.
        """
        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run,
            port=14887,
            passthrough_errors=False,
        )
        self.httpbin = 'http://127.0.0.1:14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = '127.0.0.1:25555'
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(
            self.fetcher.xmlrpc_run,
            port=24444,
        )
        self.thread = utils.run_in_thread(self.fetcher.run)

        self.proxy_thread = subprocess.Popen(
            [
                'pyproxy',
                '--username=binux',
                '--password=123456',
                '--port=14830',
                '--debug',
            ],
            close_fds=True,
        )
        self.proxy = '127.0.0.1:14830'

        js_path = os.path.join(
            os.path.dirname(__file__),
            '../pyspider/fetcher/phantomjs_fetcher.js',
        )
        try:
            self.phantomjs = subprocess.Popen(['phantomjs', js_path, '25555'])
        except OSError:
            # The phantomjs process could not be launched; record that as None.
            self.phantomjs = None
        # Presumably gives the background services time to come up -- confirm.
        time.sleep(0.5)
Ejemplo n.º 5
0
    def setUpClass(self):
        """Start httpbin, a splash-configured fetcher and ``pyproxy``.

        httpbin is launched with ``host='0.0.0.0'`` on port 14887 and
        ``self.httpbin`` is built from the address that
        ``socket.gethostbyname(socket.gethostname())`` returns. The fetcher's
        ``splash_endpoint`` is set to ``http://127.0.0.1:8050/execute``.
        """
        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run, host='0.0.0.0', port=14887, passthrough_errors=False)
        # NOTE(review): a non-loopback address is used, presumably so the
        # splash service can reach httpbin -- confirm.
        host_ip = socket.gethostbyname(socket.gethostname())
        self.httpbin = 'http://' + host_ip + ':14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.splash_endpoint = 'http://127.0.0.1:8050/execute'
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)

        proxy_cmd = ['pyproxy', '--username=binux', '--bind=0.0.0.0',
                     '--password=123456', '--port=14830', '--debug']
        self.proxy_thread = subprocess.Popen(proxy_cmd, close_fds=True)
        self.proxy = '127.0.0.1:14830'
Ejemplo n.º 6
0
 def setUpClass(self):
     """Create a queue-less ``Fetcher`` and launch httpbin on port 14887."""
     self.fetcher = Fetcher(None, None, async_mode=False)

     httpbin_options = dict(port=14887, passthrough_errors=False)
     self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run,
                                                   **httpbin_options)
     self.httpbin = 'http://127.0.0.1:14887'

     # Presumably gives httpbin time to come up -- confirm.
     time.sleep(0.5)
Ejemplo n.º 7
0
    def setUpClass(self):
        """Start httpbin, the fetcher threads, ``pyproxy`` and phantomjs.

        httpbin runs on port 14887, the fetcher's XML-RPC endpoint on 24444,
        ``pyproxy`` on 14830, and phantomjs is given the argument ``25555``.
        ``self.phantomjs`` is ``None`` when launching it raises ``OSError``.
        """
        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run,
            port=14887,
        )
        self.httpbin = "http://127.0.0.1:14887"

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = "127.0.0.1:25555"
        self.rpc = xmlrpc_client.ServerProxy("http://localhost:%d" % 24444)
        self.xmlrpc_thread = utils.run_in_thread(
            self.fetcher.xmlrpc_run,
            port=24444,
        )
        self.thread = utils.run_in_thread(self.fetcher.run)

        proxy_argv = [
            "pyproxy",
            "--username=binux",
            "--password=123456",
            "--port=14830",
            "--debug",
        ]
        self.proxy_thread = subprocess.Popen(proxy_argv, close_fds=True)
        self.proxy = "127.0.0.1:14830"

        here = os.path.dirname(__file__)
        phantomjs_argv = [
            "phantomjs",
            os.path.join(here, "../pyspider/fetcher/phantomjs_fetcher.js"),
            "25555",
        ]
        try:
            self.phantomjs = subprocess.Popen(phantomjs_argv)
        except OSError:
            # The phantomjs process could not be launched; record that as None.
            self.phantomjs = None
        # Presumably gives the background services time to come up -- confirm.
        time.sleep(0.5)
Ejemplo n.º 8
0
    def setUpClass(self):
        """Recreate ``./data/tests`` and launch httpbin on port 14887."""
        data_dir = './data/tests'
        # Removal errors are ignored; the directory is then created again.
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir)

        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin
        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run,
            port=14887,
            passthrough_errors=False,
        )
        self.httpbin = 'http://127.0.0.1:14887'
Ejemplo n.º 9
0
    def setUpClass(self):
        """Recreate ``./data/tests`` and launch httpbin on port 14887."""
        data_dir = './data/tests'
        # Removal errors are ignored; the directory is then created again.
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir)

        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin
        httpbin_port = 14887
        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run,
                                                      port=httpbin_port)
        self.httpbin = 'http://127.0.0.1:14887'
Ejemplo n.º 10
0
    def setUpClass(self):
        """Start httpbin and every pyspider component for the webui tests.

        Recreates ``./data/tests``, launches httpbin on port 14887, invokes
        the ``run.cli`` command with sqlalchemy+sqlite database URLs, then
        invokes the scheduler, fetcher, processor and result worker commands
        and runs them through ``run_in_thread`` (handles are collected in
        ``self.threads``). Finally the webui command is invoked and its
        ``test_client()`` and ``scheduler_rpc`` config entry are stored on
        ``self``.
        """
        data_dir = './data/tests'
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir)

        # The first and third imports are not referenced below; presumably
        # imported for import-time side effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin
        from pyspider.webui import bench_test  # flake8: noqa
        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run, port=14887, passthrough_errors=False)
        self.httpbin = 'http://127.0.0.1:14887'

        cli_args = [
            '--taskdb', 'sqlalchemy+sqlite+taskdb:///data/tests/task.db',
            '--projectdb', 'sqlalchemy+sqlite+projectdb:///data/tests/projectdb.db',
            '--resultdb', 'sqlalchemy+sqlite+resultdb:///data/tests/resultdb.db',
        ]
        root_ctx = run.cli.make_context('test', cli_args, None,
                                        obj=ObjectDict(testing_mode=True))
        self.ctx = run.cli.invoke(root_ctx)

        self.threads = []

        # Scheduler: XML-RPC endpoint plus main loop.
        scheduler_ctx = run.scheduler.make_context('scheduler', [], self.ctx)
        scheduler = run.scheduler.invoke(scheduler_ctx)
        self.scheduler = scheduler
        self.threads.append(run_in_thread(scheduler.xmlrpc_run))
        self.threads.append(run_in_thread(scheduler.run))

        # Fetcher: XML-RPC endpoint on port 24444 plus main loop.
        fetcher_args = ['--xmlrpc', '--xmlrpc-port', '24444']
        fetcher_ctx = run.fetcher.make_context('fetcher', fetcher_args, self.ctx)
        fetcher = run.fetcher.invoke(fetcher_ctx)
        self.threads.append(run_in_thread(fetcher.xmlrpc_run))
        self.threads.append(run_in_thread(fetcher.run))

        processor_ctx = run.processor.make_context('processor', [], self.ctx)
        processor = run.processor.invoke(processor_ctx)
        self.threads.append(run_in_thread(processor.run))

        worker_ctx = run.result_worker.make_context('result_worker', [], self.ctx)
        result_worker = run.result_worker.invoke(worker_ctx)
        self.threads.append(run_in_thread(result_worker.run))

        webui_args = ['--scheduler-rpc', 'http://localhost:23333/']
        webui_ctx = run.webui.make_context('webui', webui_args, self.ctx)
        webui_app = run.webui.invoke(webui_ctx)
        webui_app.debug = True
        self.app = webui_app.test_client()
        self.rpc = webui_app.config['scheduler_rpc']

        # Presumably gives the background threads time to come up -- confirm.
        time.sleep(1)
Ejemplo n.º 11
0
    def setUpClass(self):
        """Start httpbin and every pyspider component for the webui tests.

        Recreates ``./data/tests``, launches httpbin on port 14887, invokes
        ``run.cli`` with sqlalchemy+sqlite database URLs, then invokes and
        runs the scheduler, fetcher (XML-RPC port 24444), processor and
        result worker through ``run_in_thread``, collecting the handles in
        ``self.threads``. The webui's ``test_client()`` and ``scheduler_rpc``
        config entry end up in ``self.app`` and ``self.rpc``.
        """
        shutil.rmtree('./data/tests', ignore_errors=True)
        os.makedirs('./data/tests')

        # The first and third imports are not referenced below; presumably
        # imported for import-time side effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin
        from pyspider.webui import bench_test  # flake8: noqa
        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run,
            port=14887,
            passthrough_errors=False,
        )
        self.httpbin = 'http://127.0.0.1:14887'

        db_args = [
            '--taskdb', 'sqlalchemy+sqlite+taskdb:///data/tests/task.db',
            '--projectdb', 'sqlalchemy+sqlite+projectdb:///data/tests/projectdb.db',
            '--resultdb', 'sqlalchemy+sqlite+resultdb:///data/tests/resultdb.db',
        ]
        self.ctx = run.cli.invoke(
            run.cli.make_context('test', db_args, None,
                                 obj=ObjectDict(testing_mode=True)))

        self.threads = []

        scheduler = run.scheduler.invoke(
            run.scheduler.make_context('scheduler', [], self.ctx))
        self.scheduler = scheduler
        self.threads.append(run_in_thread(scheduler.xmlrpc_run))
        self.threads.append(run_in_thread(scheduler.run))

        fetcher = run.fetcher.invoke(
            run.fetcher.make_context(
                'fetcher', ['--xmlrpc', '--xmlrpc-port', '24444'], self.ctx))
        self.threads.append(run_in_thread(fetcher.xmlrpc_run))
        self.threads.append(run_in_thread(fetcher.run))

        processor = run.processor.invoke(
            run.processor.make_context('processor', [], self.ctx))
        self.threads.append(run_in_thread(processor.run))

        result_worker = run.result_worker.invoke(
            run.result_worker.make_context('result_worker', [], self.ctx))
        self.threads.append(run_in_thread(result_worker.run))

        web = run.webui.invoke(
            run.webui.make_context(
                'webui', ['--scheduler-rpc', 'http://localhost:23333/'], self.ctx))
        web.debug = True
        self.app = web.test_client()
        self.rpc = web.config['scheduler_rpc']

        # Presumably gives the background threads time to come up -- confirm.
        time.sleep(1)
Ejemplo n.º 12
0
    def setUpClass(self):
        """Start httpbin and every pyspider component for the webui tests.

        Recreates ``./data/tests``, launches httpbin on port 14887, invokes
        ``run.cli`` with sqlite database URLs, then invokes the scheduler,
        fetcher, processor and result worker and runs them through
        ``run_in_thread`` (the returned handles are not kept). The webui's
        ``test_client()`` and ``scheduler_rpc`` config entry are stored in
        ``self.app`` and ``self.rpc``.
        """
        tests_dir = "./data/tests"
        shutil.rmtree(tests_dir, ignore_errors=True)
        os.makedirs(tests_dir)

        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run,
                                                      port=14887)
        self.httpbin = "http://127.0.0.1:14887"

        cli_argv = [
            "--taskdb", "sqlite+taskdb:///data/tests/task.db",
            "--projectdb", "sqlite+projectdb:///data/tests/projectdb.db",
            "--resultdb", "sqlite+resultdb:///data/tests/resultdb.db",
        ]
        cli_ctx = run.cli.make_context("test", cli_argv, None,
                                       obj=ObjectDict(testing_mode=True))
        self.ctx = run.cli.invoke(cli_ctx)

        # Scheduler: XML-RPC endpoint plus main loop.
        scheduler_ctx = run.scheduler.make_context("scheduler", [], self.ctx)
        scheduler = run.scheduler.invoke(scheduler_ctx)
        run_in_thread(scheduler.xmlrpc_run)
        run_in_thread(scheduler.run)

        fetcher_ctx = run.fetcher.make_context("fetcher", [], self.ctx)
        fetcher = run.fetcher.invoke(fetcher_ctx)
        run_in_thread(fetcher.run)

        processor_ctx = run.processor.make_context("processor", [], self.ctx)
        processor = run.processor.invoke(processor_ctx)
        run_in_thread(processor.run)

        worker_ctx = run.result_worker.make_context("result_worker", [], self.ctx)
        result_worker = run.result_worker.invoke(worker_ctx)
        run_in_thread(result_worker.run)

        webui_argv = ["--scheduler-rpc", "http://localhost:23333/"]
        webui_ctx = run.webui.make_context("webui", webui_argv, self.ctx)
        webui_app = run.webui.invoke(webui_ctx)
        webui_app.debug = True
        self.app = webui_app.test_client()
        self.rpc = webui_app.config["scheduler_rpc"]

        # Presumably gives the background threads time to come up -- confirm.
        time.sleep(1)
Ejemplo n.º 13
0
    def setUpClass(self):
        """Start httpbin, a splash-configured fetcher and ``pyproxy``.

        httpbin is launched with ``host='0.0.0.0'`` on port 14887 and
        ``self.httpbin`` is built from the address that
        ``socket.gethostbyname(socket.gethostname())`` returns. The fetcher's
        ``splash_endpoint`` is set to ``http://127.0.0.1:8050/execute``.
        """
        # Not referenced below; presumably imported for import-time side
        # effects -- TODO confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run,
            host='0.0.0.0',
            port=14887,
            passthrough_errors=False,
        )
        # NOTE(review): a non-loopback address is used, presumably so the
        # splash service can reach httpbin -- confirm.
        local_addr = socket.gethostbyname(socket.gethostname())
        self.httpbin = 'http://' + local_addr + ':14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.splash_endpoint = 'http://127.0.0.1:8050/execute'
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(
            self.fetcher.xmlrpc_run,
            port=24444,
        )
        self.thread = utils.run_in_thread(self.fetcher.run)

        pyproxy_argv = [
            'pyproxy',
            '--username=binux',
            '--password=123456',
            '--port=14830',
            '--debug',
        ]
        self.proxy_thread = subprocess.Popen(pyproxy_argv, close_fds=True)
        self.proxy = '127.0.0.1:14830'
Ejemplo n.º 14
0
 def setUpClass(self):
     """Prepare the project DB, fetcher, processor, httpbin and proxy.

     Builds a ``ProjectDB`` from ``data_fetcher_processor_handler.py`` (located
     next to this file), a ``Fetcher`` without queues, three result queues
     and a ``Processor`` wired to them. httpbin is launched via
     ``utils.run_in_subprocess`` on port 14887 and ``pyproxy`` is spawned on
     port 14830.
     """
     self.projectdb = ProjectDB([os.path.join(os.path.dirname(__file__), 'data_fetcher_processor_handler.py')])
     # ``async`` is a reserved word since Python 3.7 and no longer parses as a
     # keyword-argument name, so the flag is passed as ``async_mode``.
     # NOTE(review): requires a Fetcher that accepts ``async_mode`` -- confirm.
     self.fetcher = Fetcher(None, None, async_mode=False)
     self.status_queue = Queue()
     self.newtask_queue = Queue()
     self.result_queue = Queue()
     self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
     self.httpbin = 'http://127.0.0.1:14887'
     self.proxy_thread = subprocess.Popen(['pyproxy', '--username=binux',
                                           '--password=123456', '--port=14830',
                                           '--debug'], close_fds=True)
     self.proxy = '127.0.0.1:14830'
     self.processor = Processor(projectdb=self.projectdb,
                                inqueue=None,
                                status_queue=self.status_queue,
                                newtask_queue=self.newtask_queue,
                                result_queue=self.result_queue)
     self.project_name = 'data_fetcher_processor_handler'
     # Presumably gives the background services time to come up -- confirm.
     time.sleep(0.5)
Ejemplo n.º 15
0
 def setUpClass(self):
     """Prepare the project DB, fetcher, processor and httpbin.

     Builds a ``ProjectDB`` from ``data_fetcher_processor_handler.py`` (located
     next to this file), a ``Fetcher`` without queues, three result queues
     and a ``Processor`` wired to them. httpbin is launched via
     ``utils.run_in_subprocess`` on port 14887.
     """
     self.projectdb = ProjectDB([
         os.path.join(os.path.dirname(__file__),
                      'data_fetcher_processor_handler.py')
     ])
     # ``async`` is a reserved word since Python 3.7 and no longer parses as a
     # keyword-argument name, so the flag is passed as ``async_mode``.
     # NOTE(review): requires a Fetcher that accepts ``async_mode`` -- confirm.
     self.fetcher = Fetcher(None, None, async_mode=False)
     self.status_queue = Queue()
     self.newtask_queue = Queue()
     self.result_queue = Queue()
     self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run,
                                                   port=14887)
     self.httpbin = 'http://127.0.0.1:14887'
     self.processor = Processor(projectdb=self.projectdb,
                                inqueue=None,
                                status_queue=self.status_queue,
                                newtask_queue=self.newtask_queue,
                                result_queue=self.result_queue)
     self.project_name = 'data_fetcher_processor_handler'
     # Presumably gives httpbin time to come up -- confirm.
     time.sleep(0.5)
Ejemplo n.º 16
0
 def setUpClass(self):
     """Create a queue-less ``Fetcher`` and launch httpbin on port 14887."""
     # ``async`` is a reserved word since Python 3.7 and no longer parses as a
     # keyword-argument name, so the flag is passed as ``async_mode``.
     # NOTE(review): requires a Fetcher that accepts ``async_mode`` -- confirm.
     self.fetcher = Fetcher(None, None, async_mode=False)
     self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887, passthrough_errors=False)
     self.httpbin = 'http://127.0.0.1:14887'
     # Presumably gives httpbin time to come up -- confirm.
     time.sleep(0.5)