Beispiel #1
0
def run_scheduler():
    """Construct a Scheduler from the surrounding scope's databases/queues and run it.

    The scheduler's ``xmlrpc_run`` is handed to ``run_in_thread`` together
    with ``scheduler_xmlrpc_port``; afterwards ``run()`` is called directly
    in the calling thread.
    """
    # Kept as a function-local import, exactly where the original had it.
    from scheduler import Scheduler

    sched = Scheduler(
        taskdb=get_taskdb(),
        projectdb=get_projectdb(),
        newtask_queue=newtask_queue,
        status_queue=status_queue,
        out_queue=scheduler2fetcher,
    )

    run_in_thread(sched.xmlrpc_run, port=scheduler_xmlrpc_port)
    sched.run()
Beispiel #2
0
def run_fetcher(g=g):
    """Create a Fetcher between two queues held on ``g`` and run it.

    ``g`` supplies the in/out queues, the phantomjs proxy value and the
    XML-RPC port and bind host. It defaults to the ``g`` object that is in
    scope when this function is defined.
    """
    from fetcher.tornado_fetcher import Fetcher

    worker = Fetcher(inqueue=g.scheduler2fetcher,
                     outqueue=g.fetcher2processor)
    worker.phantomjs_proxy = g.phantomjs_proxy

    # Hand the XML-RPC entry point to run_in_thread, then call run() here.
    run_in_thread(worker.xmlrpc_run,
                  port=g.fetcher_xmlrpc_port,
                  bind=g.webui_host)
    worker.run()
Beispiel #3
0
 def setUpClass(self):
     """Create a Fetcher with two queues and start it plus its XML-RPC endpoint.

     NOTE(review): unittest invokes setUpClass on the class, so this
     presumably carries a @classmethod decorator outside this excerpt.
     """
     rpc_port = 24444

     self.inqueue, self.outqueue = Queue(10), Queue(10)
     self.fetcher = Fetcher(self.inqueue, self.outqueue)

     # Client proxy aimed at the same port xmlrpc_run is started with below.
     self.rpc = xmlrpclib.ServerProxy('http://localhost:%d' % rpc_port)
     self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run,
                                              port=rpc_port)
     self.thread = utils.run_in_thread(self.fetcher.run)
Beispiel #4
0
 def setUpClass(self):
     """Wire up a Fetcher, an XML-RPC client proxy and two helper threads.

     NOTE(review): the @classmethod decorator unittest expects is not
     visible in this excerpt - confirm it exists on the enclosing class.
     """
     xmlrpc_port = 24444
     endpoint = "http://localhost:%d" % xmlrpc_port

     self.inqueue = Queue(10)
     self.outqueue = Queue(10)
     fetcher = Fetcher(self.inqueue, self.outqueue)
     self.fetcher = fetcher

     self.rpc = xmlrpclib.ServerProxy(endpoint)
     # Both the XML-RPC entry point and run() are started via run_in_thread.
     self.xmlrpc_thread = utils.run_in_thread(fetcher.xmlrpc_run,
                                              port=xmlrpc_port)
     self.thread = utils.run_in_thread(fetcher.run)
Beispiel #5
0
def run_scheduler():
    """Build a Scheduler, expose it over XML-RPC and call its ``run()``.

    Databases come from ``get_taskdb()`` / ``get_projectdb()``; the three
    queues and the XML-RPC port are read from the enclosing scope.
    """
    from scheduler import Scheduler

    instance = Scheduler(taskdb=get_taskdb(),
                         projectdb=get_projectdb(),
                         newtask_queue=newtask_queue,
                         status_queue=status_queue,
                         out_queue=scheduler2fetcher)

    # xmlrpc_run goes through run_in_thread; run() is called directly.
    run_in_thread(instance.xmlrpc_run, port=scheduler_xmlrpc_port)
    instance.run()
Beispiel #6
0
 def run_scheduler():
     """Create a Scheduler on the test queues with short intervals and run it.

     ``self`` is taken from the enclosing scope; data is stored under
     ``./test/data/``.
     """
     sched = Scheduler(
         taskdb=get_taskdb(),
         projectdb=get_projectdb(),
         newtask_queue=self.newtask_queue,
         status_queue=self.status_queue,
         out_queue=self.scheduler2fetcher,
         data_path="./test/data/",
     )
     # Small interval values, presumably to keep the test responsive.
     sched.UPDATE_PROJECT_INTERVAL = 0.05
     sched.LOOP_INTERVAL = 0.01

     run_in_thread(sched.xmlrpc_run, port=self.scheduler_xmlrpc_port)
     sched.run()
Beispiel #7
0
 def run_scheduler():
     """Create a Scheduler on the test queues, tune its intervals and run it.

     ``self`` comes from the enclosing scope; data is stored under
     ``./test/data/``.
     """
     sched = Scheduler(
         taskdb=get_taskdb(),
         projectdb=get_projectdb(),
         newtask_queue=self.newtask_queue,
         status_queue=self.status_queue,
         out_queue=self.scheduler2fetcher,
         data_path="./test/data/",
     )
     sched.UPDATE_PROJECT_INTERVAL = 0.1
     sched.LOOP_INTERVAL = 0.1
     # Mark "now" as the last tick so that cron jobs are not dispatched.
     sched._last_tick = time.time()

     run_in_thread(sched.xmlrpc_run, port=self.scheduler_xmlrpc_port)
     sched.run()
Beispiel #8
0
def run_scheduler(g=g):
    """Build a Scheduler from the databases and queues held on ``g`` and run it.

    When ``g.demo_mode`` is truthy the instance's ``INQUEUE_LIMIT`` is set
    to 1000. ``xmlrpc_run`` is passed to ``run_in_thread`` with the
    configured port and ``g.webui_host`` as bind address, after which
    ``run()`` is called directly.
    """
    from scheduler import Scheduler

    sched = Scheduler(
        taskdb=g.taskdb,
        projectdb=g.projectdb,
        resultdb=g.resultdb,
        newtask_queue=g.newtask_queue,
        status_queue=g.status_queue,
        out_queue=g.scheduler2fetcher,
    )
    if g.demo_mode:
        sched.INQUEUE_LIMIT = 1000

    run_in_thread(sched.xmlrpc_run,
                  port=g.scheduler_xmlrpc_port,
                  bind=g.webui_host)
    sched.run()
Beispiel #9
0
def run_scheduler(g=g):
    """Instantiate and run the Scheduler described by the settings object ``g``.

    ``g`` provides the task/project/result databases, the three queues,
    the ``demo_mode`` flag and the XML-RPC port and bind host.
    """
    from scheduler import Scheduler

    instance = Scheduler(taskdb=g.taskdb,
                         projectdb=g.projectdb,
                         resultdb=g.resultdb,
                         newtask_queue=g.newtask_queue,
                         status_queue=g.status_queue,
                         out_queue=g.scheduler2fetcher)

    # Demo mode overrides the in-queue limit on this instance only.
    if g.demo_mode:
        instance.INQUEUE_LIMIT = 1000

    run_in_thread(instance.xmlrpc_run,
                  port=g.scheduler_xmlrpc_port,
                  bind=g.webui_host)
    instance.run()
Beispiel #10
0
 def run_scheduler():
     """Start a Scheduler for the tests: build, configure, expose, run.

     Relies on ``self``, ``get_taskdb`` and ``get_projectdb`` from the
     enclosing scope.
     """
     options = dict(
         taskdb=get_taskdb(),
         projectdb=get_projectdb(),
         newtask_queue=self.newtask_queue,
         status_queue=self.status_queue,
         out_queue=self.scheduler2fetcher,
         data_path="./test/data/",
     )
     sched = Scheduler(**options)

     sched.UPDATE_PROJECT_INTERVAL = 0.1
     sched.LOOP_INTERVAL = 0.1
     # Pretend a tick just happened so cron jobs are not dispatched.
     sched._last_tick = time.time()

     run_in_thread(sched.xmlrpc_run, port=self.scheduler_xmlrpc_port)
     sched.run()
Beispiel #11
0
    def setUpClass(self):
        """Reset the scratch directory and launch a Scheduler for the tests.

        Recreates ``./test/data/``, opens the task/project/result databases,
        creates three ``Queue(10)`` instances and an XML-RPC client proxy,
        then starts a Scheduler through ``run_in_thread``.

        NOTE(review): unittest calls setUpClass on the class; the
        @classmethod decorator is presumably just outside this excerpt.
        """
        data_dir = './test/data/'
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir)

        # Factories, so the scheduler can open database handles of its own.
        def open_taskdb():
            return taskdb.TaskDB(self.taskdb_path)

        def open_projectdb():
            return projectdb.ProjectDB(self.projectdb_path)

        def open_resultdb():
            return resultdb.ResultDB(self.resultdb_path)

        self.taskdb = open_taskdb()
        self.projectdb = open_projectdb()
        self.resultdb = open_resultdb()

        self.newtask_queue = Queue(10)
        self.status_queue = Queue(10)
        self.scheduler2fetcher = Queue(10)

        endpoint = 'http://localhost:%d' % self.scheduler_xmlrpc_port
        self.rpc = xmlrpclib.ServerProxy(endpoint)

        def run_scheduler():
            sched = Scheduler(
                taskdb=open_taskdb(),
                projectdb=open_projectdb(),
                newtask_queue=self.newtask_queue,
                status_queue=self.status_queue,
                out_queue=self.scheduler2fetcher,
                data_path=data_dir,
                resultdb=open_resultdb(),
            )
            sched.UPDATE_PROJECT_INTERVAL = 0.1
            sched.LOOP_INTERVAL = 0.1
            sched.INQUEUE_LIMIT = 10
            # NOTE(review): this one is set on the class, not the instance,
            # unlike its neighbours - confirm that is intended.
            Scheduler.DELETE_TIME = 0
            # Treat "now" as the last tick so cron jobs are not dispatched.
            sched._last_tick = int(time.time())
            run_in_thread(sched.xmlrpc_run, port=self.scheduler_xmlrpc_port)
            sched.run()

        self.process = run_in_thread(run_scheduler)
        # Presumably gives the scheduler time to come up before tests run.
        time.sleep(1)
Beispiel #12
0
    def test_40_multiple_threading_error(self):
        # Put 100 items on self.q3 via run_in_thread while this thread
        # takes 100 items off the same queue.
        item_count = 100

        def producer(queue):
            for index in range(item_count):
                queue.put("DATA_%d" % index)

        def consumer(queue):
            for _ in range(item_count):
                queue.get()

        utils.run_in_thread(producer, self.q3)
        consumer(self.q3)
Beispiel #13
0
    def test_40_multiple_threading_error(self):
        # Concurrent use of self.q3: a helper started with run_in_thread
        # writes 100 entries while the test itself reads 100 entries.
        total = 100
        shared_queue = self.q3

        def fill(target):
            for number in range(total):
                target.put("DATA_%d" % number)

        def drain(target):
            for _ in range(total):
                target.get()

        utils.run_in_thread(fill, shared_queue)
        drain(shared_queue)
Beispiel #14
0
    def setUpClass(self):
        """Recreate ``./test/data/`` and start a ResultWorker via run_in_thread.

        NOTE(review): the @classmethod decorator unittest expects is not
        visible in this excerpt.
        """
        data_dir = './test/data/'
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir)

        def open_resultdb():
            return resultdb.ResultDB(self.resultdb_path)

        self.resultdb = open_resultdb()
        self.inqueue = Queue(10)

        def run_result_worker():
            # The worker receives a ResultDB of its own, not self.resultdb.
            worker = ResultWorker(open_resultdb(), self.inqueue)
            self.result_worker = worker
            worker.run()

        self.process = run_in_thread(run_result_worker)
        # Presumably lets the worker start before the tests begin.
        time.sleep(1)
Beispiel #15
0
def scheduler(ctx, xmlrpc, xmlrpc_host, xmlrpc_port,
              inqueue_limit, delete_time, active_tasks, loop_limit, scheduler_cls):
    """
    Run Scheduler, only one scheduler is allowed.
    """
    # The docstring above is kept verbatim: if this function is registered
    # as a CLI command (decorators are not visible here), it may be shown
    # to users as help text.
    shared = ctx.obj
    scheduler_class = load_cls(None, None, scheduler_cls)

    instance = scheduler_class(
        taskdb=shared.taskdb,
        projectdb=shared.projectdb,
        resultdb=shared.resultdb,
        newtask_queue=shared.newtask_queue,
        status_queue=shared.status_queue,
        out_queue=shared.scheduler2fetcher,
        data_path=shared.get('data_path', 'data'),
    )

    # Apply the limits passed in by the caller to this instance.
    instance.INQUEUE_LIMIT = inqueue_limit
    instance.DELETE_TIME = delete_time
    instance.ACTIVE_TASKS = active_tasks
    instance.LOOP_LIMIT = loop_limit

    shared.instances.append(instance)

    # In testing mode the configured object is returned without being run.
    if shared.get('testing_mode'):
        return instance

    if xmlrpc:
        utils.run_in_thread(instance.xmlrpc_run,
                            port=xmlrpc_port,
                            bind=xmlrpc_host)
    instance.run()
Beispiel #16
0
    def setUpClass(self):
        """Prepare databases, queues and an RPC proxy, then start a Scheduler.

        The scratch directory ``./test/data/`` is removed and recreated
        first. The Scheduler is started through ``run_in_thread`` and the
        method then sleeps for one second.

        NOTE(review): unittest calls setUpClass on the class; the
        @classmethod decorator is presumably outside this excerpt.
        """
        scratch = './test/data/'
        shutil.rmtree(scratch, ignore_errors=True)
        os.makedirs(scratch)

        def make_taskdb():
            return taskdb.TaskDB(self.taskdb_path)

        def make_projectdb():
            return projectdb.ProjectDB(self.projectdb_path)

        def make_resultdb():
            return resultdb.ResultDB(self.resultdb_path)

        self.taskdb = make_taskdb()
        self.projectdb = make_projectdb()
        self.resultdb = make_resultdb()

        self.newtask_queue = Queue(10)
        self.status_queue = Queue(10)
        self.scheduler2fetcher = Queue(10)

        rpc_url = 'http://localhost:%d' % self.scheduler_xmlrpc_port
        self.rpc = xmlrpclib.ServerProxy(rpc_url)

        def run_scheduler():
            # Fresh database handles are opened for the scheduler itself.
            settings = dict(
                taskdb=make_taskdb(),
                projectdb=make_projectdb(),
                newtask_queue=self.newtask_queue,
                status_queue=self.status_queue,
                out_queue=self.scheduler2fetcher,
                data_path=scratch,
                resultdb=make_resultdb(),
            )
            sched = Scheduler(**settings)

            sched.UPDATE_PROJECT_INTERVAL = 0.1
            sched.LOOP_INTERVAL = 0.1
            sched.INQUEUE_LIMIT = 10
            # NOTE(review): assigned on the class rather than the instance;
            # confirm this class-wide override is intended.
            Scheduler.DELETE_TIME = 0
            # Use "now" as the last tick so cron jobs are not dispatched.
            sched._last_tick = int(time.time())

            run_in_thread(sched.xmlrpc_run, port=self.scheduler_xmlrpc_port)
            sched.run()

        self.process = run_in_thread(run_scheduler)
        time.sleep(1)
Beispiel #17
0
    def setUpClass(self):
        """Recreate ``./test/data/`` and start a Processor via run_in_thread.

        Creates the project database and four ``Queue(10)`` instances that
        are passed to the Processor.

        NOTE(review): the @classmethod decorator unittest expects is not
        visible in this excerpt.
        """
        data_dir = './test/data/'
        shutil.rmtree(data_dir, ignore_errors=True)
        os.makedirs(data_dir)

        def open_projectdb():
            return projectdb.ProjectDB(self.projectdb_path)

        self.projectdb = open_projectdb()
        self.in_queue = Queue(10)
        self.status_queue = Queue(10)
        self.newtask_queue = Queue(10)
        self.result_queue = Queue(10)

        def run_processor():
            # The processor is given a ProjectDB of its own.
            proc = Processor(open_projectdb(),
                             self.in_queue,
                             self.status_queue,
                             self.newtask_queue,
                             self.result_queue)
            self.processor = proc
            proc.CHECK_PROJECTS_INTERVAL = 0.1
            proc.run()

        self.process = run_in_thread(run_processor)
        # Presumably lets the processor start before the tests begin.
        time.sleep(1)
Beispiel #18
0
    def setUpClass(self):
        """Set up the project database and queues, then start a Processor.

        ``./test/data/`` is wiped and recreated first; the Processor is
        started through ``run_in_thread`` and the method sleeps one second.

        NOTE(review): unittest calls setUpClass on the class; the
        @classmethod decorator is presumably outside this excerpt.
        """
        scratch = './test/data/'
        shutil.rmtree(scratch, ignore_errors=True)
        os.makedirs(scratch)

        def make_projectdb():
            return projectdb.ProjectDB(self.projectdb_path)

        self.projectdb = make_projectdb()

        self.in_queue = Queue(10)
        self.status_queue = Queue(10)
        self.newtask_queue = Queue(10)
        self.result_queue = Queue(10)

        def run_processor():
            queues = (self.in_queue, self.status_queue,
                      self.newtask_queue, self.result_queue)
            worker = Processor(make_projectdb(), *queues)
            self.processor = worker
            worker.CHECK_PROJECTS_INTERVAL = 0.1
            worker.run()

        self.process = run_in_thread(run_processor)
        time.sleep(1)
Beispiel #19
0
def run_fetcher():
    """Create a Fetcher on the scheduler->fetcher and fetcher->processor queues and run it.

    ``xmlrpc_run`` is passed to ``run_in_thread`` with
    ``fetcher_xmlrpc_port``; ``run()`` is then called directly.
    """
    from fetcher.tornado_fetcher import Fetcher

    worker = Fetcher(inqueue=scheduler2fetcher,
                     outqueue=fetcher2processor)

    run_in_thread(worker.xmlrpc_run, port=fetcher_xmlrpc_port)
    worker.run()
Beispiel #20
0
 def setUp(self):
     """Create a Fetcher with no queues and start its ``run`` via run_in_thread."""
     fetcher = Fetcher(None, None)
     self.fetcher = fetcher
     self.thread = utils.run_in_thread(fetcher.run)
Beispiel #21
0
 def setUp(self):
     """Give each test a Fetcher built with ``None`` for both queues.

     The fetcher's ``run`` is started through ``utils.run_in_thread`` and
     the returned value is kept on ``self.thread``.
     """
     no_queue = None
     self.fetcher = Fetcher(no_queue, no_queue)
     self.thread = utils.run_in_thread(self.fetcher.run)
Beispiel #22
0
def run_fetcher():
    """Instantiate the tornado-based Fetcher, expose it over XML-RPC and run it.

    Queues and the XML-RPC port are read from the enclosing scope.
    """
    from fetcher.tornado_fetcher import Fetcher

    queues = dict(inqueue=scheduler2fetcher, outqueue=fetcher2processor)
    instance = Fetcher(**queues)

    # xmlrpc_run goes through run_in_thread; run() is called directly.
    run_in_thread(instance.xmlrpc_run, port=fetcher_xmlrpc_port)
    instance.run()
Beispiel #23
0
    fetcher = Fetcher(inqueue=g.scheduler2fetcher, outqueue=g.fetcher2processor,
                      poolsize=poolsize, proxy=proxy, async=async)
    fetcher.phantomjs_proxy = g.phantomjs_proxy
    if user_agent:
        fetcher.user_agent = user_agent
    if timeout:
        fetcher.default_options = copy.deepcopy(fetcher.default_options)
        fetcher.default_options['timeout'] = timeout

    g.instances.append(fetcher)
    if g.get('testing_mode'):
        return fetcher

    if xmlrpc:
        utils.run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host)
    fetcher.run()


@cli.command()
@click.option('--processor-cls', default='pyspider.processor.Processor',
              callback=load_cls, help='Processor class to be used.')
@click.pass_context
def processor(ctx, processor_cls, enable_stdout_capture=True):
    """
    Run Processor.
    """
    g = ctx.obj
    Processor = load_cls(None, None, processor_cls)

    processor = Processor(projectdb=g.projectdb,