Ejemplo n.º 1
0
    def testMQ(self):
        """Exercise the message queue directly and through its client.

        Covers a bulk put/drain round trip on the raw queue, a single
        put/get through the client, and retrieval by priority.
        """
        queue = self.mq0
        payload = [str(random.randint(10000, 50000)) for _ in range(20)]
        timer = Clock()
        queue.put(payload, flush=True)

        # Drain the queue; a None from get() ends the drain.
        received = []
        item = queue.get()
        while item is not None:
            received.append(item)
            item = queue.get()

        # Only the contents are compared, not the retrieval order.
        self.assertEqual(sorted(payload), sorted(received))
        print(timer.clock())

        # test mq client
        token = str(random.randint(10000, 50000))
        self.client.put(token)
        self.assertEqual(token, self.client.get())

        home = 'http://qinxuye.me'
        about = 'http://qinxuye.me/about'

        self.client.put(Url(home, priority=1))
        fetched = self.client.get(priority=1)
        self.assertEqual(fetched.url, home)

        # test put into different priorities
        self.client.put(Url(home, priority=0))
        self.client.put(Url(about, priority=1))
        self.client.put(u'三星')

        self.assertEqual(self.client.get(priority=1).url, about)
        self.assertEqual(self.client.get(priority=0).url, home)
        print(timer.clock())
Ejemplo n.º 2
0
 def run_job(self, job_name):
     """Start the thread of an already prepared job.

     Returns True after the job's thread has been started, or False when
     ``job_name`` is not present in ``self.running_jobs``.
     """
     self.logger.debug('entering worker run phase, job id: %s' % job_name)

     if job_name in self.running_jobs:
         job_info = self.running_jobs[job_name]

         # Attach a fresh clock to the job info before its thread starts.
         job_info.clock = Clock()
         job_info.thread.start()

         self.logger.debug('worker starts to run job, id: %s' % job_name)
         return True

     self.logger.debug(
         'job not prepared, refused to run, job id: %s' % job_name)
     return False
Ejemplo n.º 3
0
    def _run_local_job(self, job_path, overwrite=False, rpc_server=None, settings=None):
        """Run a single job inside this process and wait for it to end.

        The job description is imported from ``job_path``, optionally
        updated with ``settings`` and registered in ``self.env['job_desc']``.
        The job runs on a separate thread while this method polls its
        status; SIGINT and SIGTERM are redirected to a handler that shuts
        the job (and ``rpc_server``, when given) down.

        :param job_path: location passed to ``import_job_desc`` and ``Job``.
        :param overwrite: forwarded to ``self._get_name_and_dir``.
        :param rpc_server: optional server handed to the job and shut down
            together with it.
        :param settings: optional overrides applied through
            ``job_desc.update_settings``.
        """
        job_desc = import_job_desc(job_path)
        if settings is not None:
            job_desc.update_settings(settings)
        base_name = job_desc.uniq_name
        self.env['job_desc'][base_name] = job_desc

        # The directory is derived from the worker address, with '.' and
        # ':' replaced by '_'.
        addr_dirname = self.addr.replace('.', '_').replace(':', '_')
        job_name, working_dir = self._get_name_and_dir(
            os.path.join(self.working_dir, 'worker', addr_dirname),
            base_name,
            overwrite=overwrite,
            clear=job_desc.settings.job.clear)

        clock = Clock()
        job = Job(self, job_path, job_name,
                  job_desc=job_desc,
                  working_dir=working_dir,
                  rpc_server=rpc_server,
                  manager=self.manager)
        runner = threading.Thread(target=job.run, args=(True, ))
        runner.start()

        stopped = multiprocessing.Event()

        def shutdown_all():
            # Stop the job first, then the rpc server when one was given.
            job.shutdown()
            if rpc_server:
                rpc_server.shutdown()

        def stop(signum, frame):
            # Act only in a process whose name contains 'main', and only
            # for the first signal received.
            in_main = 'main' in multiprocessing.current_process().name.lower()
            if not in_main or stopped.is_set():
                return
            stopped.set()

            self.logger.debug("Catch interrupt signal, start to stop")
            shutdown_all()

        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, stop)

        # Poll until the job finishes, stays idle for more than
        # MAX_IDLE_TIMES consecutive checks, or its thread exits.
        idle_rounds = 0
        while runner.is_alive():
            if job.get_status() == FINISHED:
                break
            if job.get_status() == IDLE:
                idle_rounds += 1
                if idle_rounds > MAX_IDLE_TIMES:
                    break
            else:
                idle_rounds = 0

            try:
                runner.join(5)
            except IOError:
                break

        # Shut down here only when nothing else has already done so.
        if not job.stopped.is_set() and job.get_status() == FINISHED:
            self.logger.debug('All objects have been fetched, try to finish job')
            shutdown_all()
        elif not (stopped.is_set() or runner.is_alive()):
            shutdown_all()
        elif not job.stopped.is_set() and job.get_status() == IDLE:
            self.logger.debug('No bundle or url to perform, try to finish job')
            shutdown_all()

        elapsed = clock.clock()
        self.logger.debug('Job id:%s finished, spend %.2f seconds for running' % (
            job_name, elapsed))
Ejemplo n.º 4
0
    def _run_local_job(self,
                       job_path,
                       overwrite=False,
                       rpc_server=None,
                       settings=None):
        """Run a single job inside this process and wait for it to end.

        The job description is imported from ``job_path``, optionally
        updated with ``settings`` and registered in ``self.env['job_desc']``.
        The job runs on a separate thread while this method polls its
        status; SIGINT and SIGTERM are redirected to a handler that shuts
        the job (and ``rpc_server``, when given) down.

        :param job_path: location passed to ``import_job_desc`` and ``Job``.
        :param overwrite: forwarded to ``self._get_name_and_dir``.
        :param rpc_server: optional server handed to the job and shut down
            together with it.
        :param settings: optional overrides applied through
            ``job_desc.update_settings``.
        """
        job_desc = import_job_desc(job_path)
        if settings is not None:
            job_desc.update_settings(settings)
        base_name = job_desc.uniq_name
        self.env['job_desc'][base_name] = job_desc

        job_name, working_dir = self._get_name_and_dir(
            os.path.join(self.working_dir, 'worker'),
            base_name,
            overwrite=overwrite,
            clear=job_desc.settings.job.clear)

        clock = Clock()
        job = Job(self, job_path, job_name,
                  job_desc=job_desc,
                  working_dir=working_dir,
                  rpc_server=rpc_server,
                  manager=self.manager)
        runner = threading.Thread(target=job.run, args=(True, ))
        runner.start()

        stopped = multiprocessing.Event()

        def shutdown_all():
            # Stop the job first, then the rpc server when one was given.
            job.shutdown()
            if rpc_server:
                rpc_server.shutdown()

        def stop(signum, frame):
            # Act only in a process whose name contains 'main', and only
            # for the first signal received.
            in_main = 'main' in multiprocessing.current_process().name.lower()
            if not in_main or stopped.is_set():
                return
            stopped.set()

            self.logger.debug("Catch interrupt signal, start to stop")
            shutdown_all()

        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, stop)

        # Poll until the job finishes, stays idle for more than
        # MAX_IDLE_TIMES consecutive checks, or its thread exits.
        idle_rounds = 0
        while runner.is_alive():
            if job.get_status() == FINISHED:
                break
            if job.get_status() == IDLE:
                idle_rounds += 1
                if idle_rounds > MAX_IDLE_TIMES:
                    break
            else:
                idle_rounds = 0

            try:
                runner.join(5)
            except IOError:
                break

        # Shut down here only when nothing else has already done so.
        if not job.stopped.is_set() and job.get_status() == FINISHED:
            self.logger.debug(
                'All objects have been fetched, try to finish job')
            shutdown_all()
        elif not (stopped.is_set() or runner.is_alive()):
            shutdown_all()
        elif not job.stopped.is_set() and job.get_status() == IDLE:
            self.logger.debug('No bundle or url to perform, try to finish job')
            shutdown_all()

        elapsed = clock.clock()
        self.logger.debug(
            'Job id:%s finished, spend %.2f seconds for running' %
            (job_name, elapsed))
Ejemplo n.º 5
0
 def run(self):
     """Process units priority by priority until ``self.stopped`` is set.

     Priorities are visited round-robin over ``self.full_priorities``
     slots; the slot whose index equals ``self.n_priorities`` is handled
     as the incremental ('inc') slot. Each visit lasts at most
     ``self.priorities_secs[curr_priority]`` seconds, measured by a fresh
     ``Clock``. Units still pending when a visit ends are appended to
     ``self.priorities_objs[curr_priority]``. However the loop exits, the
     counter client is synced and ``self.save()`` is called.
     """
     try:
         curr_priority = 0
         # Remembers whether the last visit of each priority found work;
         # only read to decide whether to log the start of a visit.
         priority_deals = [True for _ in range(self.full_priorities)]
         while not self.stopped.is_set():
             priority_name = 'inc' if curr_priority == self.n_priorities \
                                 else curr_priority
             is_inc = priority_name == 'inc'

             # Hold here while suspended, re-checking every 5 seconds.
             while not self.nonsuspend.wait(5):
                 continue
             if self.stopped.is_set():
                 break

             if priority_deals[curr_priority] is True:
                 self.logger.debug('start to process priority: %s' % priority_name)

             last = self.priorities_secs[curr_priority]
             clock = Clock()
             self.runnings = []
             try:
                 no_budgets_times = 0
                 while not self.stopped.is_set():
                     # Time slice of this priority is used up.
                     if clock.clock() >= last:
                         break

                     if not is_inc:
                         if self._has_not_finished(curr_priority):
                             no_budgets_times = 0
                             self._get_unit(curr_priority, self.runnings)
                         else:
                             if self.settings.job.size == 'auto':
                                 self._get_unit(curr_priority, self.runnings)
                                 # Apply for budget only after a unit was
                                 # obtained, so that budgets are not wasted.
                                 if len(self.runnings) > 0:
                                     status = self._apply(no_budgets_times)
                                     if status == CANNOT_APPLY:
                                         priority_deals[curr_priority] = False
                                         break
                                     elif status == APPLY_FAIL:
                                         no_budgets_times += 1
                                         if len(self.runnings) == 0:
                                             continue
                                     else:
                                         no_budgets_times = 0
                             # Keep compatibility with jobs whose size is
                             # not 'auto': apply first, then fetch a unit.
                             else:
                                 status = self._apply(no_budgets_times)
                                 if status == CANNOT_APPLY:
                                     priority_deals[curr_priority] = False
                                     break
                                 elif status == APPLY_FAIL:
                                     no_budgets_times += 1
                                     if len(self.runnings) == 0:
                                         continue
                                 else:
                                     no_budgets_times = 0
                                     self._get_unit(curr_priority, self.runnings)

                     else:
                         self._get_unit(curr_priority, self.runnings)

                     # Nothing to run for this priority: move on.
                     if len(self.runnings) == 0:
                         priority_deals[curr_priority] = False
                         break
                     else:
                         priority_deals[curr_priority] = True
                     if self.is_bundle:
                         self.logger.debug('process bundle from priority %s' % priority_name)
                         # A bundle gets the remaining slice, capped at
                         # MAX_BUNDLE_RUNNING_SECONDS.
                         rest = min(last - clock.clock(), MAX_BUNDLE_RUNNING_SECONDS)
                         if rest <= 0:
                             break
                         self.running = self.runnings.pop()
                         obj = self.executor.execute(self.running, rest, is_inc=is_inc)
                     else:
                         self.running = self.runnings.pop()
                         obj = self.executor.execute(self.running, is_inc=is_inc)

                     self.running = None
                     if obj is not None:
                         # The executor handed an object back (presumably
                         # unfinished work -- confirm against the executor);
                         # queue it at the front and drop duplicates while
                         # keeping order.
                         self.runnings.insert(0, obj)
                         # list() keeps this a real list on Python 3, where
                         # .keys() is a view lacking pop() and insert().
                         self.runnings = list(OrderedDict.fromkeys(self.runnings))
             finally:
                 # Hand pending units back so they are not lost.
                 self.priorities_objs[curr_priority].extend(self.runnings)

             curr_priority = (curr_priority + 1) % self.full_priorities
     finally:
         self.counter_client.sync()
         self.save()
Ejemplo n.º 6
0
 def run(self):
     """Process units priority by priority until ``self.stopped`` is set.

     Priorities are visited round-robin over ``self.full_priorities``
     slots; the slot whose index equals ``self.n_priorities`` is handled
     as the incremental ('inc') slot. Each visit lasts at most
     ``self.priorities_secs[curr_priority]`` seconds, measured by a fresh
     ``Clock``. Units still pending when a visit ends are appended to
     ``self.priorities_objs[curr_priority]``. However the loop exits, the
     counter client is synced and ``self.save()`` is called.
     """
     try:
         curr_priority = 0
         # Remembers whether the last visit of each priority found work;
         # only read to decide whether to log the start of a visit.
         priority_deals = [True for _ in range(self.full_priorities)]
         while not self.stopped.is_set():
             priority_name = 'inc' if curr_priority == self.n_priorities \
                                 else curr_priority
             is_inc = priority_name == 'inc'

             # Hold here while suspended, re-checking every 5 seconds.
             while not self.nonsuspend.wait(5):
                 continue
             if self.stopped.is_set():
                 break

             if priority_deals[curr_priority] is True:
                 self.logger.debug('start to process priority: %s' % priority_name)

             last = self.priorities_secs[curr_priority]
             clock = Clock()
             self.runnings = []
             try:
                 no_budgets_times = 0
                 while not self.stopped.is_set():
                     # Time slice of this priority is used up.
                     if clock.clock() >= last:
                         break

                     if not is_inc:
                         if self._has_not_finished(curr_priority):
                             no_budgets_times = 0
                             self._get_unit(curr_priority, self.runnings)
                         else:
                             # No unfinished work: apply for budget before
                             # fetching a new unit.
                             status = self._apply(no_budgets_times)
                             if status == CANNOT_APPLY:
                                 priority_deals[curr_priority] = False
                                 break
                             elif status == APPLY_FAIL:
                                 no_budgets_times += 1
                                 if len(self.runnings) == 0:
                                     continue
                             else:
                                 no_budgets_times = 0
                                 self._get_unit(curr_priority, self.runnings)
                     else:
                         self._get_unit(curr_priority, self.runnings)

                     # Nothing to run for this priority: move on.
                     if len(self.runnings) == 0:
                         priority_deals[curr_priority] = False
                         break
                     else:
                         priority_deals[curr_priority] = True
                     if self.is_bundle:
                         self.logger.debug(
                             'process bundle from priority %s' % priority_name)
                         # A bundle gets the remaining slice, capped at
                         # MAX_BUNDLE_RUNNING_SECONDS.
                         rest = min(last - clock.clock(), MAX_BUNDLE_RUNNING_SECONDS)
                         if rest <= 0:
                             break
                         self.running = self.runnings.pop()
                         obj = self.executor.execute(self.running, rest, is_inc=is_inc)
                     else:
                         self.running = self.runnings.pop()
                         obj = self.executor.execute(self.running, is_inc=is_inc)

                     self.running = None
                     if obj is not None:
                         # The executor handed an object back (presumably
                         # unfinished work -- confirm against the executor);
                         # queue it at the front and drop duplicates while
                         # keeping order.
                         self.runnings.insert(0, obj)
                         # list() keeps this a real list on Python 3, where
                         # .keys() is a view lacking pop() and insert().
                         self.runnings = list(OrderedDict.fromkeys(self.runnings))
             finally:
                 # Hand pending units back so they are not lost.
                 self.priorities_objs[curr_priority].extend(self.runnings)

             curr_priority = (curr_priority + 1) % self.full_priorities
     finally:
         self.counter_client.sync()
         self.save()