예제 #1
0
 def generate_nextround_task_thread(self, cntRound):
     """Generate the tasks of round ``cntRound`` and report any failure.

     Delegates to ``self.generate_nextround_task(cntRound)``.  When that
     call succeeds, ``self.isGeneratingTasks`` is set to False.  When it
     raises, the exception is logged, an error mail naming ``self.jobID``
     is sent, and nothing is re-raised; ``self.isGeneratingTasks`` is left
     unchanged in that case.

     :param cntRound: round number handed to ``generate_nextround_task``.
     """
     try:
         self.generate_nextround_task(cntRound)
         self.isGeneratingTasks = False
     except Exception as e:
         # "except ... as" is accepted by Python 2.6+ and Python 3, whereas
         # the older "except Exception, e" form is a SyntaxError on Python 3.
         log.info(e)
         mail.send_mail('[error]generating task for job, jobID:' +
                        self.jobID +
                        ' failed, you should check why and restart the job')
예제 #2
0
 def check_workermanager(self):
     """Remove the first workermanager that reports itself as shut down.

     While holding ``self.__bigGranularityLock``, each registered
     workermanager is polled with ``get_workermanagerstatus(ip)``.  For the
     first one whose status is ``status.WORKER.shutdown`` the method logs
     and mails a warning, updates the status of every task in its queue via
     ``self.taskPool.set_task_status`` and drops the workermanager from the
     registry.  At most one workermanager is removed per call.

     Any exception is logged and swallowed.  The lock is always released
     before returning.
     """
     self.__bigGranularityLock.acquire()
     try:
         # Iterate over a snapshot because an entry is popped inside the loop.
         for ip, workermanagerinfo in list(self.__workermanagerdict.items()):
             if get_workermanagerstatus(ip) != status.WORKER.shutdown:
                 continue

             tasks = workermanagerinfo.get_taskqueue()
             log.warning('workermanager, ip:' + str(ip) +
                         ' is shutdown, we will remove all tasks on it:')
             log.warning(tasks)
             mail.send_mail('[warning] workermanager, ip:' + str(ip) +
                            ' is shutdown, please check it')
             for task in tasks:
                 pos = decodeID.taskID(task.get_ID())
                 # NOTE(review): the meaning of the two status arguments is
                 # defined by taskPool.set_task_status, which is not visible
                 # here -- confirm against its definition.
                 self.taskPool.set_task_status(pos, status.TASK.done,
                                               status.TASK.dispatched)
             self.__workermanagerdict.pop(ip)
             # Only one shutdown workermanager is handled per invocation.
             break
     except Exception as e:
         log.info(e)
     finally:
         # Without this release the lock stayed held after the first call.
         self.__bigGranularityLock.release()
예제 #3
0
    def generate_task_from_job_thread(self):
        """Generate (or recover) the tasks of the round this job should run.

        Steps, all inside one ``try``:

        1. If ``is_this_job_has_finished(self.jobType)`` is true, log it, set
           ``currentRound`` to the last required round, clear
           ``isGeneratingTasks`` and return.
        2. Unless the source file type is ``DiskFile`` or ``UrlFile``, create
           ``self.srcClient`` from the source credentials.
        3. Read ``<synctask>/<jobType>/round<N>/roundstatus`` for N = 0, 1, ...
           until a round is found whose status is not ``'done'``.
        4. If that round's status is ``'ready'``, recover its tasks; otherwise
           generate round 0, or set ``startRound`` and generate the next
           round.
        5. Write ``'ready'`` to the round-status key of ``self.startRound``
           and clear ``isGeneratingTasks``.

        Any exception is logged and reported by mail; it is not re-raised and
        ``isGeneratingTasks`` is left unchanged in that case.
        """
        try:
            desBucketName = self.destination['bucketName']
            if self.is_this_job_has_finished(self.jobType):
                log.info('the ' + self.jobType + ' of ' + self.synctask +
                         ' is already finished')
                self.currentRound = self.startRound + self.requireRoundNum - 1
                self.isGeneratingTasks = False
                return

            fileType = self.source['filetype']
            if (fileType != type.FileType.DiskFile and
                    fileType != type.FileType.UrlFile):
                self.srcClient = clientfactory.create_client(
                    fileType, self.source['accessKey'],
                    self.source['secretKey'], self.source['endpoint'])

            roundKeyPrefix = self.synctask + '/' + self.jobType + '/round'

            # Skip every round that has already been marked as done.
            cntRound = 0
            while True:
                roundStatus = self.get_this_round_status(
                    desBucketName,
                    roundKeyPrefix + str(cntRound) + '/roundstatus')
                if roundStatus != 'done':
                    break
                cntRound += 1

            if roundStatus == 'ready':
                self.recover_this_round_task(cntRound)
            elif cntRound == 0:
                self.generate_round0_task()
            else:
                self.startRound = cntRound
                self.generate_nextround_task(cntRound)

            put_roundstatus(
                self.desClient, desBucketName,
                roundKeyPrefix + str(self.startRound) + '/roundstatus',
                'ready')
            self.isGeneratingTasks = False
        except (ossexception.GetObjectFailed, ossexception.PutObjectFailed,
                ossexception.ListObjectFailed,
                taskexception.GenerateTaskFailed, Exception) as e:
            # "as" replaces the Python-2-only "except (...), e" spelling; the
            # set of caught exception types is unchanged.
            log.warn(e)
            mail.send_mail('[error]generating task for job, jobID:' +
                           self.jobID +
                           ' failed, you should check why and restart the job')
예제 #4
0
def monitor_thread():
    # Endless monitoring loop over WorkerManager.workerqueue.
    #
    # Each pass acquires WorkerManager.bigGranularityLock and polls every
    # worker with get_workerstatus(port).  The first worker reported as
    # status.WORKER.shutdown is removed from the queue, a warning is logged
    # and (in most cases) mailed, and the scan stops for this pass.
    #
    # NOTE(review): this excerpt ends at a bare "finally:"; the clause body
    # and anything after it are not visible here.  `flag` and `message` are
    # assigned below but not used in the visible part -- presumably they are
    # consumed by the missing tail; confirm against the full source.
    while True:
        try:
            flag = 0
            WorkerManager.bigGranularityLock.acquire()
            for worker in WorkerManager.workerqueue:
                workerstatus = get_workerstatus(worker.get_port())
                if workerstatus == status.WORKER.shutdown:
                    if worker.get_status() == status.WORKER.busy:
                        # The worker was recorded as busy: remember that a
                        # task was affected and build a failure message
                        # (last constructor argument is False) for it.
                        flag = 1
                        task = worker.get_task()
                        WorkerManager.workerqueue.remove(worker)
                        message = WorkerMessage(WorkerManager.ip, task['ID'],
                                                worker.get_port(), False)
                        message.set_type(task['type'])
                        log.warning(
                            '[warning] ' + 'worker, port:' +
                            str(worker.get_port()) + ' ip:' +
                            str(WorkerManager.ip) +
                            ' is shutdown, we will remove task, taskID:' +
                            str(task['ID']) + ' on it')
                        # The mail is sent only when the task does not
                        # report itself as deleted.
                        if task.get_isDeleted() == False:
                            mail.send_mail(
                                '[warning] ' + 'worker, port:' +
                                str(worker.get_port()) + ' ip:' +
                                str(WorkerManager.ip) +
                                ' is shutdown, we will remove task, taskID:' +
                                str(task['ID']) + ' on it')
                    else:
                        # The worker was not recorded as busy: just drop it
                        # and warn.
                        WorkerManager.workerqueue.remove(worker)
                        log.warning('[warning] ' + 'worker, port:' +
                                    str(worker.get_port()) + ' ip:' +
                                    str(WorkerManager.ip) +
                                    ' is shutdown, no tasks on it')
                        mail.send_mail('[warning] ' + 'worker, port:' +
                                       str(worker.get_port()) + ' ip:' +
                                       str(WorkerManager.ip) +
                                       ' is shutdown, no tasks on it')
                    # The queue was modified while being iterated, so stop
                    # after the first shutdown worker.
                    break
        except Exception, e:
            # Any failure during the pass is logged; the loop keeps running.
            log.warning(e)
        finally:
            # NOTE(review): the body of this clause is missing from the
            # excerpt; presumably it releases bigGranularityLock -- confirm.
예제 #5
0
def monitor_thread():
    # Endless monitoring loop over MasterManager.masterqueue.
    #
    # Each pass acquires MasterManager.bigGranularityLock and polls every
    # master with get_masterstatus(port).  The first master reported as
    # status.MASTER.shutdown is removed from the queue with a warning being
    # logged and mailed, and the scan stops for this pass.  Masters reported
    # as status.MASTER.busy get set_master_token(port) called on them.
    #
    # NOTE(review): this excerpt ends at a bare "finally:"; the clause body
    # and anything after it are not visible here.  `flag` and `message` are
    # assigned below but not used in the visible part -- presumably they are
    # consumed by the missing tail; confirm against the full source.
    while True:
        try:
            MasterManager.bigGranularityLock.acquire()
            flag = 0
            message = None
            for master in MasterManager.masterqueue:
                masterstatus = get_masterstatus(master.get_port())
                if masterstatus == status.MASTER.shutdown:
                    if master.get_status() == status.MASTER.busy:
                        # The master was recorded as busy: remember that a
                        # job was affected and build a failure message (last
                        # constructor argument is False) for it.
                        flag = 1
                        job = master.get_job()
                        message = Message('127.0.0.1', job['job-ID'],
                                          master.get_port(), False)
                        MasterManager.masterqueue.remove(master)
                        log.warning(
                            'master, port:' + str(master.get_port()) +
                            ' is shutdown, we will remove job, jobID:' +
                            job['job-ID'] + ' on it')
                        mail.send_mail(
                            '[warning] master, port:' +
                            str(master.get_port()) +
                            ' is shutdown, we will remove job, jobID:' +
                            job['job-ID'] + ' on it')
                        # The queue was modified while being iterated, so
                        # stop scanning.
                        break
                    else:
                        # The master was not recorded as busy: just drop it
                        # and warn.
                        MasterManager.masterqueue.remove(master)
                        log.warning('master, port:' + str(master.get_port()) +
                                    ' is shutdown, no jobs on it')
                        mail.send_mail('master, port:' +
                                       str(master.get_port()) +
                                       ' is shutdown, no jobs on it')
                        # Same reason as above: the queue was just modified.
                        break
                elif masterstatus == status.MASTER.busy:
                    set_master_token(master.get_port())

        except Exception, e:
            # Any failure during the pass is logged; the loop keeps running.
            log.info(e)
        finally:
            # NOTE(review): the body of this clause is missing from the
            # excerpt; presumably it releases bigGranularityLock -- confirm.
예제 #6
0
def send_message_to_master(master_ip_port, message):
    """Deliver ``message`` to the master over XML-RPC, retrying on failure.

    An attempt fails when the RPC raises or when the master's
    ``send_message`` reply is falsy.  After each failed attempt the function
    waits 10 seconds and tries again.  Once more than 5 attempts have failed
    it sends an error mail and gives up.

    Previously the loop never terminated after the error mail (one mail per
    iteration), and a falsy reply was retried immediately without being
    counted; both now go through the same bounded retry path.

    :param master_ip_port: ``host:port`` of the master's XML-RPC endpoint.
    :param message: message object passed to the remote ``send_message``;
        its ``get_ID()`` is used for logging.
    :returns: True once the master acknowledges the message, False after the
        retry budget is exhausted.
    """
    retryTimes = 0
    while True:
        try:
            s = xmlrpclib.ServerProxy('http://' + str(master_ip_port),
                                      allow_none=True)
            if s.send_message(message):
                log.info('send message:' + message.get_ID() +
                         ' to master successfully, master_ip_port:' +
                         str(master_ip_port))
                return True
            # The master answered but did not acknowledge the message.
            log.warning('send message to master failed, master_ip_port:' +
                        str(master_ip_port) + ', will retry')
        except Exception as e:
            log.warning(e)
            log.warning('send message to master failed, master_ip_port:' +
                        str(master_ip_port) + ', will retry')

        retryTimes += 1
        if retryTimes > 5:
            mail.send_mail(
                '[error] send message to master failed, master_ip_port:' +
                str(master_ip_port) + ', you should check why')
            return False
        sleep(10)
예제 #7
0
 def send_message(self, messageAttr):
     """Process the result message of a job and update the job queue.

     The message is rebuilt from ``messageAttr`` and matched against
     ``JobManager.jobqueue`` by ID (compared as strings):

     * success: the job status ``'done'`` is saved locally, incremental
       sync bookkeeping is updated when enabled, and the job is removed
       from the queue;
     * failure with retries left: the retry counter is incremented and the
       job is set back to ``status.JOB.ready``;
     * failure with no retries left: the status ``'failed'`` is saved, the
       last-done time is stored when incremental sync is enabled, and the
       job is removed from the queue.

     Each outcome is logged and mailed.  An unknown job ID is only logged.
     ``JobManager.bigGranularityLock`` is held for the whole call and is
     always released, which the previous version never did.

     :param messageAttr: attribute payload passed to
         ``Message().with_attribute``.
     :returns: always True, including when an exception was caught and
         logged.
     """
     JobManager.bigGranularityLock.acquire()
     try:
         message = Message().with_attribute(messageAttr)
         messageID = str(message.get_ID())
         log.info('receive message from mastermanager, job:' + messageID)
         log.info(messageAttr)

         job = None
         for candidate in JobManager.jobqueue:
             if str(candidate.get_ID()) == messageID:
                 job = candidate
                 break
         if job is None:
             log.info('we didn\'t have any record of ' + messageID)
             return True

         incremental = job.get_job()['sync-enable-increment'] == 'True'
         # NOTE(review): kept as an equality test rather than a truthiness
         # test -- the flag's type is not visible here, and a non-bool value
         # such as a string would otherwise count as success.
         if message.get_isSuccess() == True:
             commands.save_job_status_to_local(job.get_ID(), 'done\n')
             if incremental:
                 save_job_lastdonetime(job.get_ID())
                 syncinfo_times_pp(job.get_ID())
             JobManager.jobqueue.remove(job)
             log.info('[info] ' + job.to_string() +
                      ' finished successfully')
             mail.send_mail('[info] ' + job.to_string() +
                            ' finished successfully')
         elif job.get_retrytimes() < JobManager.MAXJOBRETRYTIMES:
             job.set_retrytimes(job.get_retrytimes() + 1)
             job.set_status(status.JOB.ready)
             log.info('[warning] ' + job.to_string() +
                      ' failed, we will retry, retrytimes:' +
                      str(job.get_retrytimes()))
             mail.send_mail('[warning] ' + job.to_string() +
                            ' failed, we will retry, retrytimes:' +
                            str(job.get_retrytimes()))
         else:
             commands.save_job_status_to_local(job.get_ID(), 'failed\n')
             if incremental:
                 save_job_lastdonetime(job.get_ID())
             JobManager.jobqueue.remove(job)
             log.warning('[error] ' + job.to_string() +
                         ' failed, please check it')
             mail.send_mail('[error] ' + job.to_string() +
                            ' failed, please check it')
         return True
     except Exception as e:
         log.info(e)
         return True
     finally:
         JobManager.bigGranularityLock.release()