Example #1
def jobRunStatusWatch(data, stat, event=None):
    try:
        if data and event is not None:
            jobId = event.path.split("/")[3]
            zope.event.notify(JobStatusChangeEvent(jobId))
    except Exception:
        Logger.exception(log)
def action(queue):
    while True:
        try:
            jobId = queue.get()
            doAction(jobId)
        except Exception:
            Logger.exception(log)
def runJob(q):
    while True:
        try:
            event = q.get()
            zope.event.notify(event)
        except Exception:
            Logger.exception(log)
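These watcher and worker loops communicate through zope.event rather than calling each other directly: the ZooKeeper callback only notifies, and a consumer reacts. A minimal sketch of that pattern with the stock zope.event API (the event class here is a stand-in for the project's own JobStatusChangeEvent):

import zope.event

class JobStatusChangeEvent(object):
    def __init__(self, jobId):
        self.jobId = jobId

def onJobStatusChange(event):
    # zope.event.notify() simply calls every registered subscriber in order
    if isinstance(event, JobStatusChangeEvent):
        print("job %s changed status" % event.jobId)

zope.event.subscribers.append(onJobStatusChange)
zope.event.notify(JobStatusChangeEvent("job-123"))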
Example #4
def jobReadyWatch(children):
    try:
        for jobId in children:
            try:
                job = StoreHolder.getServerStore().getJob(jobId)
                if CacheHolder.getCache().hasKey(jobId, JOBS) is False:
                    CacheHolder.getCache().put(jobId, job, JOBS)
                    for taskName in job.tasks:
                        TaskCacheHolder.getJobCache().put(taskName, job.jobId)
            except Exception:
                Logger.exception(log)
    except Exception:
        Logger.exception(log)
def addBroberServerHandler(event):
    if event and event.brokerServer:
        brokerServer = event.brokerServer
        Logger.info(log, "Adding broker server 【%s】, URI: 【%s】" % (brokerServer.hostName, brokerServer.connectUri))
        cabbage = Cabbage(hostName=brokerServer.hostName, broker=brokerServer.connectUri)
        CabbageHolder.getServerCabbages()[brokerServer.hostName] = cabbage
        Logger.debug(log, "Broker servers now: 【%s】" % CabbageHolder.getServerCabbagesStr())
Example #6
def handleRequest(conn, addr):
    try:
        resultMessage = doRequestHandle(conn)  # msgInstance.doAction()
        # the result is normally a Message instance
        if resultMessage:
            conn.sendall(MessageCodec().encode(resultMessage))
    except Exception:
        Logger.exception(log)
Example #7
def workBrokerQueueChangeHandler(event):
    with storeFactory.store() as store:
        work = store.getWork(HOST_NAME)
    CacheHolder.getCache().put(HOST_NAME, work, WORKS)
    if event.isEvent and (work.status == ON_LINE):
        Logger.info(log,"restart")
        CabbageControlHolder.getCabbageControl().restartCelery()
Example #8
def workStatusWatch(data, stat, event=None):
    try:
        if data and event is not None:
            # e.g. path = u'/cabbage/works/huamac/status'
            hostname = event.path.split("/")[3]
            if hostname == HOST_NAME:
                zope.event.notify(ClientWorkStatusEvent(data))
    except Exception:
        Logger.exception(log)
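Watchers like this one recover identifiers purely by position in the znode path. Splitting an absolute path on "/" leaves an empty first element, so index 3 is the fourth path segment; a quick illustration:

# "/cabbage/works/huamac/status".split("/")
# -> ['', 'cabbage', 'works', 'huamac', 'status']
path = "/cabbage/works/huamac/status"
assert path.split("/")[3] == "huamac"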
Example #9
def workOnlineWatch(data, stat=None, event=None):
    if event is not None:
        # the node is gone
        if event.type == "DELETED":
            # e.g. path = "/cabbage/works/" + HOST_NAME + "/" + ON_LINE
            hostName = event.path.split("/")[3]
            work = StoreHolder.getServerStore().getWork(hostName)
            work.status = LOST
            Logger.info(log, "Node 【%s】 (IP 【%s】) is dead!" % (hostName, work.ip))
            StoreHolder.getServerStore().updateWorkStatus(work)
Example #10
def configWatch(children):
    try:
        for child in children:
            path = CONFIG_PATH + "/" + child
            kazooClient.addDataListener(path, configOptionDataChange)
            data = kazooClient.getData(path)
            ConfigHolder.getConfig().setProperty(BASE, child, data)
    except Exception:
        Logger.exception(log)
Example #11
def workBrokerQueueWatch(children):
    try:
        data = kazooClient.getData("/" + CABBAGE + "/" + WORKS + "/" +
                                   HOST_NAME + "/" + QUEUES)
        if data and data == DO_NOTHING:
            return
        zope.event.notify(WorkBrokerQueueChangeEvent(children, isEvent=True))
    except Exception:
        Logger.exception(log)
Example #12
def updateTaksSent(self, taskName):
    try:
        self.lock.acquire()
        self.taskSent[taskName] = self.taskSent.get(taskName, 0) + 1
    except Exception:
        Logger.exception(log)
    finally:
        self.lock.release()
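The acquire()/release() pair in try/finally can also be written with the lock as a context manager, which releases even on exceptions. A self-contained sketch of the same counter (the class name is illustrative, not from the project):

import threading

class SentCounter(object):
    def __init__(self):
        self.lock = threading.Lock()
        self.taskSent = {}

    def updateTaskSent(self, taskName):
        with self.lock:  # acquired here, released when the block exits
            self.taskSent[taskName] = self.taskSent.get(taskName, 0) + 1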
Example #13
def jobAduitStatusWatch(data, stat, event=None):
    try:
        if data and data == JOB_AUTH_PASS and event is not None:
            # example /cabbage/jobs/job-47778319-7a86-4b2b-a43a-5e2e94504350/status
            jobId = event.path.split("/")[3]
            with storeFactory.store() as store:
                job = store.getJob(jobId)
            updateJobCache(jobId, job)
            zope.event.notify(JobAuditPassEvent(jobId))
    except Exception:
        Logger.exception(log)
def taskSucceeded(state, event, app):
    taskId = event['uuid']
    task = state.tasks.get(taskId)
    jobId, taskName = _getJobIdAndTaskName(taskId)
    queueTime = 0
    runtime = 0
    # FIXME: task.name is often missing from the event state
    log.debug("【%s】 TASK SUCCEEDED !" % (event['uuid']))
    try:
        job = CacheHolder.getCache().get(jobId, JOBS)

        result = AsyncResult(taskId,
                             app=CabbageHolder.getServerCabbage(
                                 job.brokerServer).getApp())

        if not isinstance(result.backend, DisabledBackend):
            log.debug("【%s】 TASK SUCCEEDED result【%s】 !" %
                      (event['uuid'], result.result))
            if result.result:
                jobResults.addResult(jobId, result.result)

        # queue time is the gap between the worker receiving and starting the task
        if task and task.started and task.received:
            queueTime = task.started - task.received
        runtime = event['runtime']

        CabbageCounterHolder.getCabbageCounter().updateTaskSucceeded(
            taskName, _getHostName(event), runtime, queueTime)
    except Exception:
        Logger.exception(log)
def taskReceived(state, event, app):
    log.debug("%s" % event)
    try:
        CabbageCounterHolder.getCabbageCounter().updateTaskReceived(
            event['name'], _getHostName(event))
    except Exception:
        Logger.exception(log)
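Handlers with this (state, event, app) signature are this project's own wrappers; the underlying mechanism is Celery's real-time event stream. A minimal sketch of wiring a callback with the stock Celery monitoring API (the broker URL is a placeholder):

from celery import Celery

app = Celery(broker="amqp://guest@localhost//")  # placeholder broker URL

def onTaskSucceeded(event):
    print("task %s succeeded in %ss" % (event['uuid'], event.get('runtime')))

def monitor():
    with app.connection() as connection:
        receiver = app.events.Receiver(connection, handlers={
            'task-succeeded': onTaskSucceeded,
        })
        receiver.capture(limit=None, timeout=None, wakeup=True)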
Example #16
def jobAuditPassHandler(event):
    try:
        jobId = event.jobId
        syncJob(jobId)
        # tell the child process to reload the job's modules
        from cabbage.process.cabbage_control_holder import CabbageControlHolder
        CabbageControlHolder.getCabbageControl().restartCelery()
    except Exception:
        Logger.exception(log)
    def saveJob(self, job):
        parent = "/" + CABBAGE + "/" + JOBS + "/" + job.jobId
        self.client.create(parent, makepath=True)
        Logger.debug(log, parent)
        self.client.create(parent + "/" + JOB_NAME, value=job.jobName)
        self.client.create(parent + "/" + FILE_PATH, value=job.filePath)
        self.client.create(parent + "/" + FILE_NAME, value=job.fileName)
        self.client.create(parent + "/" + FILE_TYPE, value=job.fileType)
        self.client.create(parent + "/" + STATUS, value=job.status)
        self.client.create(parent + "/" + AUDIT_STATUS, value=job.auditStatus)
        self.client.create(parent + "/" + RUN_STRATEGY, value=job.runStrategy)
        self.client.create(parent + "/" + STRATEGY_VALUE,
                           value=job.strategyValue)
        self.client.create(parent + "/" + ATTACH_FILES)
        self.client.create(parent + "/" + REULST_BACKEND,
                           value=job.resultBackend)
        for f in job.attachFiles:
            self.client.create(parent + "/" + ATTACH_FILES + "/" + f.fileName,
                               value=f.filePath,
                               makepath=True)
            self.client.create(parent + "/" + ATTACH_FILES + "/" + f.fileName +
                               "/" + FILE_TYPE,
                               value=f.fileType,
                               makepath=True)
        for w in job.works:
            self.client.create(parent + "/" + WORKS + "/" + LIST + "/" +
                               w.hostName,
                               value=w.port,
                               makepath=True)
            if not self.client.isExistPath(parent + "/" + WORKS + "/" +
                                           READIES):
                self.client.create(parent + "/" + WORKS + "/" + READIES)

        if job.tasks:
            for task in job.tasks:
                self.client.create(parent + "/" + TASKS + "/" + task,
                                   makepath=True)
                self.client.create("/" + CABBAGE + "/" + JOBS + "/" + RESULTS +
                                   "/" + job.jobId + "/" + task,
                                   makepath=True)

        self.client.create(parent + "/" + BROKER_SERVER,
                           value=job.brokerServer)

        self.client.create(parent + "/" + QUEUE, value=job.brokerQueue)

        # This dedicated "ready" node exists because cluster nodes may start
        # watching before the nodes above are fully created and read
        # incomplete data; they watch this node instead, which is created last.
        self.client.create("/" + CABBAGE + "/" + JOBS + "/" + READIES + "/" +
                           job.jobId)
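saveJob persists the job as a tree of znodes; the create(path, value=..., makepath=True) calls correspond to kazoo's KazooClient.create, where makepath creates missing parents like mkdir -p. A standalone sketch (the ensemble address is a placeholder, and kazoo expects bytes values):

from kazoo.client import KazooClient

zk = KazooClient(hosts="127.0.0.1:2181")  # placeholder ensemble
zk.start()
zk.create("/cabbage/jobs/job-1/status", value=b"READY", makepath=True)
data, stat = zk.get("/cabbage/jobs/job-1/status")
print(data)
zk.stop()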
Example #18
def addScriptJobId(self, jobId):
    store = None
    try:
        store = StoreHolder.getStore()
        log.info("Node 【%s】 is loading the script for job 【%s】..." % (HOST_NAME, jobId))
        self.loadJobScript(jobId, store)
        work = store.getWork(HOST_NAME)
        self.sendBeReady(jobId, work, store)
    except Exception:
        Logger.exception(log)
    finally:
        # the finally block owns the close; closing inside try as well
        # would close the store twice
        if store:
            store.close()
Example #19
def updateTaskReceived(self, taskName, hostName):
    try:
        self.lock.acquire()
        if taskName in self.taskReceived:
            hostCounts = self.taskReceived[taskName]
            hostCounts[hostName] = hostCounts.get(hostName, 0) + 1
        else:
            self.taskReceived[taskName] = {hostName: 1}
    except Exception:
        Logger.exception(log)
    finally:
        self.lock.release()
def doAction(jobId):
    if not CacheHolder.getCache().hasKey(jobId, JOBS):
        with storeFactory.store() as store:
            job = store.getJob(jobId)
            CacheHolder.getCache().put(jobId, job, JOBS)

    job = CacheHolder.getCache().get(jobId, JOBS)

    Logger.debug(log, "upload files. job【%s】" % str(job.asDict()))

    if job.resultBackend is None:
        return
    elif job.resultBackend == NFS:
        CabbageNfsBackend(jobId).save()
    elif job.resultBackend == HDFS:
        CabbageHdfsBackend(jobId).save()
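The if/elif chain above dispatches on the backend name; the same choice can be kept in a table, which is easy to extend with new backends. A sketch with hypothetical stand-ins for CabbageNfsBackend and CabbageHdfsBackend:

class NfsBackend(object):
    def __init__(self, jobId):
        self.jobId = jobId
    def save(self):
        print("saving %s to NFS" % self.jobId)

class HdfsBackend(object):
    def __init__(self, jobId):
        self.jobId = jobId
    def save(self):
        print("saving %s to HDFS" % self.jobId)

BACKENDS = {"NFS": NfsBackend, "HDFS": HdfsBackend}

def saveResult(jobId, resultBackend):
    backend = BACKENDS.get(resultBackend)
    if backend is not None:  # None means no result backend is configured
        backend(jobId).save()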
Example #21
def updateTaskSucceeded(self, taskName, hostName, runTime, queueTime):
    try:
        self.lock.acquire()
        if taskName in self.taskSucceeded:
            if hostName in self.taskSucceeded[taskName]:
                m = self.taskSucceeded[taskName][hostName]
                m[TASK_COUNT] = m[TASK_COUNT] + 1
                m[TASK_RUNTIME] = m[TASK_RUNTIME] + runTime
                m[TASK_QUEUE_TIME] = m[TASK_QUEUE_TIME] + queueTime
            else:
                self.taskSucceeded[taskName][hostName] = {
                    TASK_COUNT: 1, TASK_RUNTIME: runTime, TASK_QUEUE_TIME: queueTime}
        else:
            self.taskSucceeded[taskName] = {
                hostName: {TASK_COUNT: 1, TASK_RUNTIME: runTime, TASK_QUEUE_TIME: queueTime}}
    except Exception:
        Logger.exception(log)
    finally:
        self.lock.release()
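The nested taskName -> hostName bookkeeping in the last two counters can be compressed with collections.defaultdict, which creates missing inner entries on first access. A minimal sketch (the key names are illustrative):

import threading
from collections import defaultdict

lock = threading.Lock()
taskSucceeded = defaultdict(lambda: defaultdict(
    lambda: {"count": 0, "runtime": 0.0, "queueTime": 0.0}))

def updateTaskSucceeded(taskName, hostName, runTime, queueTime):
    with lock:
        m = taskSucceeded[taskName][hostName]
        m["count"] += 1
        m["runtime"] += runTime
        m["queueTime"] += queueTime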
Example #22
def jobChildWatch(children):
    try:
        for jobId in children:
            jobId = str(jobId)
            with storeFactory.store() as store:
                job = store.getJob(jobId)
                work = store.getWork(HOST_NAME)
            if CacheHolder.getCache().hasKey(
                    jobId, JOBS) is False and job.brokerQueue in work.queues:
                # add watchers for this job's status and audit status
                parent = "/" + CABBAGE + "/" + JOBS + "/" + jobId
                kazooClient.addDataListener(parent + "/" + STATUS,
                                            jobRunStatusWatch)
                kazooClient.addDataListener(parent + "/" + AUDIT_STATUS,
                                            jobAduitStatusWatch)
                updateJobCache(jobId, job)
    except Exception:
        Logger.exception(log)
def taskSent(state, event, app):
    try:
        taskName = event['name']
        taskId = event['uuid']
        jobId = TaskCacheHolder.getJobCache().get(taskName)
        CabbageCounterHolder.getCabbageCounter().updateTaksSent(taskName)
    except Exception:
        Logger.exception(log)
def taskFailed(state, event, app):
    eventOutDic = event.copy()
    taskName = None
    try:
        taskId = event['uuid']
        task = state.tasks.get(taskId)

        jobId, taskName = _getJobIdAndTaskName(taskId)
        eventOutDic[JOB_ID] = jobId

        job = CacheHolder.getCache().get(eventOutDic[JOB_ID], JOBS)

        brokerServer = job.brokerServer
        taskPath = ConfigHolder.getConfig().getProperty(
            BASE, TASK_FAILLOG_PATH)

        if not os.path.isdir(taskPath):
            os.makedirs(taskPath)

        # append the failed event to a per-broker, per-day log file
        dateStr = getNowDateStr()
        with open(taskPath + "/" + brokerServer + "_" + dateStr + ".log",
                  "a+") as writer:
            writer.write(str(eventOutDic) + "\n")

        CabbageCounterHolder.getCabbageCounter().updateTaksFail(
            taskName, _getHostName(event))
    except Exception:
        Logger.exception(log)
Example #25
def doAction(self, actionFun):
    try:
        self.lock.acquire()
        # snapshot all counters under the lock, hand the copies to the
        # callback, then reset the live counters
        sendDict = self.taskSent.copy()
        receivedDict = self.taskReceived.copy()
        failDict = self.taskFail.copy()
        succeedDict = self.taskSucceeded.copy()
        actionFun(sendDict, receivedDict, failDict, succeedDict)
        self.taskFail.clear()
        self.taskSent.clear()
        self.taskSucceeded.clear()
        self.taskReceived.clear()
    except Exception:
        Logger.exception(log)
    finally:
        self.lock.release()
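doAction snapshots all four dicts while holding the lock, hands the copies to the callback, and clears the originals, so slow reporting never blocks the event handlers for long. A usage sketch (reportCounters and counter are hypothetical):

def reportCounters(sent, received, failed, succeeded):
    # operates on the snapshot; the live dicts were already cleared
    print("sent=%d received=%d failed=%d succeeded=%d" %
          (len(sent), len(received), len(failed), len(succeeded)))

counter.doAction(reportCounters)  # counter: an instance of the class above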
    def save(self):
        hdfsPath = ConfigHolder.getConfig().getProperty(BASE, HDFS_ROOT_PATH)
        if self.jobId:
            localPath = getLocalFilesPath()
            dateStr = getNowDateStr()
            hour = getNowHour()

            if hour == 0:  # at midnight, upload the previous day's data
                dateStr = formatDate(subDay(getNow(), 1), f="%Y%m%d")

            p = localPath + "/" + self.jobId + "/result/" + dateStr
            Logger.debug(log, "upload file to hdfs. jobId【%s】 date【%s】" % (self.jobId, dateStr))
            if not os.path.isdir(p):
                return

            fileNames = os.listdir(p)
            if len(fileNames) == 0:
                return

            client = HdfsClientHolder.getHdfsClient()
            remoteDire = hdfsPath + "/" + self.jobId

            if not client.isDirectory(remoteDire):
                client.mkdir(remoteDire)
            remoteDire = remoteDire + "/" + dateStr

            if not client.isDirectory(remoteDire):
                client.mkdir(remoteDire)
            Logger.info(log, "hour:%s  files:%s" % (hour, ",".join(fileNames)))
            for fileName in fileNames:
                # only upload hour-files that are already complete
                if hour != 0:
                    if int(fileName) >= hour:
                        continue
                self.uploadToHdfs(client, localPath, self.jobId, hdfsPath, fileName, dateStr)
                os.remove(p + "/" + fileName)
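save() rolls the date back one day at hour 0, since files written during hour 23 still belong to the previous day's batch. The same computation with only the standard library (formatDate and subDay are project helpers):

from datetime import datetime, timedelta

now = datetime.now()
dateStr = now.strftime("%Y%m%d")
if now.hour == 0:  # at midnight, finish uploading yesterday's files
    dateStr = (now - timedelta(days=1)).strftime("%Y%m%d")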
def jobRemoveHandler(event):
    try:
        jobId = event.jobId
        if JobCacheHolder.getJobCache().has_key(jobId):
            jobRun = JobCacheHolder.getJobCache().get(jobId)
            if jobRun:  # stop the running task
                jobRun.stop()
            else:
                job = CacheHolder.getCache().get(jobId, JOBS)
                for taskName in job.tasks:
                    CabbageHolder.getServerCabbage(job.brokerServer).revokeByTaskName(taskName)

        with storeFactory.store() as store:
            store.updateJobStatus(jobId, JOB_DELETE)
        # drop cache entries so a future task can reuse the same name
        tasks = CacheHolder.getCache().get(jobId, JOBS).tasks
        for taskName in tasks:
            if TaskCacheHolder.getJobCache().has_key(taskName):
                TaskCacheHolder.getJobCache().remove(taskName)

        CacheHolder.getCache().remove(jobId, JOBS)
    except Exception:
        Logger.exception(log)
def save(self):
    try:
        nfsPath = ConfigHolder.getConfig().getProperty(BASE, NFS_DIRECTORY)
        if self.jobId:
            localPath = getLocalFilesPath()
            dateStr = getNowDateStr()
            hour = getNowHour()

            if hour == 0:  # at midnight, upload the previous day's data
                dateStr = formatDate(subDay(getNow(), 1), f="%Y%m%d")

            localPath = localPath + "/" + self.jobId + "/result/" + dateStr

            Logger.info(log, "upload file to nfs. jobId【%s】 date【%s】" % (self.jobId, dateStr))
            if not os.path.isdir(localPath):
                return

            fileNames = os.listdir(localPath)
            if len(fileNames) == 0:
                return

            remoteDire = nfsPath + "/" + self.jobId + "/" + dateStr

            if not os.path.isdir(remoteDire):
                os.makedirs(remoteDire)
            Logger.info(log, "hour:%s  files:%s" % (hour, ",".join(fileNames)))
            for fileName in fileNames:
                if hour != 0:
                    if int(fileName) >= hour:
                        continue

                # prefix files with host, IP, and role so files from
                # different nodes do not collide on the shared mount
                if os.environ[CABBAGE] == MASTER:
                    newFileName = HOST_NAME + "_" + LOCAL_IP + "_" + MASTER + "_" + fileName
                else:
                    newFileName = HOST_NAME + "_" + LOCAL_IP + "_" + NODE + "_" + fileName

                if os.path.isfile(localPath + "/" + fileName):
                    shutil.move(localPath + "/" + fileName, remoteDire + "/" + newFileName)
    except Exception:
        Logger.exception(log)
Example #29
def jobWebWatch(children):
    store = storeFactory.getStore()
    try:
        brokers = {}
        for jobId in children:
            try:
                job = store.getJob(jobId)
                if CacheHolder.getCache().hasKey(jobId, JOBS) is False:
                    CacheHolder.getCache().put(jobId, job, JOBS)

                # Shortcut: route every job that has not been deleted
                if job.status != JOB_DELETE:
                    brokerServer = job.brokerServer

                    routes = {}
                    for taskName in job.tasks:
                        que = store.getQueue(job.brokerQueue)
                        routes[taskName] = {'queue': que.queueName, 'routing_key': que.routingKey}
                        TaskCacheHolder.getJobCache().put(taskName, job.jobId)

                    if brokerServer in brokers:
                        brokers[brokerServer].update(routes)
                    else:
                        brokers[brokerServer] = routes
            except Exception:
                Logger.exception(log)

        for broker, routes in brokers.items():
            brokerServer = store.getBrokerServer(broker)
            # Bug fix: rebuild the Cabbage app so tasks are no longer
            # submitted to the wrong Celery queue
            cabbage = Cabbage(hostName=brokerServer.hostName, broker=brokerServer.connectUri)
            cabbage.app.conf.update(CELERY_ROUTES=routes)
            CabbageHolder.getServerCabbages()[brokerServer.hostName] = cabbage

            Logger.info(log, "Updated broker server 【%s】 ROUTES 【%s】" % (brokerServer.hostName, str(routes)))
    except Exception:
        Logger.exception(log)
    finally:
        storeFactory.returnStroe(store)
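jobWebWatch rebuilds per-broker CELERY_ROUTES so every task name is pinned to its job's queue. With a plain Celery app the equivalent update looks like this (CELERY_ROUTES is the pre-4.0 setting name; the broker URL and queue values are placeholders):

from celery import Celery

app = Celery(broker="amqp://guest@localhost//")  # placeholder broker URL
routes = {
    "job-1.crawlTask": {"queue": "q1", "routing_key": "q1.key"},
}
app.conf.update(CELERY_ROUTES=routes)  # Celery 4+ spells this task_routes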
Example #30
# -*- encoding: utf-8 -*-
'''
Created on 2016-08-17

@author: hua
'''
from cabbage.common.log.logger import Logger
from cabbage.utils.host_name import getHostName


def error():
    # deliberately raise so the exception logging below can be demonstrated
    raise Exception("test error")


log = Logger.getLogger(__name__)
try:
    error()
except Exception as e:
    log.exception(getHostName())