def __init__(self,
             ip=ConfigHolder.getConfig().getProperty(BASE, SERVER_IP),
             port=int(ConfigHolder.getConfig().getProperty(BASE, SERVER_PORT))):
    self.ip = ip
    self.port = port
    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
def getLocalFilesPath():
    serverType = os.environ.get(CABBAGE)
    if serverType == NODE:
        return ConfigHolder.getConfig().getProperty(BASE, CLIENT_FILE_DIRECTORY)
    elif serverType == MASTER:
        return ConfigHolder.getConfig().getProperty(BASE, SERVER_FILE_DIRECTORY)
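# Usage sketch (assumption: CABBAGE is already set, as the initializers below
# do via os.environ.setdefault): the same helper resolves to the client or
# the server directory depending on the role.
os.environ.setdefault(CABBAGE, NODE)
resultDir = getLocalFilesPath()  # returns the CLIENT_FILE_DIRECTORY value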
def configWatch(children):
    try:
        for child in children:
            path = CONFIG_PATH + "/" + child
            kazooClient.addDataListener(path, configOptionDataChange)
            data = kazooClient.getData(path)
            ConfigHolder.getConfig().setProperty(BASE, child, data)
    except Exception:
        Logger.exception(log)
def __init__(self,
             ip=ConfigHolder.getConfig().getProperty(BASE, SERVER_IP),
             port=int(ConfigHolder.getConfig().getProperty(BASE, SERVER_PORT)),
             maxCon=1000):
    self.pool = None
    # do not accept more than maxCon (default 1000) connections
    self.ip = ip
    self.port = port
    self.maxCon = maxCon
    self.inited = False
def run(self):
    jobId = self.message.jobId
    fileName = self.message.fileName
    serverDir = ConfigHolder.getConfig().getProperty(BASE, SERVER_FILE_DIRECTORY)
    jobDir = serverDir + "/" + jobId
    filePath = jobDir + "/" + fileName
    with storeFactory.store() as store:
        job = store.getJob(jobId)
    msg = FileResponseMessage()
    msg.fileName = fileName
    msg.jobId = jobId
    if self.message.type == self.message.MAIN:
        if fileName == job.fileName:
            with open(filePath) as f:
                msg.fileContent = base64.encodestring(f.read())
        return msg
    if self.message.type == self.message.ATTACH:
        for attachFile in job.attachFiles:
            if attachFile.fileName == fileName:
                with open(filePath) as f:
                    msg.fileContent = base64.encodestring(f.read())
                break
        return msg
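# Hedged sketch of the base64 transport used above and in the file-writing
# handler further down: the master encodes the script bytes into the message,
# the node decodes them back verbatim. Standard library only; the literal is
# illustrative, not from the source.
import base64

raw = "print('hello from a job script')\n"
encoded = base64.encodestring(raw)          # what msg.fileContent carries
assert base64.decodestring(encoded) == raw  # byte-for-byte round trip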
def _initJobs(self):
    store = StoreHolder.getRetryStore()
    jobs = store.getJobs()
    work = store.getWork(HOST_NAME)
    queues = work.queues
    routes = {}
    for job in jobs:
        if job.status != JOB_DELETE and job.brokerQueue in queues:
            # fix: cache the job when scaling out dynamically
            if not CacheHolder.getCache().hasKey(job.jobId, JOBS):
                CacheHolder.getCache().put(job.jobId, job, JOBS)
            clientDir = ConfigHolder.getConfig().getProperty(BASE, CLIENT_FILE_DIRECTORY)
            path = clientDir + "/" + job.jobId
            if not os.path.isdir(path):
                # fix: job files were out of sync; pull them from the master
                syncJob(job.jobId, store)
            self.addScriptJobId(job.jobId)
            for taskName in job.tasks:
                que = store.getQueue(job.brokerQueue)
                routes[taskName] = {'queue': que.queueName, 'routing_key': que.routingKey}
    celeryconfig.CELERY_ROUTES = routes
def start(self):
    log.info("Starting web server.........")
    application = CabbageApplication([
        (r"/", MainHandler),
        (r"/toNewJob", NewJobHandler),
        (r"/runJob", JobRunHandler),
        (r"/jobList", JobListHandler),
        (r"/jobRunList", JobRunListHandler),
        (r"/jobListData", JobListDataHandler),
        (r"/removeJob", RemoveJobListHandlder),
        (r"/work/list", WorkListHandler),
        (r"/work/workStatusChange", WorkStatusHandler),
        (r"/queues/brokerServer", BrokerServerHandler),
        (r"/queues/brokerQueue", BrokerQueueHandler),
        (r"/queues/selectQueue", BrokerQueueByHostNameListHandler),
        (r"/queues/addQueue", AddBrokerQueueHandler),
        (r"/queues/addQueueNode", AddBrokerQueueNodeHandler),
        (r"/queues/addBrokerServer", AddBrokerServerHandler),
        (r"/config", ConfigHandler),
        (r"/settings", SettingsHandler),
        (r"/login", LoginHandler),
    ], debug=False, **settings)
    port = ConfigHolder.getConfig().getProperty(BASE, SERVER_WEB_PORT)
    sockets = tornado.netutil.bind_sockets(port)
    tornado.process.fork_processes(8)
    server = HTTPServer(application)
    server.add_sockets(sockets)
    log.info("Web server started successfully, port: %s........." % port)
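# Process-model note (standard tornado multi-process pattern, not
# project-specific): bind_sockets() opens the listening sockets in the parent
# process, fork_processes(8) forks eight workers, and each worker then
# accepts connections on the shared sockets via add_sockets().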
class Logger():
    logging.config.fileConfig(ConfigHolder.getConfig().getProperty(BASE, LOG_CONFIG_PATH))
    loggers = {}

    @classmethod
    def getLogger(cls, key):
        if key in Logger.loggers:
            return Logger.loggers[key]
        else:
            logger = logging.getLogger(key)
            Logger.loggers[key] = logger
            return logger

    @classmethod
    def info(cls, log, message):
        log.info("【%s】: %s" % (HOST_NAME, message))

    @classmethod
    def debug(cls, log, message):
        log.debug("【%s】: %s" % (HOST_NAME, message))

    @classmethod
    def error(cls, log, message):
        log.error("【%s】: %s" % (HOST_NAME, message))

    @classmethod
    def exception(cls, log):
        log.exception("【%s】:" % HOST_NAME)
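# Minimal usage sketch for the Logger facade above ("web" is a hypothetical
# logger key; real keys come from the fileConfig-defined logging config):
log = Logger.getLogger("web")
Logger.info(log, "node started")   # logs "【<HOST_NAME>】: node started"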
def _getConnectUri(self):
    connectUri = ConfigHolder.getConfig().getProperty(BASE, CONNECT_URI)
    work = CacheHolder.getCache().get(HOST_NAME, WORKS)
    if work.brokerServer:
        brokerServer = StoreHolder.getStore().getBrokerServer(work.brokerServer)
        connectUri = brokerServer.connectUri
    return connectUri
def _initConifg(self):
    CONFIG_PATH = "/cabbage/config"
    if not self.kazooClient.isExistPath(CONFIG_PATH):
        self.kazooClient.create(CONFIG_PATH, makepath=True)
    for key, value in ConfigHolder.getConfig().items(BASE):
        self.kazooClient.createPersistent(CONFIG_PATH + "/" + key, value)
    self.kazooClient.addChildListener(CONFIG_PATH, configWatch)
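# Flow (reconstructed from _initConifg and configWatch above): every BASE
# config entry is mirrored into a persistent znode under /cabbage/config, and
# configWatch attaches a data listener per child, so edits made directly in
# Zookeeper propagate back into ConfigHolder at runtime.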
def __init__(self, cfgPath=None):
    path = ConfigHolder.getConfig(cfgPath=cfgPath).getProperty(BASE, SERVER_FILE_DIRECTORY)
    if not os.path.isdir(path):
        os.makedirs(path)
    self.kazooClient = ZookeeperClientHolder.getRetryClient()
    self.store = StoreHolder.getServerStore()
    self.status = None
    self._initConifg()
def _loadMain(self):
    serverDir = ConfigHolder.getConfig().getProperty(BASE, SERVER_FILE_DIRECTORY)
    path = serverDir + "/" + self.job.jobId
    loadMoudle = LoadMoudleHolder.getLoadMoudle(PYTHON)
    classes = loadMoudle.load(path, self.job.fileName)
    for clazz in classes:
        obj = clazz[1]
        if isCabbageMain(obj):
            return obj
    return None
def getTasks(self, fileName, jobId):
    serverDir = ConfigHolder.getConfig().getProperty(BASE, SERVER_FILE_DIRECTORY)
    path = serverDir + "/" + jobId
    loadMoudle = LoadMoudleHolder.getLoadMoudle(PYTHON)
    classes = loadMoudle.load(path, fileName)
    tasks = []
    for clazz in classes:
        cls = clazz[1]
        if isCabbageTask(cls):
            tasks.append(cls.__module__ + "." + cls.__name__)
    return tasks
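# The identifiers built here ("module.ClassName") appear to be the same
# strings that _initJobs later uses as CELERY_ROUTES keys, so the loader and
# the router must agree on the module layout of the job scripts (inference
# from the surrounding code, not stated in the source).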
def loadMain():
    serverDir = ConfigHolder.getConfig().getProperty(BASE, SERVER_FILE_DIRECTORY)
    path = serverDir + "/job-06a4c37e-3ca2-46cb-9344-9f23fc03e8c7"
    loadMoudle = LoadMoudleHolder.getLoadMoudle(PYTHON)
    classes = loadMoudle.load(path, "product_list_crawler_main.py")
    for clazz in classes:
        obj = clazz[1]
        print clazz
        if isCabbageMain(obj):
            return obj
    return None
def loadMain():
    serverDir = ConfigHolder.getConfig().getProperty(BASE, SERVER_FILE_DIRECTORY)
    path = "/Users/hua/workspace/mypython/cabbage/samples"
    loadMoudle = LoadMoudleHolder.getLoadMoudle(PYTHON)
    classes = loadMoudle.load(path, "test_both_task.py")
    for clazz in classes:
        obj = clazz[1]
        print obj
        if isCabbageTask(obj):
            print obj
            return obj
    return None
def _initData(self):
    if not self.kazooClient.isExistPath("/cabbage/jobs"):
        self.kazooClient.create("/cabbage/jobs", makepath=True)
        self.kazooClient.create("/cabbage/jobs/readies", makepath=True)
        self.kazooClient.create("/cabbage/jobs/results")
    if not self.kazooClient.isExistPath("/cabbage/works"):
        self.kazooClient.create("/cabbage/works", makepath=True)
        self.kazooClient.create("/cabbage/works/readies")
    if not self.kazooClient.isExistPath("/cabbage/monitor"):
        self.kazooClient.create("/cabbage/monitor", makepath=True)
        self.kazooClient.create("/cabbage/monitor/jobs")
        self.kazooClient.create("/cabbage/monitor/works")
        self.kazooClient.create("/cabbage/monitor/brokerServers")
    if not self.kazooClient.isExistPath("/cabbage/users"):
        self.kazooClient.create("/cabbage/users", makepath=True)
    if not self.kazooClient.isExistPath(
            "/cabbage/users/" + ConfigHolder.getConfig().getProperty(BASE, ADMIN_NAME)):
        self.store.saveUser(
            User(userName=ConfigHolder.getConfig().getProperty(BASE, ADMIN_NAME),
                 userPwd=ConfigHolder.getConfig().getProperty(BASE, ADMIN_PWD),
                 isAdmin=True))
    if not self.kazooClient.isExistPath("/cabbage/queueServer"):
        self.kazooClient.create("/cabbage/queueServer/brokerServers", makepath=True)
        self.kazooClient.create("/cabbage/queueServer/brokerServers/readies")
        self.kazooClient.create("/cabbage/queueServer/queues", makepath=True)
    os.environ.setdefault(CABBAGE, MASTER)
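# Resulting znode layout after _initData (paths reconstructed from the
# create() calls above; data payloads omitted):
#
#   /cabbage
#   ├── jobs/{readies, results}
#   ├── works/readies
#   ├── monitor/{jobs, works, brokerServers}
#   ├── users/<ADMIN_NAME>
#   └── queueServer/{brokerServers/readies, queues}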
def serverScheduler():
    registerMoniters()
    # checkResultScheduler = "%s %s * * *" % (int(getNowMinute()) + 1, getNowHour())
    # JobManageHolder.getJobManage().addJob(checkResult, jobId="checkResult", cron="*/1 * * * *")
    JobManageHolder.getJobManage().addJob(monitorJob, jobId="monitor", cron="*/1 * * * *")
    resultUploadScheduler = ConfigHolder.getConfig().getProperty(BASE, RESULTE_UPLOAD_SCHEDULER)
    JobManageHolder.getJobManage().addJob(uploadServerScheduler,
                                          jobId="uploadServerScheduler",
                                          cron=resultUploadScheduler)
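# The cron strings above use the standard five-field crontab syntax
# (minute hour day-of-month month day-of-week): "*/1 * * * *" fires every
# minute. RESULTE_UPLOAD_SCHEDULER is expected to hold a string in the same
# format, e.g. "0 1 * * *" for 01:00 daily (example value, not from the
# source).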
def __init__(self, cfgPath=None):
    path = ConfigHolder.getConfig().getProperty(BASE, CLIENT_FILE_DIRECTORY)
    self.kazooClient = ZookeeperClientHolder.getRetryClient()
    if not os.path.isdir(path):
        os.makedirs(path)
    if not self.kazooClient.isExistPath("/cabbage/jobs"):
        self.kazooClient.create("/cabbage/jobs", makepath=True)
        self.kazooClient.create("/cabbage/jobs/readies", makepath=True)
    if not self.kazooClient.isExistPath("/cabbage/works"):
        self.kazooClient.create("/cabbage/works", makepath=True)
        self.kazooClient.create("/cabbage/works/readies")
    registerClientEvent()
    self.status = OFF_LINE
    # self._initConifg()
    os.environ.setdefault(CABBAGE, NODE)
def save(self):
    try:
        nfsPath = ConfigHolder.getConfig().getProperty(BASE, NFS_DIRECTORY)
        dateStr = getNowDateStr()
        if self.jobId:
            localPath = getLocalFilesPath()
            dateStr = getNowDateStr()
            hour = getNowHour()
            if hour == 0:
                # at midnight, ship the previous day's data
                dateStr = formatDate(subDay(getNow(), 1), f="%Y%m%d")
            localPath = localPath + "/" + self.jobId + "/result/" + dateStr
            Logger.info(log, "upload file to nfs. jobId【%s】 date【%s】" % (self.jobId, dateStr))
            if not os.path.isdir(localPath):
                return
            fileNames = os.listdir(localPath)
            if len(fileNames) == 0:
                return
            remoteDire = nfsPath + "/" + self.jobId + "/" + dateStr
            if not os.path.isdir(remoteDire):
                os.makedirs(remoteDire)
                # os.chmod(remoteDire, 777)
            Logger.info(log, "hour:%s files:%s" % (hour, ",".join(fileNames)))
            for fileName in fileNames:
                if hour != 0:
                    if int(fileName) >= hour:
                        continue
                if os.environ[CABBAGE] == MASTER:
                    newFileName = HOST_NAME + "_" + LOCAL_IP + "_" + MASTER + "_" + fileName
                else:
                    newFileName = HOST_NAME + "_" + LOCAL_IP + "_" + NODE + "_" + fileName
                if os.path.isfile(localPath + "/" + fileName):
                    shutil.move(localPath + "/" + fileName, remoteDire + "/" + newFileName)
    except Exception:
        Logger.exception(log)
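# Hour-roll example for the upload window above (the NFS saver here and the
# HDFS saver below share this logic): result files are named after the hour
# they were written in, so at 14:05 files "0".."13" are shipped and "14"
# waits for the next run; at 00:xx the date rolls back one day and every
# file of the previous day is shipped.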
def taskFailed(state, event, app):
    eventOutDic = event.copy()
    taskName = None
    try:
        taskId = event['uuid']
        task = state.tasks.get(taskId)
        # taskName = task.name if task and hasattr(task, 'name') else None
        # if hasattr(task, 'kwargs') and task.kwargs is not None and JOB_ID in task.kwargs:
        #     eventOutDic[JOB_ID] = eval(str(task.kwargs))[JOB_ID]
        # if eventOutDic[JOB_ID] is None:
        #     eventOutDic[JOB_ID] = TaskCacheHolder.getJobCache().get(taskName)
        # if taskName is None or eventOutDic[JOB_ID] is None:
        jobId, taskName = _getJobIdAndTaskName(taskId)
        eventOutDic[JOB_ID] = jobId
        job = CacheHolder.getCache().get(eventOutDic[JOB_ID], JOBS)
        brokerServer = job.brokerServer
        taskPath = ConfigHolder.getConfig().getProperty(BASE, TASK_FAILLOG_PATH)
        if not os.path.isdir(taskPath):
            os.makedirs(taskPath)
        dateStr = getNowDateStr()
        with open(taskPath + "/" + brokerServer + "_" + dateStr + ".log", "a+") as writer:
            writer.write(str(eventOutDic) + "\n")
        # with storeFactory.store() as store:
        #     store.deleteTaskId(task.id)
        # StoreHolder.getRedisStaticStore().deleteTaskId(taskId)
        CabbageCounterHolder.getCabbageCounter().updateTaksFail(taskName, _getHostName(event))
    except Exception:
        Logger.exception(log)
def run(self):
    clientDir = ConfigHolder.getConfig().getProperty(BASE, CLIENT_FILE_DIRECTORY)
    jobId = self.message.jobId
    jobDir = clientDir + "/" + jobId
    fileName = self.message.fileName
    fileContent = self.message.fileContent
    if os.path.exists(jobDir) is False:
        os.mkdir(jobDir)
        os.mkdir(jobDir + "/result")
    filePath = jobDir + "/" + fileName
    if os.path.exists(filePath):
        os.remove(filePath)
    with open(filePath, "w") as f:
        f.write(base64.decodestring(fileContent))
    # Mark this file as ready; once every file of the job is ready, the
    # client notifies the server that it can execute the job.
    CacheHolder.getCache().put(fileName, DONE, jobId)
def save(self):
    hdfsPath = ConfigHolder.getConfig().getProperty(BASE, HDFS_ROOT_PATH)
    dateStr = getNowDateStr()
    if self.jobId:
        localPath = getLocalFilesPath()
        dateStr = getNowDateStr()
        hour = getNowHour()
        if hour == 0:
            # at midnight, ship the previous day's data
            dateStr = formatDate(subDay(getNow(), 1), f="%Y%m%d")
        p = localPath + "/" + self.jobId + "/result/" + dateStr
        Logger.debug(log, "upload file to hdfs. jobId【%s】 date【%s】" % (self.jobId, dateStr))
        if not os.path.isdir(p):
            return
        fileNames = os.listdir(p)
        if len(fileNames) == 0:
            return
        client = HdfsClientHolder.getHdfsClient()
        remoteDire = hdfsPath + "/" + self.jobId
        if not client.isDirectory(remoteDire):
            client.mkdir(remoteDire)
        remoteDire = remoteDire + "/" + dateStr
        if not client.isDirectory(remoteDire):
            client.mkdir(remoteDire)
        Logger.info(log, "hour:%s files:%s" % (hour, ",".join(fileNames)))
        for fileName in fileNames:
            if hour != 0:
                if int(fileName) >= hour:
                    continue
            # if os.path.isfile(p + "/" + fileName):
            self.uploadToHdfs(client, localPath, self.jobId, hdfsPath, fileName, dateStr)
            os.remove(p + "/" + fileName)
def _initJobs(self, cabbage):
    store = StoreHolder.getRetryStore()
    jobs = store.getJobs()
    work = store.getWork(HOST_NAME)
    queues = work.queues
    routes = {}
    queues_celery = []
    for que in queues:
        que = store.getQueue(que)
        queues_celery.append(
            Queue(que.queueName,
                  Exchange(que.queueName),
                  routing_key=que.queueName,
                  queue_arguments={'x-max-priority': int(que.priority)}))
    for job in jobs:
        if job.status != JOB_DELETE and job.brokerQueue in queues:
            # fix: cache the job when scaling out dynamically
            if not CacheHolder.getCache().hasKey(job.jobId, JOBS):
                CacheHolder.getCache().put(job.jobId, job, JOBS)
            clientDir = ConfigHolder.getConfig().getProperty(BASE, CLIENT_FILE_DIRECTORY)
            path = clientDir + "/" + job.jobId
            if not os.path.isdir(path):
                # fix: job files were out of sync; pull them from the master
                syncJob(job.jobId, store)
            self.addScriptJobId(job.jobId, cabbage)
            for taskName in job.tasks:
                que = store.getQueue(job.brokerQueue)
                routes[taskName] = {
                    'queue': que.queueName,
                    'routing_key': que.routingKey
                }
    log.info(routes)
    celeryconfig.CELERY_QUEUES = tuple(queues_celery)
    celeryconfig.CELERY_ROUTES = routes
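# Shape of the routing table built above (values illustrative, not from the
# source):
#
# CELERY_ROUTES = {
#     "crawler.tasks.FetchTask": {           # taskName from job.tasks
#         "queue": "queue-high",             # que.queueName
#         "routing_key": "queue-high.key",   # que.routingKey
#     },
# }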
def loadJobScript(self, jobId, store):
    '''Get the scripts of the given job ready on this node.'''
    job = store.getJob(jobId)
    clientDir = ConfigHolder.getConfig().getProperty(BASE, CLIENT_FILE_DIRECTORY)
    path = clientDir + "/" + jobId
    # the master's own main file is not synced here
    # if job.fileType == PYTHON:
    #     if not os.path.isfile(path + "/" + job.fileName):
    #         syncFile(job.fileName, jobId, FileRequestMessage.MAIN)
    #     self.loadCeleryTask(path, job.fileName)
    if job.attachFiles:
        for attachFile in job.attachFiles:
            if attachFile.fileType == PYTHON:
                if not os.path.isfile(path + "/" + attachFile.fileName):
                    syncFile(attachFile.fileName, jobId, FileRequestMessage.ATTACH)
                self.loadCeleryTask(path, attachFile.fileName)
    log.info("Node 【%s】: scripts for job 【%s】 loaded" % (HOST_NAME, jobId))
def __init__(self):
    # the config value comes back as a string (the pool starter below casts
    # the same property), so cast before handing it to ThreadPoolExecutor
    self.max = int(ConfigHolder.getConfig().getProperty(BASE, UPLOAD_HDFS_FILE_PROCESS_COUNT))
    self.executor = futures.ThreadPoolExecutor(max_workers=self.max)
    self.jobIdQueue = Queue.Queue()
    self.start()
def getClient(self,
              ip=ConfigHolder.getConfig().getProperty(BASE, REDIS_IP),
              port=ConfigHolder.getConfig().getProperty(BASE, REDIS_PORT),
              password=ConfigHolder.getConfig().getProperty(BASE, REDIS_PWD)):
    return RedisClient(ip=ip, port=port, password=password)
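# Caveat shared by the default-argument constructors in this section: the
# config lookups in the signature run once at import time, so later config
# changes do not affect the defaults. A late-binding variant (sketch, not the
# project's API):
def getClientLateBound(self, ip=None, port=None, password=None):
    cfg = ConfigHolder.getConfig()
    return RedisClient(ip=ip or cfg.getProperty(BASE, REDIS_IP),
                       port=port or cfg.getProperty(BASE, REDIS_PORT),
                       password=password or cfg.getProperty(BASE, REDIS_PWD))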
'''
Created on 2016-07-13
@author: hua
'''
from cabbage.config import ConfigHolder
from cabbage.constants import BASE
from cabbage.utils.host_name import HOST_NAME, LOCAL_IP
from kombu.entity import Exchange, Queue

# CELERYD_POOL_RESTARTS = True
CELERY_ALWAYS_EAGER = False
CELERYD_LOG_LEVEL = "DEBUG"
# CELERY_REDIRECT_STDOUTS_LEVEL = "DEBUG"

if ConfigHolder.getConfig().hasProperty(BASE, "celerydConcurrency"):
    # config values are strings; Celery expects an int here
    CELERYD_CONCURRENCY = int(ConfigHolder.getConfig().getProperty(BASE, "celerydConcurrency"))
else:
    CELERYD_CONCURRENCY = 1

CELERY_IGNORE_RESULT = True
# CELERY_TRACK_STARTED = True
CELERY_SEND_TASK_SENT_EVENT = True
CELERY_SEND_EVENTS = True
# CELERY_RESULT_BACKEND = 'amqp'
# CELERY_RESULT_EXCHANGE = 'celereyResults'
def getRetryClient(self):
    retry = KazooRetry(max_tries=1000, delay=0.1, backoff=2,
                       max_jitter=0.8, max_delay=3600, ignore_expire=True)
    return KazooZookeeperClient(ConfigHolder.getConfig().getProperty(BASE, ZOOKEEPER),
                                connection_retry=retry)
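# Retry cadence implied by the parameters above (kazoo KazooRetry semantics,
# stated as an assumption about the installed version): delays start at 0.1s
# and double on every attempt (backoff=2), with a small random jitter added,
# capped at max_delay=3600s, for at most 1000 tries; session-expiry errors
# are retried as well (ignore_expire=True).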
def getClient(self, connection_retry=None):
    return KazooZookeeperClient(ConfigHolder.getConfig().getProperty(BASE, ZOOKEEPER),
                                connection_retry=connection_retry)
def start(self):
    jobProcessCount = ConfigHolder.getConfig().getProperty(BASE, UPLOAD_HDFS_FILE_PROCESS_COUNT)
    self.pool = multiprocessing.Pool(processes=int(jobProcessCount))
    for i in range(int(jobProcessCount)):
        self.pool.apply_async(action, (self.jobIdQueue, ))
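# Note (assumption about self.jobIdQueue, which is created elsewhere): for
# the pool workers spawned here to consume it, the queue must be shareable
# across processes; a plain Queue.Queue cannot be pickled into a child.
# A minimal sketch of a compatible setup:
import multiprocessing

manager = multiprocessing.Manager()
jobIdQueue = manager.Queue()   # proxy object, safe to pass to apply_async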