def onConsumerCallback(self, params):
    """
    handle one message delivered by the AMQP consumer

    A pending reloaded task is swapped in first, then the task handler is
    called with the message and the processing statistics are updated.
    The statistics are logged and reset once 'statNextPeriod' has passed.

    :param params: message payload passed to the task handler
    :return: False when the slot is not running or the handler raised
    """
    if not self.running:
        return False

    if self.reloading:
        log.info("Replace task to updated.")
        self.task = self.updatedTask
        self.updatedTask = None
        self.reloading = False

    if params is None or len(params) == 0:
        # Only reported: the handler below is still called with the payload.
        log.error("No task params for '%s' (%s)" % (self.taskName, self.threadId))

    try:
        start = time.time()
        self.task.handler(params)
        runTime = time.time() - start

        self.statSumProcessCount += 1
        # Accumulate the run time: the average logged below divides this
        # sum by the number of processed messages.
        self.statSumProcessTime += runTime

        if self.statNextPeriod <= time.time():
            log.debug("%s - %d messages. Avg. %f sec" % (self.taskName, self.statSumProcessCount, 1.0 * self.statSumProcessTime / self.statSumProcessCount))
            self.statSumProcessCount = 0
            self.statSumProcessTime = 0
            self.statNextPeriod = time.time() + 30
    except Exception as e:
        log.error("Task Exception - %s (%s) - %s" % (self.taskName, e, params))
        worker.sentry.client.captureException(data=params)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        log.error(traceback.format_exception(exc_type, exc_value, exc_traceback))
        return False
def __loadDevTask(self):
    """
    load development tasks

    Every directory found under DIR_DEV_MODULE (relative to the current
    working directory) is imported as a module. For each attribute of the
    module that carries 'OW_TASK_SUBSCRIBE_EVENTS' a '<name>_dev' slot is
    requested, with up to three attempts one second apart.
    """
    info = self.metaClient.getNodeInfo()
    info['devTasks'] = []
    info['environment'] = self.environment

    devRoot = os.path.join(os.getcwd(), DIR_DEV_MODULE)
    for name in os.listdir(devRoot):
        if not os.path.isdir(os.path.join(devRoot, name)):
            continue

        try:
            module = importlib.import_module(name)
            for attrName in dir(module):
                attr = getattr(module, attrName)
                if not hasattr(attr, 'OW_TASK_SUBSCRIBE_EVENTS'):
                    continue

                log.info("Load Task - %s" % attr.__name__)
                for _ in range(3):
                    # start slots after request complete.
                    # because, AMQP communication affects zookeeper communication
                    # TODO: solve this problem.
                    slot = self.slotPool.requestSlot('%s_dev' % attr.__name__, attr, auto_start=False)
                    if slot is not None:
                        info['devTasks'].append(slot.taskName)
                        self.metaClient.addDevTask(slot.taskName, slot.task)
                        self.devTaskSlots.append(slot)
                        break
                    gevent.sleep(1)
        except ImportError as err:
            log.error("Import ERROR - [%s] %s" % (name, err))
def sendNodeEvent(self, node, event):
    """
    store an event for a worker node

    The event is JSON-encoded and written to
    '<rootpath>/event/worker/<node>' through the client.

    :param node: node name used as the last path element
    :param event: event dict; its 'tasks' entry is logged
    :return: False when anything raised
    """
    try:
        log.info("Set node info - %s, %s" % (node, event['tasks']))
        path = '%s/event/worker/%s' % (self.rootpath, node)
        payload = json.dumps(event)
        self.client.set(path, payload)
    except Exception as e:
        log.error(e)
        return False
def __init__(self, url, callback):
    """
    initialize with a pooled connection

    :param url: url the connection is created from
    :param callback: callable stored as 'self.callback'
    """
    self.url = url
    pool = connections[Connection(self.url)]
    self.connection = pool.acquire()
    log.info("Connection POOL - %s" % self.connection)

    self.callback = callback
    self.exchanges = []
    self.msgCnt = 0
    self.lastSleep = 0
def _run(self):
    """
    start the maintenance thread

    Starts the meta client (zookeeper connection) and checks the meta
    information. Any failure is logged and re-raised to the caller.
    """
    log.info("start maintenance thread..")

    # connecting zookeeper
    try:
        self.metaClient.start()
        self.__checkMetaInfo()
    except Exception as e:
        log.error(e)
        # Bare raise keeps the original traceback; 'raise e' would restart
        # it from this line under Python 2.
        raise
def __eventWorker(self, data, event=None):
    """
    react to a worker event

    Logs the number of nodes when 'data' is a list or tuple and updates
    the worker status. A lost or expired session spawns a reconnect;
    any other error is only logged.

    :param data: event data; its length is logged for list/tuple values
    :param event: not used here
    """
    connectionErrors = (
        kazoo.exceptions.SessionExpiredError,
        kazoo.exceptions.ConnectionLoss,
        kazoo.exceptions.ConnectionClosedError,
    )
    try:
        if isinstance(data, (list, tuple)):
            log.info("NODES : %d" % len(data))
        self.__updateWorkerStatus()
    except connectionErrors:
        gevent.spawn(self.__reconnect)
    except Exception as e:
        log.error(e)
def start(self):
    """
    start the worker

    Opens the meta client and, outside of the 'production' environment,
    loads the development tasks. A failure while loading them is logged
    and does not stop the start-up.
    """
    log.info("Start Worker [%s] - %s" % (id(self), time.time()))
    self.bLoop = True
    self.metaClient.open()

    developmentMode = self.environment != 'production'
    if developmentMode:
        try:
            self.__loadDevTask()
        except Exception as e:
            log.error(e)
def command(self, cmd):
    """
    apply a control command to the feeder

    :param cmd: "pause" or "resume"; None or an empty value is ignored,
                anything else is logged as unknown
    :return: None
    """
    if cmd is None or len(cmd) == 0:
        return

    if cmd not in ("pause", "resume"):
        log.error("UNKNOWN COMMAND : %s" % cmd)
        return

    pausing = cmd == "pause"
    self.pause = pausing
    if pausing:
        log.info("Pause MessageFeeder")
    else:
        log.info("Resume MessageFeeder")
def requestSlot(self, name, handler, auto_start=True):
    """
    create a task slot and register it in this pool

    :param name: task name
    :param handler: task class; it is instantiated without arguments
    :param auto_start: if 'True', '.start()' is called on the new slot
    :return: slot instance
    """
    log.info("REQUEST!!!!")
    taskInstance = handler()
    slot = SlotHandler(name, taskInstance, self.urlAMQP, self)
    self.slots.append(slot)

    if auto_start:
        slot.start()

    return slot
def _run(self):
    """
    run the task slot

    Records the current thread name, starts the producer (non-blocking)
    when there is one and then starts the consumer, which blocks while it
    consumes messages.
    """
    self.threadId = threading.currentThread().getName()
    log.info("Start task slot - %s (%s)" % (self.taskName, self.threadId))
    self.running = True

    # Non-blocking
    if self.amqpProducer:
        self.amqpProducer.start()

    # blocking and consume messages
    if self.amqpConsumer:
        try:
            self.amqpConsumer.start()
        except KeyboardInterrupt:
            log.error("CATCH KEYBOARD INTERRUPT in SLOT LOOP!!!!!")

    log.info("Exit task slot - %s (%s)" % (self.taskName, self.threadId))
def __init__(self, name, taskHandler, url, slotPool):
    """
    initialize slot

    :param name: task name; a '_dev' suffix marks a development task
    :param taskHandler: task handler instance
    :param url: AMQP url handed to the consumer and producer
    :param slotPool: slot pool
    :return: None
    """
    Greenlet.__init__(self)
    self.slotPool = slotPool
    self.running = False
    self.reloading = False
    self.updatedTask = None
    self.taskName = name
    self.task = self.__prepareTaskHandler(taskHandler)
    self.event = taskHandler.OW_TASK_SUBSCRIBE_EVENTS
    self.useHashing = taskHandler.OW_USE_HASHING
    self.hashKey = taskHandler.OW_HASH_KEY
    self.urlAMQP = url

    # processing statistics
    self.statSumProcessTime = 0
    self.statSumProcessCount = 0
    self.statNextPeriod = time.time() + 30

    # A consumer / producer only exists when the task subscribes to /
    # publishes at least one event.
    if len(taskHandler.OW_TASK_SUBSCRIBE_EVENTS) > 0:
        self.amqpConsumer = consumer.AMQPConsumer(self.urlAMQP, self.onConsumerCallback)
    else:
        self.amqpConsumer = None

    if len(taskHandler.OW_TASK_PUBLISH_EVENTS) > 0:
        self.amqpProducer = producer.AMQPProducer(self.urlAMQP)
    else:
        self.amqpProducer = None

    self.devTask = name.endswith('_dev')

    log.info("Create task slot - task: %s, event: %s, useHashing: %s, hashKey: %s, AMQP id: %s)" % (name, self.event, self.useHashing, self.hashKey, id(self.amqpConsumer)))

    if self.useHashing:
        exchange, exchangeType = "job.consistent.%s" % name, "x-consistent-hash"
        queueName, weight = settings.UUID.get_hex(), 10
    else:
        exchange, exchangeType = "job.general", "direct"
        queueName, weight = name, None

    for event in self.event:
        self.amqpConsumer.setupExchange(exchange, exchangeType, queueName, event, weight, self.devTask)
def get_consumers(self, Consumer, channel):
    """
    declare the registered exchanges and collect their queues

    :param Consumer: not used in the visible part of this method
    :param channel: channel each registered exchange is bound to before
                    it is declared
    """
    log.info('get_consumers')
    consumerQueues = []
    # if ex == 'consistent-hashing' : bind 'job.general' and ex with route key
    # if ex != 'consistent-hashing' : bind ex and q with route key
    try:
        for exchange in self.exchanges:
            # bind the stored exchange to the channel, then declare it
            ex = exchange['exchange'](channel)
            ex.declare()
            if exchange['exchange'].type == 'x-consistent-hash':
                ex.bind_to('job.general', exchange['key'])
            # TODO: update!
            # setup queue
            q = exchange['queue']
            consumerQueues.append(q)
    except Exception, e:
        # errors are only logged; queues collected so far are kept
        log.error(e)
    # NOTE(review): consumerQueues is filled but neither used nor returned
    # in the code visible here - confirm the rest of the method.
def get(self, module):
    """
    download (when outdated) and (re)load a task module

    The newest stored file for 'module' is compared by id with the local
    module info. A differing or missing local entry triggers a download
    and extraction into DIR_TASK_MODULE. The module is then imported, or
    reloaded when it is already in 'sys.modules'.

    :param module: module name
    :raises Exception: any error is re-raised after
                       'unlockAndUpdateModuleInfo()' has been called
    """
    localModule = self.lockAndGetLocalModuleInfo()
    try:
        # newest upload first, at most one entry
        for mod in self.fs.find({'module': module}).sort('uploadDate', -1).limit(1):
            # need download?
            if (module not in localModule) or (str(mod._id) != localModule[module]['id']):
                log.info("Downloading %s" % module)
                # Download
                # NOTE(review): fd is not closed if write() raises.
                fd = open('%s/%s' % (DIR_TASK_MODULE, mod.filename), 'wb')
                fd.write(mod.read())
                fd.close()
                mod.close()
                # remove the previous copy, but only if it is a package
                modulePath = os.path.join(DIR_TASK_MODULE, module)
                if os.path.exists(modulePath) and os.path.exists('%s/__init__.py' % modulePath):
                    shutil.rmtree(modulePath)
                # Extract to module path
                # NOTE(review): extractall() does not validate member paths;
                # confirm the archives come from a trusted source.
                tar = tarfile.open('%s/%s' % (DIR_TASK_MODULE, mod.filename), 'r:bz2')
                tar.extractall(DIR_TASK_MODULE)
                tar.close()
                # remove download file
                os.remove('%s/%s' % (DIR_TASK_MODULE, mod.filename))
                localModule[module] = {'id': str(mod._id)}
                log.info("Download complete %s" % module)
            # (re)load when this process has not loaded this version yet
            if (module not in self.modules) or (localModule[module]['id'] != self.modules[module]['id']):
                if module in sys.modules:
                    #print "Re-loading %s - %s" % (module, os.getpid())
                    modObj = sys.modules[module]
                    reload(modObj)
                else:
                    #print "Loading %s - %s" % (module, os.getpid())
                    modObj = importlib.import_module(module, DIR_TASK_MODULE)
                self.modules[module] = localModule[module]
    except Exception, e:
        # NOTE(review): the unlock is only visible on this error path -
        # confirm where the lock is released after a successful run.
        self.unlockAndUpdateModuleInfo()
        raise e
def __init__(self, collection, ordering=False):
    """
    initialize the message feeder for one queue collection

    :param collection: collection the messages are fetched from; its
                       'name' becomes the feeder name
    :param ordering: stored as 'doOrdering'
    """
    Greenlet.__init__(self)

    # control flags and bookkeeping
    self.pause = False
    self.taskStatus = {}
    self.exchangeInfo = {}

    # message source
    self.name = collection.name
    self.collection = collection
    self.doOrdering = ordering
    log.info("Fetch Message from MongoQueue - Collection : %s" % self.collection.name)

    # producer connected to the configured broker
    brokerUrl = settings.conf.get("server", "broker")
    self.amqp = AMQPProducer(brokerUrl)

    # loop state
    self.bLoop = False
    self.lastQueueCheckTime = 0

    # acknowledge helper with a fixed flush interval
    self.ackMessage = partial(msgqueue.ackMessage, flushInterval=1)
def setupExchange(self, exchange, exchangeType, queue=None, routingKey=None, weight=None, ephemeral=False):
    """
    register an exchange/queue pair

    :param exchange: exchange name
    :param exchangeType: exchange type; "x-consistent-hash" gets its own
                         exchange and queue settings
    :param queue: queue name, "" when not given
    :param routingKey: routing key of the queue for other exchange types;
                       always stored as "key" of the registered entry
    :param weight: used (as string) as the routing key of the queue for
                   "x-consistent-hash"
    :param ephemeral: if true, the queue is created with auto_delete
    """
    log.info("setupExchange - %s, %s" % (exchange, exchangeType))
    consistentHash = exchangeType == "x-consistent-hash"

    # exchange setting
    exParams = {
        "name": exchange,
        "type": exchangeType,
        "delivery_mode": 2,
        "durable": True,
        "internal": consistentHash,
        "auto_delete": consistentHash,
    }
    if consistentHash:
        exParams["arguments"] = {"hash-header": "hashID"}
    ex = Exchange(**exParams)

    # setup queue
    qParams = {
        "name": queue or "",
        "exchange": ex,
        "routing_key": str(weight) if consistentHash else routingKey,
        "auto_delete": True if (consistentHash or ephemeral) else False,
        "exclusive": consistentHash,
    }
    q = Queue(**qParams)

    self.exchanges.append({"queue": q, "exchange": ex, "key": routingKey})
def setupExchange(self, exchange, exchangeType, queue=None, routingKey=None, weight=None, ephemeral=False):
    """
    register an exchange/queue pair

    :param exchange: exchange name
    :param exchangeType: exchange type; "x-consistent-hash" gets its own
                         exchange and queue settings
    :param queue: queue name, '' when not given
    :param routingKey: routing key of the queue for other exchange types;
                       always stored as 'key' of the registered entry
    :param weight: used (as string) as the routing key of the queue for
                   "x-consistent-hash"
    :param ephemeral: if true, the queue is created with auto_delete
    """
    log.info("setupExchange - %s, %s" % (exchange, exchangeType))
    useHashExchange = exchangeType == "x-consistent-hash"

    # exchange setting
    exParams = {
        'name': exchange,
        'type': exchangeType,
        'delivery_mode': 2,
        'durable': True,
        'internal': useHashExchange,
        'auto_delete': useHashExchange,
    }
    if useHashExchange:
        exParams['arguments'] = {"hash-header": "hashID"}
    ex = Exchange(**exParams)

    # setup queue
    qParams = {
        'name': queue or '',
        'exchange': ex,
        'routing_key': str(weight) if useHashExchange else routingKey,
        'auto_delete': True if (useHashExchange or ephemeral) else False,
        'exclusive': useHashExchange,
    }
    q = Queue(**qParams)

    self.exchanges.append({'queue': q, 'exchange': ex, 'key': routingKey})
def get_consumers(self, Consumer, channel):
    """
    declare the registered exchanges and collect their queues

    :param Consumer: not used in the visible part of this method
    :param channel: channel each registered exchange is bound to before
                    it is declared
    """
    log.info("get_consumers")
    consumerQueues = []
    # print len(self.exchanges)
    # print dir(channel)
    # print self.connection, channel.connection
    # if ex == 'consistent-hashing' : bind 'job.general' and ex with route key
    # if ex != 'consistent-hashing' : bind ex and q with route key
    try:
        for exchange in self.exchanges:
            # bind the stored exchange to the channel, then declare it
            ex = exchange["exchange"](channel)
            ex.declare()
            if exchange["exchange"].type == "x-consistent-hash":
                ex.bind_to("job.general", exchange["key"])
            # TODO: update!
            # setup queue
            q = exchange["queue"]
            consumerQueues.append(q)
    except Exception, e:
        # errors are only logged; queues collected so far are kept
        log.error(e)
    # NOTE(review): consumerQueues is filled but neither used nor returned
    # in the code visible here - confirm the rest of the method.
self.scheduler.enter(10, 1, self.__checkMongo, ()) def _run(self): log.info("start maintenance thread..") # connecting zookeeper try: self.metaClient.start() self.__checkMetaInfo() except Exception, e: log.error(e) raise e try: svrConfig = json.loads(self.metaClient.get(self.rootpath + '/henem/config')[0]) log.info("Config - %s" % json.dumps(svrConfig)) config.update(svrConfig, keepOrgValue=True) except Exception, e: log.error(e) log.info("Service Configuration from local and Zookeeper") log.info(config.dumps()) self.urlQueue = config.get('source', 'mgqueue') try: self.mongoClient = pymongo.MongoClient(self.urlQueue) self.db = self.mongoClient.get_default_database() except Exception, e: log.error(self.urlQueue) log.error(e) raise e
try: (event, evtObj) = self.msgQueue.get(block=False) except Queue.Empty: #log.debug("EMPTY!") self.metaClient.ping() gevent.sleep(1) continue except Exception, e: log.error(e) except KeyboardInterrupt: break try: if event == events.evtModuleUpdate: if self.moduleClient: log.info("Task module updated, Process downloading.. - %s" % evtObj) self.moduleClient.open() self.moduleClient.get(evtObj) info = self.metaClient.getModuleInfo(evtObj) self.taskList.update(dict(map(lambda x: (x, evtObj), info['tasks']))) self.moduleClient.close() module = sys.modules[evtObj] taskHandlers = map(lambda x: getattr(module, x), info['tasks']) self.slotPool.reloadSlotTaskFromModule(taskHandlers) elif event == events.evtModuleRemove: if evtObj in self.moduleClient.getModuleList(): log.info("Task module removed, Process remove.. - %s" % evtObj) removedTasks = filter(lambda x: self.taskList[x] == evtObj, self.taskList.keys())
msgqueue.flushAcks(self.collection) # queue wait time if sentCount["sent"] < 100: sleep(1) log.info( "MONGO %s PROCESSED [%0.2f] - %d/%d (%s)" % ( self.name, time.time() - startFeed, sentCount.pop("sent", 0), sentCount.pop("wait", 0), ", ".join(map(lambda (k, v): "%s: %s" % (k, v), sentCount.iteritems())), ) ) except pymongo.errors.AutoReconnect, e: log.info("MongoDB AutoReconnect") pass except KeyboardInterrupt: log.info("Catch keyboard interrupt in msgfeeder") self.bLoop = False except Exception, e: log.error(e) log.info("Exit %s Feeder Thread" % self.name) def stop(self): log.info("Stop %s Data Manager.." % self.name) self.bLoop = False self.amqp.stop() def updateWorkerStatus(self, info):
def stop(self):
    """
    stop the feeder

    Clears the loop flag and stops the AMQP producer.
    """
    feederName = self.name
    log.info("Stop %s Data Manager.." % feederName)

    self.bLoop = False
    self.amqp.stop()
def _run(self): self.bLoop = True self.amqp.start() while self.bLoop: try: print self.taskStatus # no worker, no process if len(self.taskStatus) == 0 or len(filter(lambda (t, w): w == 0, self.taskStatus.items())) > 0: log.warn("Found not assigned task. - %s" % filter(lambda (t, w): w == 0, self.taskStatus.items())) sleep(1) continue """ Process MongoDB Queue """ if self.pause: sleep(1) continue if len(self.exchangeInfo) == 0: sleep(1) log.warn("No Message exchange info.") continue sentCount = Counter() startFeed = time.time() fetcher = msgqueue.burstFetchMessages(self.collection, limit=2000) if fetcher.count() == 0: sleep(1) log.info("NO DATA.") continue for msg in fetcher: if not self.bLoop: msgqueue.flushAcks(self.collection) break try: msg["_id"] = str(msg["_id"]) if "time" in msg and isinstance(msg["time"], datetime.datetime): msg["time"] = str(msg["time"]) if "log_time" in msg and isinstance(msg["log_time"], datetime.datetime): msg["log_time"] = str(msg["log_time"]) except Exception, e: self.ackMessage(self.collection, msg["_id"]) log.error(e) continue if msg.get("log_type", None) == "debugreport": self.ackMessage(self.collection, msg["_id"]) continue eventId = LOG_TYPE_MAP.get(msg["log_type"], None) if not eventId: log.error("Undefined Log type - %s" % msg) self.ackMessage(self.collection, msg["_id"]) continue sentCount[eventId] += 1 if eventId not in self.exchangeInfo: # Wait only ANS/AAT Log. if eventId in ["evtPlayerLog", "evtNetworkLog"]: log.warn("WARNING!!!! no worker wait %s event!!!!!" % eventId) sleep(1) continue else: # TODO: temporary, remember!!!! self.ackMessage(self.collection, msg["_id"]) continue pass events = self.exchangeInfo[eventId] try: if ( self.amqp.publish( events["exchange"], events["routing"], msg, {"hashID": msg[events["hashKey"]]} if events["hashKey"] else None, ) == True ): self.ackMessage(self.collection, msg["_id"]) sentCount["sent"] += 1 else: print "!" 
continue except KeyError: # delete message self.ackMessage(self.collection, msg["_id"]) except Exception, e: log.error("Publish error - %s" % e)
def stop(self):
    """
    stop the maintenance thread

    Clears the running flag and cancels every event that is still queued
    in the scheduler.
    """
    log.info("stop maintenance thread..")
    self.running = False

    pendingEvents = self.scheduler.queue
    for pending in pendingEvents:
        self.scheduler.cancel(pending)