def __init__(self, host, port, name):
    """Build the node description, publish it through Pyro on a background
    thread, then enter the blocking service loop (self.start())."""
    global pyroLoopCondition
    global pyroUri
    setLogger("Node-%s" % host, port)
    self.node = Node(host, port, "Node" + name)
    # Configured workers arrive as comma-separated "name|script" pairs.
    for entry in Conf.getNodeAvailWorkers().split(","):
        fields = entry.split("|")
        self.node.availWorkers[fields[0]] = WorkerProcess(host, fields[0], fields[1])
    # Services use the same "name|script" encoding.
    for entry in Conf.getNodeAvailServices().split(","):
        fields = entry.split("|")
        self.node.availServices[fields[0]] = ServiceProcess(host, fields[0], fields[1])
    self.node = self.node.calRes()

    def runNodeService():
        # Daemon-thread body: serve NodeService until the loop flag drops.
        global pyroUri, pyroLoopCondition
        daemon = Pyro4.Daemon(host=host, port=port)
        pyroUri = daemon.register(NodeService(self.node), self.node.name)
        daemon.requestLoop(checkPyroLoopCondition)

    spawn(runNodeService)
    time.sleep(1)  # give the Pyro daemon a moment to bind before logging its uri
    logger.info("Node:%s started...uri:%s" % (self.node.name, pyroUri))
    try:
        FactoryPatternExector.createPhysicalNode(self.node.calRes())
        self.start()
    except KeyboardInterrupt:
        pyroLoopCondition = False
        logger.warning('stopped by KeyboardInterrupt')
        sys.exit(1)
def __init__(self, host, port, name):
    """Create the Node, expose it via a Pyro daemon thread, and run the
    heartbeat loop until interrupted."""
    global pyroLoopCondition
    global pyroUri
    setLogger("Node-%s" % host, port)
    self.node = Node(host, port, "Node" + name)
    # Register configured workers and services; both are "name|script" lists.
    for pair in Conf.getNodeAvailWorkers().split(","):
        parts = pair.split("|")
        self.node.availWorkers[parts[0]] = WorkerProcess(host, parts[0], parts[1])
    for pair in Conf.getNodeAvailServices().split(","):
        parts = pair.split("|")
        self.node.availServices[parts[0]] = ServiceProcess(host, parts[0], parts[1])
    self.node = self.node.calRes()

    def runNodeService():
        global pyroUri, pyroLoopCondition
        pyroDaemon = Pyro4.Daemon(host=host, port=port)
        pyroUri = pyroDaemon.register(NodeService(self.node), self.node.name)
        pyroDaemon.requestLoop(checkPyroLoopCondition)

    spawn(runNodeService)
    # Brief pause so the daemon thread has registered before we read pyroUri.
    time.sleep(1)
    logger.info("Node:%s started...uri:%s" % (self.node.name, pyroUri))
    try:
        FactoryPatternExector.createPhysicalNode(self.node.calRes())
        self.start()
    except KeyboardInterrupt:
        pyroLoopCondition = False
        logger.warning('stopped by KeyboardInterrupt')
        sys.exit(1)
def start(self):
    """Service heartbeat loop: accept service registrations on a ZeroMQ
    ROUTER socket, send periodic heartbeats, and expire silent services."""
    context = zmq.Context(1)
    backend = context.socket(zmq.ROUTER)  # ROUTER
    backend.bind("tcp://*:%s" % Conf.getNodePortForService())  # For services
    poll_services = zmq.Poller()
    poll_services.register(backend, zmq.POLLIN)
    heartbeat_at = time.time() + Conf.getExpiration()
    while checkPyroLoopCondition():
        events = dict(poll_services.poll((Conf.getExpiration() - 1) * 1000))
        if events.get(backend) == zmq.POLLIN:
            # First frame identifies the sender as "host:port:serviceName".
            frames = backend.recv_multipart()
            if not frames:
                break
            parts = str(frames[0]).split(":")
            address, port, serviceName = parts[0], int(parts[1]), parts[2]
            key = "%s:%s:%s" % (address, port, serviceName)
            # Re-register (drop any stale entry first) to refresh expiry.
            self.node.services.pop(key, None)
            self.node.services[key] = Service(address, port, serviceName)
            ov = FactoryPatternExector.createServiceNode(address, port, serviceName)
            msg = frames[1:]
            if len(msg) == 1 and msg[0] not in (Conf.PPP_READY, Conf.PPP_HEARTBEAT):
                logger.error("Invalid message from service: %s" % msg)
        if time.time() >= heartbeat_at:
            # Ping every registered service.
            for service in self.node.services:
                backend.send_multipart([service, Conf.PPP_HEARTBEAT])
            heartbeat_at = time.time() + Conf.HEARTBEAT_INTERVAL
        # Reap services whose expiry has passed (collect first, pop after).
        now = time.time()
        expired = []
        for addr, svc in self.node.services.iteritems():
            if now > svc.expiry:  # Worker expired
                expired.append(addr)
        for addr in expired:
            logger.info("Service expired: %s" % addr)
            self.node.services.pop(addr, None)
        # update node of factory
        FactoryPatternExector.updatePhysicalNode(self.node.calRes())
def __init__(self, warehouse, warehouse_result):
    """Set up the Kafka producer and a consumer on the warehouse topic."""
    self.warehouse = warehouse
    self.warehouse_result = warehouse_result
    self.kafka = KafkaClient(Conf.getWareHouseAddr())
    self.producer = KeyedProducer(self.kafka)
    # Start from the earliest offset; commit offsets automatically every 30s.
    self.consumer = KafkaConsumer(self.warehouse,
                                  bootstrap_servers=[Conf.getWareHouseAddr()],
                                  group_id="cnlab",
                                  auto_commit_enable=True,
                                  auto_commit_interval_ms=30 * 1000,
                                  auto_offset_reset='smallest')
def __init__(self, warehouse, warehouse_result):
    """Initialise Kafka plumbing: keyed producer plus a topic consumer."""
    self.warehouse = warehouse
    self.warehouse_result = warehouse_result
    self.kafka = KafkaClient(Conf.getWareHouseAddr())
    self.producer = KeyedProducer(self.kafka)
    consumer_kwargs = dict(
        bootstrap_servers=[Conf.getWareHouseAddr()],
        group_id="cnlab",
        auto_commit_enable=True,           # commit offsets automatically
        auto_commit_interval_ms=30 * 1000,  # every 30 seconds
        auto_offset_reset='smallest',       # begin at the earliest offset
    )
    self.consumer = KafkaConsumer(self.warehouse, **consumer_kwargs)
def initialize(self):
    """Validate mode-specific options, build the matching scheduler and
    record the default parallelism."""
    if not self.name:
        self.name = "oc" + random_time_str()
    self.status = ManagerStatus(self.name)
    self.status.mode = self.mode
    # Guard clauses: some modes require extra options.
    if self.mode == 'standalone' and not self.options.workertype:
        logger.error("when --mode is standalone, --workertype must be specified")
        sys.exit(2)
    if self.mode == 'factory' and not self.options.warehouse:
        logger.error("when --mode is factory, --warehouse must be specified")
        sys.exit(2)
    # One scheduler per mode; only 'local' runs in-process.
    if self.mode == 'local':
        self.scheduler = LocalScheduler(self)
        self.isLocal = True
    elif self.mode == 'process':
        self.scheduler = MultiProcessScheduler(self, self.options.parallel)
        self.isLocal = False
    elif self.mode == 'standalone':
        self.scheduler = StandaloneScheduler(self, self.options.workertype)
        self.isLocal = False
    elif self.mode == 'factory':
        self.scheduler = FactoryScheduler(self, Conf.getWareHouseAddr(),
                                          self.options.warehouse)
        self.isLocal = False
    elif self.mode == 'mesos':
        self.scheduler = MesosScheduler(self, Conf.getMesosMaster(), self.options)
        self.isLocal = False
    else:
        logger.error("error mode, --mode should be one of [local, process, standalone, factory, mesos]")
        sys.exit(1)
    # Explicit --parallel wins; otherwise ask the scheduler.
    if self.options.parallel:
        self.defaultParallelism = self.options.parallel
    else:
        self.defaultParallelism = self.scheduler.defaultParallelism()
    self.initialized = True
def start(self):
    """Run the node's service-registration/heartbeat loop over ZeroMQ.

    Services announce themselves on a ROUTER socket; each announcement
    refreshes their registration. Heartbeats go out periodically and
    registrations past their expiry are dropped.
    """
    context = zmq.Context(1)
    backend = context.socket(zmq.ROUTER)  # ROUTER
    backend.bind("tcp://*:%s" % Conf.getNodePortForService())  # For services
    poll_services = zmq.Poller()
    poll_services.register(backend, zmq.POLLIN)
    heartbeat_at = time.time() + Conf.getExpiration()
    while checkPyroLoopCondition():
        socks = dict(poll_services.poll((Conf.getExpiration() - 1) * 1000))
        # Handle service activity on backend
        if socks.get(backend) == zmq.POLLIN:
            frames = backend.recv_multipart()
            if not frames:
                break
            # Sender identity frame: "host:port:serviceName".
            addressList = str(frames[0]).split(":")
            address = addressList[0]
            port = int(addressList[1])
            serviceName = addressList[2]
            serviceKey = "%s:%s:%s" % (address, port, serviceName)
            self.node.services.pop(serviceKey, None)
            self.node.services[serviceKey] = Service(address, port, serviceName)
            ov = FactoryPatternExector.createServiceNode(address, port, serviceName)
            msg = frames[1:]
            if len(msg) == 1:
                if msg[0] not in (Conf.PPP_READY, Conf.PPP_HEARTBEAT):
                    logger.error("Invalid message from service: %s" % msg)
        if time.time() >= heartbeat_at:
            for service in self.node.services:
                backend.send_multipart([service, Conf.PPP_HEARTBEAT])
            heartbeat_at = time.time() + Conf.HEARTBEAT_INTERVAL
        # Expire services that missed their deadline.
        t = time.time()
        expired = []
        for address, service in self.node.services.iteritems():
            if t > service.expiry:  # Worker expired
                expired.append(address)
        for address in expired:
            logger.info("Service expired: %s" % address)
            self.node.services.pop(address, None)
        # update node of factory
        FactoryPatternExector.updatePhysicalNode(self.node.calRes())
def start(self, isMaster, environ={}):
    """Initialise the shared environment exactly once.

    The master creates per-run work directories and populates
    self.environ; workers copy the supplied *environ* and verify the
    compression scheme matches.
    """
    if self.started:
        return
    logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
    self.isMaster = isMaster
    if isMaster:
        root_dirs = Conf.getRootDir()
        if isinstance(root_dirs, str):
            root_dirs = root_dirs.split(',')
        # Unique run name: timestamp-host-pid.
        run_name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
        self.workdir = [os.path.join(root, run_name) for root in root_dirs]
        for wd in self.workdir:
            util.mkdir_p(wd)
        self.environ['SERVER_URI'] = 'file://' + self.workdir[0]
        self.environ['WORKDIR'] = self.workdir
        self.environ['COMPRESS'] = util.COMPRESS
    else:
        self.environ.update(environ)
        if self.environ['COMPRESS'] != util.COMPRESS:
            raise Exception("no %s available" % self.environ['COMPRESS'])
    #self.ctx = zmq.Context()
    self.started = True
    logger.debug("env started")
def stopAll(self):
    """Stop every instance of this service through the service.sh helper."""
    script_name = self.serviceScript.split("/")[-1]
    cmd = "%s %s stopAll" % (os.path.join(Conf.getNodeServiceDir(), "service.sh"),
                             script_name)
    logger.info(cmd)
    return os.system(cmd)
def check_kafka_events(): global loopCondition from kafka import KafkaConsumer, KafkaClient, SimpleProducer warehouse_addr = Conf.getWareHouseAddr() consumer = KafkaConsumer("%sResult"%wk.options.warehouse, bootstrap_servers=[warehouse_addr], group_id="cnlab", auto_commit_enable=True, auto_commit_interval_ms=30 * 1000, auto_offset_reset='smallest') while loopCondition: for message in consumer.fetch_messages(): print "topic=%s, partition=%s, offset=%s, key=%s " % (message.topic, message.partition, message.offset, message.key) task = cPickle.loads(message.value) if task.state == Task.TASK_FINISHED: print "taskId:%s,success!!!:%s"%(task.id,task.result) else: print "taskId:%s,failed!!!"%task.id consumer.task_done(message) last_data_time = time.time() if not loopCondition: break
def stop(self, port):
    """Stop the service instance listening on *port* via service.sh."""
    logger.info("stop")
    script_name = self.serviceScript.split("/")[-1]
    helper = os.path.join(Conf.getNodeServiceDir(), "service.sh")
    cmd = helper + " " + script_name + " stop " + str(port)
    logger.info(cmd)
    return os.system(cmd)
def stop(self, port):
    """Stop the worker instance on *port* through the worker.sh helper."""
    script_name = self.workerScript.split("/")[-1]
    cmd = "%s %s stop %s" % (os.path.join(Conf.getNodeWorkerDir(), "worker.sh"),
                             script_name, str(port))
    return os.system(cmd)
def __init__(self, host, port, serviceName, servers=None):
    """Record this endpoint's identity and its service-group membership."""
    self.master = False  # leadership flag; starts as non-master
    self.alwaysTry = Conf.getAlwaysTryLeader()
    self.serviceName = serviceName
    self.thisServer = "%s:%s" % (host, port)
    self.groupServers = servers
    self.queue = Queue.Queue()
    self.rpl = None
def stop(self, port):
    """Ask service.sh to stop the instance bound to *port*."""
    logger.info("stop")
    filename = self.serviceScript.split("/")[-1]
    cmd = (os.path.join(Conf.getNodeServiceDir(), "service.sh")
           + " " + filename + " stop " + str(port))
    logger.info(cmd)
    return os.system(cmd)
def completionEvents(self):
    """Generator over completion events from the scheduler queue.

    Drains the queue until every task is accounted for. In factory mode
    results live in the warehouse instead, so consuming this generator
    raises.
    """
    if self.mode != "factory":
        while True:
            try:
                yield self.scheduler.completionEvents.get_nowait()
                self.scheduler.completionEvents.task_done()
            except Queue.Empty:
                finished = self.status.finished_count + self.status.fail_count
                if self.status.totalNum == finished:
                    break
    if self.mode == "factory":
        raise Exception("please consume results from warehouse [%s,%s]!" % (Conf.getWareHouseAddr(), self.options.warehouse))
def parse_options():
    """Parse command-line options, set logging level and optional config file.

    Exits with status 2 on missing options or a nonexistent config path.
    """
    (options, args) = parser.parse_args()
    if not options:
        parser.print_help()
        sys.exit(2)
    if options.mem is None:
        options.mem = Conf.MEM_PER_TASK
    # --quiet wins over --verbose; default is INFO.
    if options.quiet:
        options.logLevel = logging.ERROR
    elif options.verbose:
        options.logLevel = logging.DEBUG
    else:
        options.logLevel = logging.INFO
    setLogger(__name__, options.name, options.logLevel)
    if options.config:
        if os.path.exists(options.config) and os.path.isfile(options.config):
            Conf.setConfigFile(options.config)
        else:
            logger.error("configuration file is not found. (%s)" % (options.config, ))
            sys.exit(2)
    return options
def server_static(filename, mime_type=None):
    """Serve *filename* from the configured static directory.

    Guesses the MIME type when not supplied; raises web.NotFound for
    missing files; streams the file in 16 KiB chunks.
    """
    if mime_type is None:
        mime_type = mimetypes.guess_type(filename)[0]
    web.header('Content-Type', bytes('%s' % mime_type))
    filename = os.path.join(Conf.getWebStaticFullPath(), filename)
    if not os.path.exists(filename):
        raise web.NotFound()
    stat = os.stat(filename)
    web.header('Content-Length', '%s' % stat.st_size)
    last_modified = web.http.lastmodified(
        datetime.datetime.fromtimestamp(stat.st_mtime))
    web.header('Last-Modified', '%s' % last_modified)
    return wsgiref.util.FileWrapper(open(filename, 'rb'), 16384)
def getService(cls, host, port, serviceName):
    """Return a synchronous Pyro proxy for the named service endpoint."""
    return Pyro4.Proxy(Conf.getUri(host, port, serviceName))
def start(self):
    """Launch this worker on the node host via the worker.sh helper."""
    helper = os.path.join(Conf.getNodeWorkerDir(), "worker.sh")
    return os.system(helper + " " + self.workerScript + " start " + self.host)
def getAsynchronousService(cls, host, port, serviceName):
    """Return an asynchronous Pyro proxy for the named service endpoint.

    Pyro4's async wrapper is reached via getattr because ``async`` became
    a reserved keyword in Python 3.7, making the literal ``Pyro4.async``
    a syntax error there; getattr keeps the exact same runtime behavior
    while remaining parseable on all Python versions.
    """
    uri = Conf.getUri(host, port, serviceName)
    return getattr(Pyro4, "async")(Pyro4.Proxy(uri))
def setConfigFile(filePath):
    """Point the global Conf at an alternative configuration file."""
    Conf.setConfigFile(filePath)
for service in self.node.services: msg = [service, Conf.PPP_HEARTBEAT] backend.send_multipart(msg) heartbeat_at = time.time() + Conf.HEARTBEAT_INTERVAL expired = [] t = time.time() for address,service in self.node.services.iteritems(): if t > service.expiry: # Worker expired expired.append(address) for address in expired: logger.info("Service expired: %s" % address) self.node.services.pop(address, None) #update node of factory FactoryPatternExector.updatePhysicalNode(self.node.calRes()) #end while true #end def start() #end class NodeDademon if __name__ == "__main__" : def handler(signm, frame): global pyroLoopCondition pyroLoopCondition = False logger.warning("got signal %d, exit now", signm) sys.exit(1) signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGABRT, handler) NodeDademon("localhost",Conf.getNodeDefaultPort(),socket.gethostname())
def start(self):
    """Launch this service on the node host via the service.sh helper."""
    helper = os.path.join(Conf.getNodeServiceDir(), "service.sh")
    cmd = helper + " " + self.serviceScript + " start " + self.host
    return os.system(cmd)
def startSlaveFactory(cls):
    """Start the factory on the second configured server (the slave)."""
    servs = Conf.getFactoryServers().split(",")
    # servs[1] is the slave's "host:port" entry.
    server = servs[1].split(":")
    cls.startFactory(server[0], int(server[1]), servs,
                     Conf.getFactoryServiceName())
def start(self):
    """Start this worker process on the node host through worker.sh."""
    cmd = "%s %s start %s" % (os.path.join(Conf.getNodeWorkerDir(), "worker.sh"),
                              self.workerScript, self.host)
    return os.system(cmd)
import sys import os import socket sys.path.extend( [os.path.join(os.path.abspath(os.path.dirname(__file__)), '..')]) from opencluster.nodedaemon import NodeDademon from opencluster.configuration import Conf if __name__ == "__main__": if len(sys.argv) != 2: print "Usage : %s LocalIP" % sys.argv[0] sys.exit(1) NodeDademon(sys.argv[1], Conf.getNodeDefaultPort(), "".join(sys.argv[1].split(".")))
def start_factory_mesos():
    """Entry point: parse CLI options, start the Mesos scheduler driver,
    spawn task feeders (MySQL or Kafka), and supervise until stopped."""
    global pyroLoopCondition
    parser = OptionParser(usage="Usage: python factorymesos.py [options] <command>")
    parser.allow_interspersed_args = False
    parser.add_option("-s", "--master", type="string", default="",
                      help="url of master (mesos://172.31.252.180:5050)")
    parser.add_option("-f", "--factory", type="string", default="",
                      help="host:port of master (172.31.252.180:6666)")
    parser.add_option("-w", "--warehouse_addr", type="string", default="",
                      help="kafka-172.31.252.182:9092|mysql-172.31.254.25:3306,db,username,password")
    parser.add_option("-p", "--task_per_node", type="int", default=0,
                      help="max number of tasks on one node (default: 0)")
    parser.add_option("-I", "--image", type="string",
                      help="image name for Docker")
    parser.add_option("-V", "--volumes", type="string",
                      help="volumes to mount into Docker")
    parser.add_option("-r", "--retry", type="int", default=0,
                      help="retry times when failed (default: 0)")
    parser.add_option("-e", "--config", type="string",
                      default="/work/opencluster/config.ini",
                      help="absolute path of configuration file(default:/work/opencluster/config.ini)")
    parser.add_option("-g", "--group", type="string", default='',
                      help="which group to run (default: ''")
    parser.add_option("-q", "--quiet", action="store_true", help="be quiet")
    parser.add_option("-v", "--verbose", action="store_true",
                      help="show more useful log")

    (options, command) = parser.parse_args()
    if not options:
        parser.print_help()
        sys.exit(2)
    if options.config:
        Conf.setConfigFile(options.config)

    # CLI values win; fall back to configuration defaults.
    options.master = options.master or Conf.getMesosMaster()
    options.warehouse_addr = options.warehouse_addr or Conf.getWareHouseAddr()
    servers = options.factory or Conf.getFactoryServers()
    servs = servers.split(",")
    server = servs[0].split(":")

    options.logLevel = (options.quiet and logging.ERROR or
                        options.verbose and logging.DEBUG or logging.INFO)
    setLogger(Conf.getFactoryServiceName(), "MESOS", options.logLevel)

    # Explicit acknowledgements are opt-in via the environment.
    implicitAcknowledgements = 1
    if os.getenv("MESOS_EXPLICIT_ACKNOWLEDGEMENTS"):
        implicitAcknowledgements = 0

    sched = FactoryMesos(options, command, implicitAcknowledgements)
    driver = MesosSchedulerDriver(sched, sched.framework, options.master,
                                  implicitAcknowledgements)
    driver.start()
    logger.debug("Mesos Scheudler driver started")

    warehouse_addrs = options.warehouse_addr.split(",")

    def fetchTasksFromMySQL():
        # Poll t_task for pending rows, feed them to the scheduler, and
        # mark them started.
        global pyroLoopCondition
        mysqlIpAndPort = warehouse_addrs[0].split(":")
        last_data_time = time.time()
        while pyroLoopCondition:
            db = MySQLdb.connect(host=mysqlIpAndPort[0],
                                 port=int(mysqlIpAndPort[1]),
                                 db=warehouse_addrs[1],
                                 user=warehouse_addrs[2],
                                 passwd=warehouse_addrs[3])
            try:
                cur = db.cursor()
                curUpt = db.cursor()
                dataResults = cur.execute(
                    "select task_id,task_desc,task_start_time,status from t_task where status=0 order by priority asc limit 200")
                results = cur.fetchmany(dataResults)
                for r in results:
                    sched.append_task(cPickle.loads(r[1]))
                    curUpt.execute(
                        "update t_task set task_start_time=now(),status=1 where task_id='" + r[0] + "'")
                if len(results) > 0:
                    db.commit()
                    last_data_time = time.time()
                    driver.reviveOffers()
                # Back off when saturated or idle.
                if sched.tasks_total_len() > MAX_WAITING_TASK:
                    time.sleep(2)
                if time.time() - last_data_time > MAX_EMPTY_TASK_PERIOD:
                    time.sleep(10)
                if cur:
                    cur.close()
                if curUpt:
                    curUpt.close()
            finally:
                db.close()

    def fetchTasksFromKafka(priority):
        # Consume pickled tasks for one priority topic and feed the scheduler.
        global pyroLoopCondition
        consumer = KafkaConsumer('OpenCluster%s' % priority,
                                 bootstrap_servers=[options.warehouse_addr],
                                 group_id="cnlab",
                                 auto_commit_enable=True,
                                 auto_commit_interval_ms=30 * 1000,
                                 auto_offset_reset='smallest')
        last_data_time = time.time()
        while pyroLoopCondition:
            for message in consumer.fetch_messages():
                logger.error("%s:%s:%s: key=%s " % (message.topic,
                                                    message.partition,
                                                    message.offset,
                                                    message.key))
                sched.append_task(cPickle.loads(message.value))
                consumer.task_done(message)
                last_data_time = time.time()
            if sched.tasks_len(priority) > MAX_WAITING_TASK:
                time.sleep(2)
            if time.time() - last_data_time > MAX_EMPTY_TASK_PERIOD:
                time.sleep(10)

    # MySQL feed when credentials are present, otherwise one Kafka feed
    # per priority level.
    if len(warehouse_addrs) > 2:
        spawn(fetchTasksFromMySQL)
    else:
        for i in range(1, sched.priority_size + 1):
            spawn(fetchTasksFromKafka, i)

    def handler(signm, frame):
        logger.warning("got signal %d, exit now", signm)
        sched.stop(3)

    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGABRT, handler)

    try:
        while not sched.stopped:
            time.sleep(0.5)
            sched.check(driver)
            now = time.time()
            if now > sched.last_offer_time + 60 + random.randint(0, 5):
                logger.warning("too long to get offer, reviving...")
                sched.last_offer_time = now
                driver.reviveOffers()
    except KeyboardInterrupt:
        logger.warning('stopped by KeyboardInterrupt. The Program is exiting gracefully! Please wait...')
        sched.stop(4)

    #terminate pyrothread
    pyroLoopCondition = False

    time.sleep(5)
    driver.stop(False)
    sys.exit(sched.status)
def getAsynchronousService(cls, host, port, serviceName):
    """Return an asynchronous Pyro proxy for the named service endpoint.

    The original spelled this ``Pyro4. async (...)`` — ``async`` is a
    reserved keyword from Python 3.7, so that attribute access cannot be
    written literally there. ``getattr`` performs the identical attribute
    lookup while staying parseable on every Python version.
    """
    uri = Conf.getUri(host, port, serviceName)
    return getattr(Pyro4, "async")(Pyro4.Proxy(uri))
expired = [] t = time.time() for address, service in self.node.services.iteritems(): if t > service.expiry: # Worker expired expired.append(address) for address in expired: logger.info("Service expired: %s" % address) self.node.services.pop(address, None) #update node of factory FactoryPatternExector.updatePhysicalNode(self.node.calRes()) #end while true #end def start() #end class NodeDademon if __name__ == "__main__": def handler(signm, frame): global pyroLoopCondition pyroLoopCondition = False logger.warning("got signal %d, exit now", signm) sys.exit(1) signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGABRT, handler) NodeDademon("localhost", Conf.getNodeDefaultPort(), socket.gethostname())
def stopAll(self):
    """Stop every worker started from this script via worker.sh."""
    script_name = self.workerScript.split("/")[-1]
    helper = os.path.join(Conf.getNodeWorkerDir(), "worker.sh")
    return os.system(helper + " " + script_name + " stopAll")
"Node", "/nodeWorker", "WorkerOperation", "/nodeService", "ServiceOperation", "/about/(about)", "About", "/about/(contact)", "About", ] urls.extend(apiUrls) urls.extend(["*", "WebHandler"]) app = web.application(tuple(urls), globals()) folder_templates_full_path = PWD + Conf.getWebTemplatesPath() def render(params={}, partial=True): global_vars = dict({'title': 'OpenCluster'}.items() + params.items()) if not partial: return web.template.render(folder_templates_full_path, globals=global_vars) else: return web.template.render(folder_templates_full_path, base='layout', globals=global_vars) def titled_render(subtitle=''): subtitle = subtitle + ' - ' if subtitle else ''
def start(self):
    """Start this service process on the node host through service.sh."""
    cmd = "%s %s start %s" % (os.path.join(Conf.getNodeServiceDir(), "service.sh"),
                              self.serviceScript, self.host)
    return os.system(cmd)
def getDefaultFactory(cls):
    """Return a factory client for the first configured (master) server."""
    servs = Conf.getFactoryServers().split(",")
    # servs[0] is the master's "host:port" entry.
    server = servs[0].split(":")
    return cls.getFactory(server[0], int(server[1]), servs,
                          Conf.getFactoryServiceName())
def stopAll(self):
    """Stop every service started from this script via service.sh."""
    filename = self.serviceScript.split("/")[-1]
    cmd = (os.path.join(Conf.getNodeServiceDir(), "service.sh")
           + " " + filename + " stopAll")
    logger.info(cmd)
    return os.system(cmd)
import sys
import os
import logging

# Make the opencluster package importable when run from the source tree.
sys.path.extend(
    [os.path.join(os.path.abspath(os.path.dirname(__file__)), '..')])

from opencluster.ui.main import WebServer
from opencluster.configuration import Conf, setLogger

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    setLogger("OCWeb", "")
    thisServer = WebServer(Conf.getWebServers())
    thisServer.start()
import sys import os import socket sys.path.extend([os.path.join(os.path.abspath(os.path.dirname(__file__)),'..')]) from opencluster.nodedaemon import NodeDademon from opencluster.configuration import Conf if __name__ == "__main__" : if len(sys.argv) != 2 : print "Usage : %s LocalIP" % sys.argv[0] sys.exit(1) NodeDademon(sys.argv[1],Conf.getNodeDefaultPort(),"".join(sys.argv[1].split(".")))
import sys
import os
import logging

# Allow running straight out of the source tree.
sys.path.extend([os.path.join(os.path.abspath(os.path.dirname(__file__)), '..')])

from opencluster.ui.main import WebServer
from opencluster.configuration import Conf, setLogger

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    setLogger("OCWeb", "")
    WebServer(Conf.getWebServers()).start()