def __init__(self):
    """Build the servicer state from environment settings and etcd.

    Steps, in order: create the etcd client, read the master address
    from the ``service/master`` key, load paths and batch-network
    settings through ``env.getenv``, derive the pool of free IP offsets
    from ``BATCH_NET``, record one empty entry per GPU reported by
    ``gputools.get_gpu_status()``, and finally call ``start_report()``.

    The process exits with status 1 (via ``sys.exit``) when the etcd
    client cannot be created or the master key cannot be read.
    """
    rpc_pb2_grpc.WorkerServicer.__init__(self)

    etcd_address = env.getenv("ETCD")
    logger.info("using ETCD %s" % etcd_address)
    cluster = env.getenv("CLUSTER_NAME")
    logger.info("using CLUSTER_NAME %s" % cluster)

    # init etcdlib client; any failure here is fatal
    try:
        self.etcdclient = etcdlib.Client(etcd_address, prefix=cluster)
    except Exception:
        logger.error(
            "connect etcd failed, maybe etcd address not correct...")
        sys.exit(1)
    logger.info("etcd connected")

    # get master ip and report port
    found, master_address = self.etcdclient.getkey("service/master")
    if not found:
        logger.error("Fail to get master ip address.")
        sys.exit(1)
    self.master_ip = master_address
    logger.info("Get master ip address: %s" % self.master_ip)
    self.master_port = env.getenv('BATCH_MASTER_PORT')

    self.imgmgr = imagemgr.ImageMgr()
    self.fspath = env.getenv('FS_PREFIX')
    self.confpath = env.getenv('DOCKLET_CONF')

    # buffered task messages and the lock guarding them
    self.taskmsgs = []
    self.msgslock = threading.Lock()
    self.report_interval = 2

    self.lock = threading.Lock()
    self.mount_lock = threading.Lock()

    self.cons_gateway = env.getenv('BATCH_GATEWAY')
    self.cons_ips = env.getenv('BATCH_NET')
    logger.info("Batch gateway ip address %s" % self.cons_gateway)
    logger.info("Batch ip pools %s" % self.cons_ips)

    # BATCH_NET is parsed as "<address>/<prefix length>"; self.cidr ends
    # up holding 32 minus the prefix length, i.e. the number of host bits.
    net_parts = self.cons_ips.split('/')
    self.cidr = 32 - int(net_parts[1])
    self.ipbase = ip_to_int(net_parts[0])
    # Offsets 2 .. 2**host_bits - 2 relative to ipbase.
    # NOTE(review): presumably 0, 1 and the last offset are reserved for
    # network, gateway and broadcast addresses -- confirm.
    self.free_ips = list(range(2, (1 << self.cidr) - 1))
    logger.info("Free ip addresses pool %s" % str(self.free_ips))

    # every reported GPU id starts out mapped to an empty string
    self.gpu_lock = threading.Lock()
    self.gpu_status = {gpu['id']: "" for gpu in gputools.get_gpu_status()}

    self.start_report()
    logger.info('TaskController init success')
def __init__(self):
    """Build the worker servicer state from environment settings and etcd.

    Steps, in order: create the etcd client, read the master address
    from the ``service/master`` key, resolve this worker's IP from the
    ``NETWORK_DEVICE`` setting, load image-manager and path settings,
    call ``rm_all_batch_containers()``, create the message buffer and
    locks, record one empty entry per GPU reported by
    ``gputools.get_gpu_status()``, and finally call ``start_report()``.

    The process exits with status 1 (via ``sys.exit``) when the etcd
    client cannot be created or the master key cannot be read.
    """
    rpc_pb2_grpc.WorkerServicer.__init__(self)

    etcd_address = env.getenv("ETCD")
    logger.info("using ETCD %s" % etcd_address)
    cluster = env.getenv("CLUSTER_NAME")
    logger.info("using CLUSTER_NAME %s" % cluster)

    # init etcdlib client; any failure here is fatal
    try:
        self.etcdclient = etcdlib.Client(etcd_address, prefix=cluster)
    except Exception:
        logger.error("connect etcd failed, maybe etcd address not correct...")
        sys.exit(1)
    logger.info("etcd connected")

    # get master ip and report port
    found, master_address = self.etcdclient.getkey("service/master")
    if not found:
        logger.error("Fail to get master ip address.")
        sys.exit(1)
    self.master_ip = master_address
    logger.info("Get master ip address: %s" % self.master_ip)
    self.master_port = env.getenv('BATCH_MASTER_PORT')

    # get worker ip
    network_device = env.getenv('NETWORK_DEVICE')
    self.worker_ip = getip(network_device)
    logger.info("Worker ip is :%s" % self.worker_ip)

    self.imgmgr = imagemgr.ImageMgr()
    self.fspath = env.getenv('FS_PREFIX')
    self.confpath = env.getenv('DOCKLET_CONF')
    # Runs before the locks and message buffer below exist; keep it here.
    self.rm_all_batch_containers()

    # buffered task messages and the lock guarding them
    self.taskmsgs = []
    self.msgslock = threading.Lock()
    self.report_interval = 2

    self.lock = threading.Lock()
    self.mount_lock = threading.Lock()

    # every reported GPU id starts out mapped to an empty string
    self.gpu_lock = threading.Lock()
    self.gpu_status = {gpu['id']: "" for gpu in gputools.get_gpu_status()}

    self.start_report()
    logger.info('TaskWorker init success')
logger.info("using CLUSTER_NAME %s" % G_clustername) # get network interface net_dev = env.getenv("NETWORK_DEVICE") logger.info("using NETWORK_DEVICE %s" % net_dev) ipaddr = network.getip(net_dev) if ipaddr == False: logger.error("network device is not correct") sys.exit(1) else: logger.info("using ipaddr %s" % ipaddr) # init etcdlib client try: etcdclient = etcdlib.Client(etcdaddr, prefix=G_clustername) except Exception: logger.error("connect etcd failed, maybe etcd address not correct...") sys.exit(1) mode = 'recovery' if len(sys.argv) > 1 and sys.argv[1] == "new": mode = 'new' # get public IP and set public Ip in etcd public_IP = env.getenv("PUBLIC_IP") etcdclient.setkey("machines/publicIP/" + ipaddr, public_IP) # do some initialization for mode: new/recovery if mode == 'new': # clean and initialize the etcd table if etcdclient.isdir(""):