def send_activated_email(to_address, username):
    email_from_address = settings.get('EMAIL_FROM_ADDRESS')
    if (email_from_address in ['\'\'', '\"\"', '']):
        return
    #text = 'Dear '+ username + ':\n' + ' Your account in docklet has been activated'
    text = '<html><h4>Dear ' + username + ':</h4>'
    text += '''<p> Your account in <a href='%s'>%s</a> has been activated</p>
               <p> Enjoy your personal workspace in the cloud!</p>
               <br>
               <p> Note: DO NOT reply to this email!</p>
               <br><br>
               <p> <a href='http://docklet.unias.org'>Docklet Team</a>, SEI, PKU</p>
            ''' % (env.getenv("PORTAL_URL"), env.getenv("PORTAL_URL"))
    text += '<p>' + str(datetime.now()) + '</p>'
    text += '</html>'
    subject = 'Docklet account activated'
    msg = MIMEMultipart()
    textmsg = MIMEText(text, 'html', 'utf-8')
    msg['Subject'] = Header(subject, 'utf-8')
    msg['From'] = email_from_address
    msg['To'] = to_address
    msg.attach(textmsg)
    s = smtplib.SMTP()
    s.connect()
    s.sendmail(email_from_address, to_address, msg.as_string())
    s.close()
def send_beans_email(to_address, username, beans):
    email_from_address = settings.get('EMAIL_FROM_ADDRESS')
    if (email_from_address in ['\'\'', '\"\"', '']):
        return
    #text = 'Dear '+ username + ':\n' + ' Your beans in docklet are less than' + beans + '.'
    text = '<html><h4>Dear ' + username + ':</h4>'
    text += '''<p> Your beans in <a href='%s'>docklet</a> are %d now. </p>
               <p> If your beans are less than or equal to 0, all your workspaces will be stopped.</p>
               <p> Please apply for more beans to keep your workspaces running by the following link:</p>
               <p> <a href='%s/beans/application/'>%s/beans/application/</a></p>
               <br>
               <p> Note: DO NOT reply to this email!</p>
               <br><br>
               <p> <a href='http://docklet.unias.org'>Docklet Team</a>, SEI, PKU</p>
            ''' % (env.getenv("PORTAL_URL"), beans, env.getenv("PORTAL_URL"), env.getenv("PORTAL_URL"))
    text += '<p>' + str(datetime.datetime.now()) + '</p>'
    text += '</html>'
    subject = 'Docklet beans alert'
    msg = MIMEMultipart()
    textmsg = MIMEText(text, 'html', 'utf-8')
    msg['Subject'] = Header(subject, 'utf-8')
    msg['From'] = email_from_address
    msg['To'] = to_address
    msg.attach(textmsg)
    s = smtplib.SMTP()
    s.connect()
    s.sendmail(email_from_address, to_address, msg.as_string())
    s.close()
def __init__(self, vclustermgr, ulockmgr, check_interval=_ONE_DAY_IN_SECONDS):
    threading.Thread.__init__(self)
    self.thread_stop = False
    self.vclustermgr = vclustermgr
    self.ulockmgr = ulockmgr
    self.check_interval = check_interval
    self.warning_days = int(env.getenv("WARNING_DAYS"))
    self.release_days = int(env.getenv("RELEASE_DAYS"))
    if self.release_days <= self.warning_days:
        self.release_days = self.warning_days + 1
    logger.info("[ReleaseMgr] start with warning_days=%d release_days=%d" % (self.warning_days, self.release_days))
def TaskControllerServe():
    max_threads = int(env.getenv('BATCH_MAX_THREAD_WORKER'))
    worker_port = int(env.getenv('BATCH_WORKER_PORT'))
    logger.info("Max Threads on a worker is %d" % max_threads)
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=max_threads))
    rpc_pb2_grpc.add_WorkerServicer_to_server(TaskController(), server)
    server.add_insecure_port('[::]:' + str(worker_port))
    server.start()
    logger.info("Start TaskController Servicer on port:%d" % worker_port)
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
def _send_email(self, to_address, username, vcluster, days, is_released=True):
    email_from_address = settings.get('EMAIL_FROM_ADDRESS')
    if (email_from_address in ['\'\'', '\"\"', '']):
        return
    text = '<html><h4>Dear ' + username + ':</h4>'
    st_str = vcluster.stop_time.strftime("%Y-%m-%d %H:%M:%S")
    text += '''<p> Your workspace/vcluster(name:%s id:%d) in <a href='%s'>%s</a> has been stopped for more than %d days now (stopped at: %s). </p>
            ''' % (vcluster.clustername, vcluster.clusterid, env.getenv("PORTAL_URL"), env.getenv("PORTAL_URL"), days, st_str)
    if is_released:
        text += '''<p> Therefore, the workspace/vcluster has been released now.</p>
                   <p> <b>And the data in it couldn't be recovered</b> unless you saved it.</p>
                   <p> You can create a new workspace/vcluster if you need one.</p>
                '''
    else:
        #day_d = self.release_days - (datetime.datetime.now() - vcluster.stop_time).days
        release_date = vcluster.stop_time + datetime.timedelta(days=self.release_days)
        day_d = (release_date - datetime.datetime.now()).days
        rd_str = release_date.strftime("%Y-%m-%d %H:%M:%S")
        text += '''<p> It will be released after <b>%s (in about %d days)</b>.</p>
                   <p> <b>And the data in it couldn't be recovered after releasing.</b></p>
                   <p> Please start or save it before <b>%s (in about %d days)</b> if you want to keep the data.</p>
                ''' % (rd_str, day_d, rd_str, day_d)
    text += '''<br>
               <p> Note: DO NOT reply to this email!</p>
               <br><br>
               <p> <a href='http://docklet.unias.org'>Docklet Team</a>, SEI, PKU</p>
            '''
    subject = 'Docklet workspace/vcluster releasing alert'
    msg = MIMEMultipart()
    textmsg = MIMEText(text, 'html', 'utf-8')
    msg['Subject'] = Header(subject, 'utf-8')
    msg['From'] = email_from_address
    msg['To'] = to_address
    msg.attach(textmsg)
    s = smtplib.SMTP()
    s.connect()
    try:
        s.sendmail(email_from_address, to_address, msg.as_string())
    except Exception as err:
        logger.error(traceback.format_exc())
    s.close()
def __init__(self, nodemgr, monitor_fetcher, master_ip, scheduler_interval=2, external_logger=None):
    threading.Thread.__init__(self)
    self.thread_stop = False
    self.jobmgr = None
    self.master_ip = master_ip
    self.task_queue = []
    self.lazy_append_list = []
    self.lazy_delete_list = []
    self.lazy_stop_list = []
    self.task_queue_lock = threading.Lock()
    self.stop_lock = threading.Lock()
    self.add_lock = threading.Lock()
    #self.user_containers = {}
    self.scheduler_interval = scheduler_interval
    self.logger = logger
    self.master_port = env.getenv('BATCH_MASTER_PORT')
    self.worker_port = env.getenv('BATCH_WORKER_PORT')
    # nodes
    self.nodemgr = nodemgr
    self.monitor_fetcher = monitor_fetcher
    self.cpu_usage = {}
    self.gpu_usage = {}
    # self.all_nodes = None
    # self.last_nodes_info_update_time = 0
    # self.nodes_info_update_interval = 30 # (s)
    self.gpu_pending_tasks = {}
    self.network_lock = threading.Lock()
    batch_net = env.getenv('BATCH_NET')
    self.batch_cidr = int(batch_net.split('/')[1])
    batch_net = batch_net.split('/')[0]
    task_cidr = int(env.getenv('BATCH_TASK_CIDR'))
    task_cidr = min(task_cidr, 31 - self.batch_cidr)
    self.task_cidr = max(task_cidr, 2)
    self.base_ip = ip_to_int(batch_net)
    self.free_nets = []
    for i in range(0, (1 << (32 - self.batch_cidr)) - 1, (1 << self.task_cidr)):
        self.free_nets.append(i)
    #self.logger.info("Free nets addresses pool %s" % str(self.free_nets))
    self.logger.info("Each Batch Net CIDR:%s" % (str(self.task_cidr)))
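# The subnet arithmetic above assumes an ip_to_int helper (and an inverse for handing
# addresses back out). This is a minimal sketch of those helpers under that assumption,
# not this codebase's own copy:
def ip_to_int(ip):
    # "a.b.c.d" -> 32-bit integer
    parts = [int(x) for x in ip.split('.')]
    return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

def int_to_ip(num):
    # 32-bit integer -> "a.b.c.d"
    return '.'.join(str((num >> shift) & 0xff) for shift in (24, 16, 8, 0))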
def __init__(self, addr, etcdclient):
    self.addr = addr
    self.etcd = etcdclient
    self.libpath = env.getenv('DOCKLET_LIB')
    self.confpath = env.getenv('DOCKLET_CONF')
    self.fspath = env.getenv('FS_PREFIX')
    # set jupyter running dir in container
    self.rundir = "/home/jupyter"
    # set root running dir in container
    self.nodehome = "/root"
    self.lxcpath = "/var/lib/lxc"
    self.imgmgr = imagemgr.ImageMgr()
    self.historymgr = History_Manager()
def __init__(self, taskmgr):
    logger.info("Init jobmgr...")
    try:
        Batchjob.query.all()
    except:
        db.create_all(bind='__all__')
    self.job_map = {}
    self.taskmgr = taskmgr
    self.fspath = env.getenv('FS_PREFIX')
    self.lock = threading.Lock()
    self.userpoint = "http://" + env.getenv('USER_IP') + ":" + str(env.getenv('USER_PORT'))
    self.auth_key = env.getenv('AUTH_KEY')
    self.recover_jobs()
def mail_notification(self, notify_id):
    email_from_address = settings.get('EMAIL_FROM_ADDRESS')
    if (email_from_address in ['\'\'', '\"\"', '']):
        return {'success': 'true'}
    notify = Notification.query.filter_by(id=notify_id).first()
    notify_groups = NotificationGroups.query.filter_by(notification_id=notify_id).all()
    to_addr = []
    groups = []
    for group in notify_groups:
        groups.append(group.group_name)
    if 'all' in groups:
        users = User.query.all()
        for user in users:
            to_addr.append(user.e_mail)
    else:
        for group in notify_groups:
            users = User.query.filter_by(user_group=group.group_name).all()
            for user in users:
                to_addr.append(user.e_mail)
    content = notify.content
    text = '<html><h4>Dear ' + 'user' + ':</h4>'  #user.username + ':</h4>'
    text += '''<p> Your account in <a href='%s'>%s</a> has received a notification:</p>
               <p>%s</p>
               <br>
               <p> Note: DO NOT reply to this email!</p>
               <br><br>
               <p> <a href='http://docklet.unias.org'>Docklet Team</a>, SEI, PKU</p>
            ''' % (env.getenv("PORTAL_URL"), env.getenv("PORTAL_URL"), content)
    text += '<p>' + str(datetime.utcnow()) + '</p>'
    text += '</html>'
    subject = 'Docklet Notification: ' + notify.title
    msg = MIMEMultipart()
    textmsg = MIMEText(text, 'html', 'utf-8')
    msg['Subject'] = Header(subject, 'utf-8')
    msg['From'] = email_from_address
    msg.attach(textmsg)
    s = smtplib.SMTP()
    s.connect()
    for address in to_addr:
        try:
            # assigning to msg['To'] appends another header, so drop the old one first
            del msg['To']
            msg['To'] = address
            s.sendmail(email_from_address, address, msg.as_string())
        except Exception as e:
            logger.error(e)
    s.close()
    return {"success": 'true'}
def release_port_mapping(container_name, container_ip, container_port):
    global free_ports
    global allocated_ports
    global ports_lock
    if container_name not in allocated_ports.keys():
        return [False, "This container does not have a port mapping."]
    free_port = allocated_ports[container_name][container_port]
    public_ip = env.getenv("PUBLIC_IP")
    try:
        subprocess.run(['iptables', '-t', 'nat', '-D', 'PREROUTING', '-p', 'tcp',
                        '--dport', str(free_port), "-j", "DNAT",
                        '--to-destination', '%s:%s' % (container_ip, container_port)],
                       stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
    except subprocess.CalledProcessError as suberror:
        return [False, "release port mapping failed : %s" % suberror.stdout.decode('utf-8')]
    ports_lock.acquire()
    free_ports[free_port] = True
    allocated_ports[container_name].pop(container_port)
    ports_lock.release()
    return [True, ""]
def copyImage(self, user, image, token, target):
    path = "/opt/docklet/global/images/private/" + user + "/"
    '''image_info_file = open(path+"."+image+".info", 'r')
    [createtime, isshare] = image_info_file.readlines()
    recordshare = isshare
    isshare = "unshared"
    image_info_file.close()
    image_info_file = open(path+"."+image+".info", 'w')
    image_info_file.writelines([createtime, isshare])
    image_info_file.close()'''
    try:
        sys_run('ssh root@%s "mkdir -p %s"' % (target, path))
        sys_run('scp %s%s.tz root@%s:%s' % (path, image, target, path))
        #sys_run('scp %s.%s.description root@%s:%s' % (path,image,target,path))
        #sys_run('scp %s.%s.info root@%s:%s' % (path,image,target,path))
        resimage = Image.query.filter_by(ownername=user, imagename=image).first()
        auth_key = env.getenv('AUTH_KEY')
        url = "http://" + target + ":" + master_port + "/image/copytarget/"
        data = {"token": token, "auth_key": auth_key, "user": user, "imagename": image, "description": resimage.description}
        result = requests.post(url, data=data).json()
        logger.info("Response from target master: " + str(result))
    except Exception as e:
        logger.error(e)
        '''image_info_file = open(path+"."+image+".info", 'w')
        image_info_file.writelines([createtime, recordshare])
        image_info_file.close()'''
        return {'success': 'false', 'message': str(e)}
    '''image_info_file = open(path+"."+image+".info", 'w')
    image_info_file.writelines([createtime, recordshare])
    image_info_file.close()'''
    logger.info("copy image %s of %s to %s success" % (image, user, target))
    return {'success': 'true', 'action': 'copy image'}
def report(taskid, instanceid, status, token):
    global taskmgr
    master_port = env.getenv('BATCH_MASTER_PORT')
    channel = grpc.insecure_channel('%s:%s' % ('0.0.0.0', master_port))
    stub = MasterStub(channel)
    response = stub.report(ReportMsg(taskmsgs=[TaskMsg(taskid=taskid, username='******', vnodeid=instanceid, subTaskStatus=status, token=token)]))
def register(self, *args, **kwargs):
    '''
    Usage: register(user = modified_from_newuser())
    '''
    if (kwargs['user'].username == None or kwargs['user'].username == ''):
        return {"success": 'false', "reason": "Empty username"}
    user_check = User.query.filter_by(username=kwargs['user'].username).first()
    if (user_check != None and user_check.status != "init"):
        #for the activating form
        return {"success": 'false', "reason": "Unauthorized action"}
    newuser = kwargs['user']
    if (user_check != None and (user_check.status == "init")):
        db.session.delete(user_check)
        db.session.commit()
    else:
        newuser.password = hashlib.sha512(newuser.password.encode('utf-8')).hexdigest()
    db.session.add(newuser)
    db.session.commit()
    # if newuser status is normal, init some data for this user
    # now initialize for all kinds of users
    #if newuser.status == 'normal':
    path = env.getenv('DOCKLET_LIB')
    subprocess.call([path + "/master/userinit.sh", newuser.username])
    res = self.groupQuery(name=newuser.user_group)
    if res['success']:
        self.set_nfs_quota(newuser.username, res['data']['data'])
    return {"success": 'true'}
def rentServers(self, number):
    instanceids = []
    eipids = []
    eipaddrs = []
    for i in range(int(number)):
        instanceids.append(self.createInstance())
        time.sleep(2)
    time.sleep(10)
    for i in range(int(number)):
        [eipid, eipaddr] = self.createEIP()
        eipids.append(eipid)
        eipaddrs.append(eipaddr)
        time.sleep(2)
    masterip = env.getenv('ETCD').split(':')[0]
    for i in range(int(number)):
        self.associateEIP(instanceids[i], eipids[i])
        time.sleep(2)
    time.sleep(5)
    for instanceid in instanceids:
        self.startInstance(instanceid)
        time.sleep(2)
    time.sleep(10)
    while not self.isStarted(instanceids):
        time.sleep(10)
    time.sleep(5)
    return [masterip, eipaddrs]
def migrate_host(self, src_host, new_host_list, ulockmgr):
    [status, vcluster_list] = self.get_all_clusterinfo()
    if not status:
        return [False, vcluster_list]
    auth_key = env.getenv('AUTH_KEY')
    res = post_to_user("/master/user/groupinfo/", {'auth_key': auth_key})
    groups = json.loads(res['groups'])
    quotas = {}
    for group in groups:
        quotas[group['name']] = group['quotas']
    for vcluster in vcluster_list:
        if 'ownername' not in vcluster.keys():
            return [False, 'Ownername not in vcluster(%s).keys' % str(vcluster)]
        try:
            username = vcluster['ownername']
            ulockmgr.acquire(username)
            clustername = vcluster['clustername']
            rc_info = post_to_user("/master/user/recoverinfo/", {'username': username, 'auth_key': auth_key})
            groupname = rc_info['groupname']
            user_info = {"data": {"id": rc_info['uid'], "groupinfo": quotas[groupname]}}
            self.migrate_cluster(clustername, username, src_host, new_host_list, user_info)
        except Exception as ex:
            ulockmgr.release(username)
            logger.error(traceback.format_exc())
            return [False, str(ex)]
        ulockmgr.release(username)
    return [True, ""]
def __init__(self, networkmgr, etcdclient, addr, mode):
    self.addr = addr
    logger.info("begin initialize on %s" % self.addr)
    self.networkmgr = networkmgr
    self.etcd = etcdclient
    self.mode = mode
    self.workerport = env.getenv('WORKER_PORT')
    self.tasks = {}
    # delete the existing network
    logger.info("delete the existing network")
    [success, bridges] = ovscontrol.list_bridges()
    if success:
        for bridge in bridges:
            if bridge.startswith("docklet-br"):
                ovscontrol.del_bridge(bridge)
    else:
        logger.error(bridges)
    '''if self.mode == 'new':
        if netcontrol.bridge_exists('docklet-br'):
            netcontrol.del_bridge('docklet-br')
        netcontrol.new_bridge('docklet-br')
    else:
        if not netcontrol.bridge_exists('docklet-br'):
            logger.error("docklet-br not found")
            sys.exit(1)'''
    # get allnodes
    self.allnodes = self._nodelist_etcd("allnodes")
    self.runnodes = []
    self.batchnodes = []
    self.allrunnodes = []
    [status, runlist] = self.etcd.listdir("machines/runnodes")
    for node in runlist:
        nodeip = node['key'].rsplit('/', 1)[1]
        if node['value'] == 'ok':
            logger.info("running node %s" % nodeip)
            self.runnodes.append(nodeip)
    logger.info("all nodes are: %s" % self.allnodes)
    logger.info("run nodes are: %s" % self.runnodes)
    # start new thread to watch whether a new node joins
    logger.info("start thread to watch new nodes ...")
    self.thread_watchnewnode = threading.Thread(target=self._watchnewnode)
    self.thread_watchnewnode.start()
    # wait for all nodes to join
    # while(True):
    for i in range(10):
        allin = True
        for node in self.allnodes:
            if node not in self.runnodes:
                allin = False
                break
        if allin:
            logger.info("all necessary nodes have joined ...")
            break
        time.sleep(1)
    logger.info("run nodes are: %s" % self.runnodes)
def net_billings(self, username, now_bytes_total):
    global monitor_vnodes
    if not username in self.net_lastbillings.keys():
        self.net_lastbillings[username] = 0
    elif int(now_bytes_total / self.bytes_per_beans) < self.net_lastbillings[username]:
        self.net_lastbillings[username] = 0
    diff = int(now_bytes_total / self.bytes_per_beans) - self.net_lastbillings[username]
    if diff > 0:
        auth_key = env.getenv('AUTH_KEY')
        data = {"owner_name": username, "billing": diff, "auth_key": auth_key}
        header = {'Content-Type': 'application/x-www-form-urlencoded'}
        http = Http()
        [resp, content] = http.request("http://" + self.master_ip + "/billing/beans/", "POST", urlencode(data), headers=header)
        logger.info("response from master:" + content.decode('utf-8'))
        self.net_lastbillings[username] += diff
    monitor_vnodes[username]['net_stats']['net_billings'] = self.net_lastbillings[username]
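# Worked example of the watermark logic above (numbers illustrative, not from the
# configuration): with bytes_per_beans = 1 GB and now_bytes_total = 3.5 GB,
# int(3.5) = 3 beans are due in total; if net_lastbillings[username] is 1, then
# diff = 2 beans are billed to the master and the watermark advances to 3. When the
# traffic counter resets (total drops below the watermark), the watermark restarts at 0.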
def post(self):
    if (request.form['username']):
        data = {"user": request.form['username'], "key": request.form['password'], 'ip': request.remote_addr}
        result = dockletRequest.unauthorizedpost('/login/', data)
        ok = result and result.get('success', None)
        if (ok and (ok == "true")):
            # set cookie:docklet-jupyter-cookie for jupyter notebook
            resp = make_response(redirect(request.args.get('next', None) or '/dashboard/'))
            app_key = os.environ['APP_KEY']
            resp.set_cookie('docklet-jupyter-cookie', cookie_tool.generate_cookie(request.form['username'], app_key))
            # set session for docklet
            session['username'] = request.form['username']
            session['nickname'] = result['data']['nickname']
            session['description'] = result['data']['description']
            session['avatar'] = '/static/avatar/' + result['data']['avatar']
            session['usergroup'] = result['data']['group']
            session['status'] = result['data']['status']
            session['token'] = result['data']['token']
            return resp
        else:
            if (env.getenv('EXTERNAL_LOGIN') == 'True'):
                url = external_generate.external_login_url
                link = external_generate.external_login_link
            else:
                link = ''
                url = ''
            loginMsg = result.get('message', '')
            return render_template(self.template_path, loginMsg=loginMsg, link=link, url=url, open_registry=self.open_registry)
    else:
        return redirect('/login/')
def getalldesc(self):
    masterips = self.post_to_all()
    res = {}
    for masterip in masterips:
        mastername = getname(masterip)
        res[mastername] = env.getenv(mastername + "_desc")
    return res
def collect_gpuinfo(self):
    # load gpu price
    batch_gpu_billing = env.getenv("BATCH_GPU_BILLING")
    gpu_price = {}
    default_gpu_price = 100  # /cores*h
    if batch_gpu_billing:
        # examples: default:100,GeForce-GTX-1080-Ti:100,GeForce-GTX-2080-Ti:150,Tesla-V100-PCIE-16GB:200
        billing_configs = batch_gpu_billing.split(',')
        for config in billing_configs:
            config_sp = config.split(':')
            if config_sp[0] == 'default':
                default_gpu_price = int(config_sp[1])
            else:
                gpu_price[config_sp[0]] = int(config_sp[1])
    # reload gpu info
    if self.gpu_info_count == 0 or self.gpu_info_cache is None:
        self.gpu_info_cache = gputools.get_gpu_status()
        gpu_names = gputools.get_gpu_names()
        for index in range(len(self.gpu_info_cache)):
            if index < len(gpu_names):
                self.gpu_info_cache[index]['name'] = gpu_names[index]
                self.gpu_info_cache[index]['price'] = gpu_price.get(gpu_names[index], default_gpu_price)
    self.gpu_info_count = (self.gpu_info_count + 1) % 5
    return self.gpu_info_cache
def wrapper(*args, **kwargs):
    key_1 = env.getenv('AUTH_KEY')
    key_2 = request.form.get("auth_key", None)
    #logger.info(str(ip) + " " + str(G_userip))
    if key_2 is not None and key_1 == key_2:
        return func(*args, **kwargs)
    else:
        return json.dumps({'success': 'false', 'message': 'auth_key is required!'})
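# Hedged usage sketch: the wrapper above is the inner function of an auth decorator
# (its enclosing name is assumed here as auth_key_required, closing over func). An
# internal master-only endpoint would then be guarded like this; the route and handler
# names are illustrative, not from the codebase:
@app.route("/some/internal/api/", methods=['POST'])
@auth_key_required
def some_internal_api():
    # only reached when the POSTed auth_key matches env AUTH_KEY
    return json.dumps({'success': 'true'})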
def gen_hosts(self):
    username = self.username
    taskid = self.id
    logger.info("Generate hosts for user(%s) task(%s) base_ip(%s)" % (username, taskid, str(self.task_base_ip)))
    fspath = env.getenv('FS_PREFIX')
    if not os.path.isdir("%s/global/users/%s" % (fspath, username)):
        path = env.getenv('DOCKLET_LIB')
        subprocess.call([path + "/master/userinit.sh", username])
        logger.info("user %s directory not found, create it" % username)
    hosts_file = open("%s/global/users/%s/hosts/%s.hosts" % (fspath, username, "batch-" + taskid), "w")
    hosts_file.write("127.0.0.1 localhost\n")
    i = 0
    for ip in self.ips:
        hosts_file.write(ip + " batch-" + str(i) + "\n")
        i += 1
    hosts_file.close()
def ip_to_rpc(self, ip):
    if ip in self.allrunnodes:
        return xmlrpc.client.ServerProxy("http://%s:%s" % (ip, env.getenv("WORKER_PORT")))
    else:
        logger.info('Worker %s is not connected, create rpc client failed, push task into queue' % ip)
        if not ip in self.tasks:
            self.tasks[ip] = []
        return self.tasks[ip]
def billing_beans():
    logger.info("handle request: /billing/beans/")
    form = request.form
    owner_name = form.get("owner_name", None)
    billing = form.get("billing", None)
    if owner_name is None or billing is None:
        return json.dumps({'success': 'false', 'message': 'owner_name and beans fields are required.'})
    # convert after the None check so a missing field returns the error instead of raising
    billing = int(billing)
    G_lockmgr.acquire('__beans_' + str(owner_name))
    # update users' tables in database
    owner = User.query.filter_by(username=owner_name).first()
    if owner is None:
        logger.warning("Error!!! Billing User %s doesn't exist!" % (owner_name))
    else:
        # the debug logging and the deduction itself were redacted ("******") in this
        # copy; the two lines below are reconstructed from how oldbeans is used next
        oldbeans = owner.beans
        owner.beans -= billing
        if oldbeans > 0 and owner.beans <= 0 or oldbeans >= 100 and owner.beans < 100 or oldbeans >= 500 and owner.beans < 500 or oldbeans >= 1000 and owner.beans < 1000:
            # send mail to remind users of their beans if their beans decrease to 0,100,500 and 1000
            data = {"to_address": owner.e_mail, "username": owner.username, "beans": owner.beans}
            # request_master("/beans/mail/",data)
            beansapplicationmgr.send_beans_email(owner.e_mail, owner.username, int(owner.beans))
        try:
            db.session.commit()
        except Exception as err:
            db.session.rollback()
            logger.warning(traceback.format_exc())
            logger.warning(err)
            G_lockmgr.release('__beans_' + str(owner_name))
            return json.dumps({'success': 'false', 'message': 'Fail to write to databases.'})
        if owner.beans <= 0:
            # (condition reconstructed; the original test was lost to redaction)
            logger.info("The beans of User(" + str(owner) + ") are less than or equal to zero, all his or her vclusters will be stopped.")
            auth_key = env.getenv('AUTH_KEY')
            form = {'username': owner.username, 'auth_key': auth_key}
            request_master("/cluster/stopall/", form)
    G_lockmgr.release('__beans_' + str(owner_name))
    return json.dumps({'success': 'true'})
def initlogging(name='docklet'):
    # Defaults
    global logger
    homepath = env.getenv('FS_PREFIX')
    LOG_FILENAME = homepath + '/local/log/' + name + '.log'
    LOG_LEVEL = env.getenv('WEB_LOG_LEVEL')
    if LOG_LEVEL == "DEBUG":
        LOG_LEVEL = logging.DEBUG
    elif LOG_LEVEL == "INFO":
        LOG_LEVEL = logging.INFO
    elif LOG_LEVEL == "WARNING":
        LOG_LEVEL = logging.WARNING
    elif LOG_LEVEL == "ERROR":
        LOG_LEVEL = logging.ERROR
    elif LOG_LEVEL == "CRITICAL":
        LOG_LEVEL = logging.CRITICAL
    else:
        LOG_LEVEL = logging.DEBUG
    # Give the logger a unique name (good practice) and set its level to LOG_LEVEL
    logger = logging.getLogger(name)
    logger.setLevel(LOG_LEVEL)
    # Make a handler that writes to a file, starting a new file at midnight;
    # backupCount=0 means old log files are never deleted
    handler = logging.handlers.TimedRotatingFileHandler(LOG_FILENAME, when="midnight", backupCount=0, encoding='utf-8')
    # Format each log message like this
    formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(module)s[%(lineno)d] %(message)s')
    # Attach the formatter to the handler
    handler.setFormatter(formatter)
    # Attach the handler to the logger
    logger.addHandler(handler)
    # Replace stdout with logging to file at INFO level
    sys.stdout = RedirectLogger(logger, logging.INFO)
    # Replace stderr with logging to file at ERROR level
    sys.stderr = RedirectLogger(logger, logging.ERROR)
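# Minimal usage sketch (assumed call site, not from the codebase): call initlogging()
# once at process startup; afterwards the module-level logger is configured and all
# stdout/stderr output is redirected into the rotating log file.
initlogging('docklet-master')
logger.info("logging ready")
print("this line also ends up in the log, at INFO level")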
def __init__(self):
    rpc_pb2_grpc.WorkerServicer.__init__(self)
    etcdaddr = env.getenv("ETCD")
    logger.info("using ETCD %s" % etcdaddr)
    clustername = env.getenv("CLUSTER_NAME")
    logger.info("using CLUSTER_NAME %s" % clustername)
    # init etcdlib client
    try:
        self.etcdclient = etcdlib.Client(etcdaddr, prefix=clustername)
    except Exception:
        logger.error("connect etcd failed, maybe etcd address not correct...")
        sys.exit(1)
    else:
        logger.info("etcd connected")
    # get master ip and report port
    [success, masterip] = self.etcdclient.getkey("service/master")
    if not success:
        logger.error("Fail to get master ip address.")
        sys.exit(1)
    else:
        self.master_ip = masterip
        logger.info("Get master ip address: %s" % (self.master_ip))
    self.master_port = env.getenv('BATCH_MASTER_PORT')
    self.imgmgr = imagemgr.ImageMgr()
    self.fspath = env.getenv('FS_PREFIX')
    self.confpath = env.getenv('DOCKLET_CONF')
    self.taskmsgs = []
    self.msgslock = threading.Lock()
    self.report_interval = 2
    self.lock = threading.Lock()
    self.mount_lock = threading.Lock()
    self.cons_gateway = env.getenv('BATCH_GATEWAY')
    self.cons_ips = env.getenv('BATCH_NET')
    logger.info("Batch gateway ip address %s" % self.cons_gateway)
    logger.info("Batch ip pools %s" % self.cons_ips)
    self.cidr = 32 - int(self.cons_ips.split('/')[1])
    self.ipbase = ip_to_int(self.cons_ips.split('/')[0])
    self.free_ips = []
    for i in range(2, (1 << self.cidr) - 1):
        self.free_ips.append(i)
    logger.info("Free ip addresses pool %s" % str(self.free_ips))
    self.gpu_lock = threading.Lock()
    self.gpu_status = {}
    gpus = gputools.get_gpu_status()
    for gpu in gpus:
        self.gpu_status[gpu['id']] = ""
    self.start_report()
    logger.info('TaskController init success')
def migrate_cluster():
    global G_vclustermgr
    global G_ulockmgr
    user = request.form.get('username', None)
    if user is None:
        return json.dumps({'success': 'false', 'message': 'User is required!'})
    clustername = request.form.get('clustername', None)
    if (clustername == None):
        return json.dumps({'success': 'false', 'message': 'clustername is null'})
    new_hosts = request.form.get('new_hosts', None)
    if (new_hosts == None):
        return json.dumps({'success': 'false', 'message': 'new_hosts is null'})
    new_host_list = new_hosts.split(',')
    G_ulockmgr.acquire(user)
    auth_key = env.getenv('AUTH_KEY')
    try:
        logger.info("handle request : migrate cluster to %s. user:%s clustername:%s" % (str(new_hosts), user, clustername))
        res = post_to_user("/master/user/groupinfo/", {'auth_key': auth_key})
        groups = json.loads(res['groups'])
        quotas = {}
        for group in groups:
            #logger.info(group)
            quotas[group['name']] = group['quotas']
        rc_info = post_to_user("/master/user/recoverinfo/", {'username': user, 'auth_key': auth_key})
        groupname = rc_info['groupname']
        user_info = {"data": {"id": rc_info['uid'], "groupinfo": quotas[groupname]}}
        logger.info("Migrate cluster for user(%s) cluster(%s) to new_hosts(%s). user_info(%s)" % (user, clustername, str(new_host_list), user_info))
        [status, msg] = G_vclustermgr.migrate_cluster(clustername, user, new_host_list, user_info)
        if not status:
            logger.error(msg)
            return json.dumps({'success': 'false', 'message': msg})
        return json.dumps({'success': 'true', 'action': 'migrate_container'})
    except Exception as ex:
        logger.error(traceback.format_exc())
        return json.dumps({'success': 'false', 'message': str(ex)})
    finally:
        G_ulockmgr.release(user)
def __init__(self, nodemgr, networkmgr, etcdclient, addr, mode, distributedgw='False'):
    self.mode = mode
    self.distributedgw = distributedgw
    self.nodemgr = nodemgr
    self.imgmgr = imagemgr.ImageMgr()
    self.networkmgr = networkmgr
    self.addr = addr
    self.etcd = etcdclient
    self.defaultsize = env.getenv("CLUSTER_SIZE")
    self.fspath = env.getenv("FS_PREFIX")
    self.clusterid_locks = threading.Lock()
    # check database
    try:
        Container.query.all()
        PortMapping.query.all()
        VCluster.query.all()
    except:
        # create database
        db.create_all()
    logger.info("vcluster start on %s" % (self.addr))
    if self.mode == 'new':
        logger.info("starting in new mode on %s" % (self.addr))
        # check if all cluster data was deleted in httprest.py
        clean = True
        usersdir = self.fspath + "/global/users/"
        vclusters = VCluster.query.all()
        if len(vclusters) != 0:
            clean = False
        for user in os.listdir(usersdir):
            if len(os.listdir(usersdir + user + "/hosts")) > 0:
                clean = False
        if not clean:
            logger.error("cluster files not clean, start failed")
            sys.exit(1)
    elif self.mode == "recovery":
        logger.info("starting in recovery mode on %s" % (self.addr))
        self.recover_allclusters()
    else:
        logger.error("not supported mode:%s" % self.mode)
        sys.exit(1)
def __init__(self, nodemgr, monitor_fetcher, master_ip, scheduler_interval=2, external_logger=None):
    threading.Thread.__init__(self)
    self.thread_stop = False
    self.jobmgr = None
    self.master_ip = master_ip
    self.task_queue = []
    self.lazy_append_list = []
    self.lazy_delete_list = []
    self.lazy_stop_list = []
    self.task_queue_lock = threading.Lock()
    self.stop_lock = threading.Lock()
    self.add_lock = threading.Lock()
    #self.user_containers = {}
    self.scheduler_interval = scheduler_interval
    self.logger = logger
    self.master_port = env.getenv('BATCH_MASTER_PORT')
    self.worker_port = env.getenv('BATCH_WORKER_PORT')
    # nodes
    self.nodemgr = nodemgr
    self.monitor_fetcher = monitor_fetcher
    self.cpu_usage = {}
    self.gpu_usage = {}
    # self.all_nodes = None
    # self.last_nodes_info_update_time = 0
    # self.nodes_info_update_interval = 30 # (s)
    self.network_lock = threading.Lock()
    batch_net = env.getenv('BATCH_NET')
    self.batch_cidr = int(batch_net.split('/')[1])
    batch_net = batch_net.split('/')[0]
    task_cidr = int(env.getenv('BATCH_TASK_CIDR'))
    task_cidr = min(task_cidr, 31 - self.batch_cidr)
    self.task_cidr = max(task_cidr, 2)
    self.base_ip = ip_to_int(batch_net)
    self.free_nets = []
    for i in range(0, (1 << (32 - self.batch_cidr)) - 1, (1 << self.task_cidr)):
        self.free_nets.append(i)
    self.logger.info("Free nets addresses pool %s" % str(self.free_nets))
    self.logger.info("Each Batch Net CIDR:%s" % (str(self.task_cidr)))
def get(self):
    if is_authenticated():
        refreshInfo()
        return redirect(request.args.get('next', None) or '/dashboard/')
    if (env.getenv('EXTERNAL_LOGIN') == 'True'):
        url = external_generate.external_login_url
        link = external_generate.external_login_link
    else:
        link = ''
        url = ''
    return render_template(self.template_path, loginMsg="", link=link, url=url, open_registry=self.open_registry)
def acquire_port_mapping(container_name, container_ip, container_port, host_port=None):
    global free_ports
    global allocated_ports
    global ports_lock
    ports_lock.acquire()
    # if container_name in allocated_ports.keys():
    #     return [False, "This container already has a port mapping."]
    if container_name not in allocated_ports.keys():
        allocated_ports[container_name] = {}
    elif container_port in allocated_ports[container_name].keys():
        ports_lock.release()
        return [False, "This container port already has a port mapping."]
    if container_name == "" or container_ip == "" or container_port == "":
        ports_lock.release()
        return [False, "Node Name or Node IP or Node Port can't be null."]
    #print("acquire_port_mapping1")
    free_port = 1
    if host_port is not None:
        # recover from host_port
        free_port = int(host_port)
    else:
        # acquire new free port
        while free_port <= 65535:
            if free_ports[free_port]:
                break
            free_port += 1
        if free_port == 65536:
            ports_lock.release()
            return [False, "No free ports."]
    free_ports[free_port] = False
    allocated_ports[container_name][container_port] = free_port
    public_ip = env.getenv("PUBLIC_IP")
    ports_lock.release()
    try:
        subprocess.run(['iptables', '-t', 'nat', '-A', 'PREROUTING', '-p', 'tcp',
                        '--dport', str(free_port), "-j", "DNAT",
                        '--to-destination', '%s:%s' % (container_ip, container_port)],
                       stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, check=True)
        return [True, str(free_port)]
    except subprocess.CalledProcessError as suberror:
        return [False, "set port mapping failed : %s" % suberror.stdout.decode('utf-8')]
class external_loginView(normalView):
    if (env.getenv('EXTERNAL_LOGIN') == 'True'):
        template_path = external_generate.html_path

    @classmethod
    def post(self):
        return render_template(self.template_path)

    @classmethod
    def get(self):
        return self.post()
def __init__(self):
    settingPath = env.getenv('FS_PREFIX') + '/local/settings.conf'
    if not os.path.exists(settingPath):
        # write an empty settings file and keep it in memory, so that
        # self.setting is defined on both paths
        self.setting = {}
        settingFile = open(settingPath, 'w')
        settingFile.write(json.dumps(self.setting))
        settingFile.close()
    else:
        settingFile = open(settingPath, 'r')
        settingText = settingFile.read()
        settingFile.close()
        self.setting = json.loads(settingText)
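# Hedged sketch: callers elsewhere in this section use settings.get('EMAIL_FROM_ADDRESS');
# an accessor consistent with that call site could look like this (assumed, since the
# real method body is not shown here):
def get(self, key, default=''):
    # return the configured value, or the default when the key was never set
    return self.setting.get(key, default)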
def config_prepare(content):
    content = content.replace("%ROOTFS%", rootfs)
    content = content.replace("%HOSTNAME%", "batch-%s" % str(instanceid))
    content = content.replace("%CONTAINER_MEMORY%", str(quota.memory))
    content = content.replace("%CONTAINER_CPU%", str(quota.cpu * 100000))
    content = content.replace("%FS_PREFIX%", self.fspath)
    content = content.replace("%LXCSCRIPT%", env.getenv("LXC_SCRIPT"))
    content = content.replace("%USERNAME%", username)
    content = content.replace("%LXCNAME%", lxcname)
    content = content.replace("%IP%", ip)
    content = content.replace("%GATEWAY%", self.cons_gateway)
    return content
def send_remind_activating_email(username):
    #admin_email_address = env.getenv('ADMIN_EMAIL_ADDRESS')
    nulladdr = ['\'\'', '\"\"', '']
    email_from_address = settings.get('EMAIL_FROM_ADDRESS')
    admin_email_address = settings.get('ADMIN_EMAIL_ADDRESS')
    if (email_from_address in nulladdr or admin_email_address in nulladdr):
        return
    #text = 'Dear '+ username + ':\n' + ' Your account in docklet has been activated'
    text = '<html><h4>Dear ' + 'admin' + ':</h4>'
    text += '''<p> An activating request for %s in <a href='%s'>%s</a> has been sent</p>
               <p> Please check it!</p>
               <br/><br/>
               <p> Docklet Team, SEI, PKU</p>
            ''' % (username, env.getenv("PORTAL_URL"), env.getenv("PORTAL_URL"))
    text += '<p>' + str(datetime.utcnow()) + '</p>'
    text += '</html>'
    subject = 'An activating request in Docklet has been sent'
    if admin_email_address[0] == '"':
        admins_addr = admin_email_address[1:-1].split(" ")
    else:
        admins_addr = admin_email_address.split(" ")
    alladdr = ""
    for addr in admins_addr:
        alladdr = alladdr + addr + ", "
    alladdr = alladdr[:-2]
    msg = MIMEMultipart()
    textmsg = MIMEText(text, 'html', 'utf-8')
    msg['Subject'] = Header(subject, 'utf-8')
    msg['From'] = email_from_address
    msg['To'] = alladdr
    msg.attach(textmsg)
    s = smtplib.SMTP()
    s.connect()
    try:
        s.sendmail(email_from_address, admins_addr, msg.as_string())
    except Exception as e:
        logger.error(e)
    s.close()
def config_prepare(content):
    content = content.replace("%ROOTFS%", rootfs)
    content = content.replace("%HOSTNAME%", hostname)
    content = content.replace("%TASKID%", taskid)
    content = content.replace("%CONTAINER_MEMORY%", str(quota.memory))
    content = content.replace("%CONTAINER_CPU%", str(quota.cpu * 100000))
    content = content.replace("%FS_PREFIX%", self.fspath)
    content = content.replace("%LXCSCRIPT%", env.getenv("LXC_SCRIPT"))
    content = content.replace("%USERNAME%", username)
    content = content.replace("%LXCNAME%", lxcname)
    content = content.replace("%VETHPAIR%", str(taskid) + "-" + str(vnodeid))
    content = content.replace("%IP%", ipaddr)
    content = content.replace("%BRNAME%", brname)
    content = content.replace("%GATEWAY%", gateway)
    return content
def recover_group(group_name, file_path="/opt/docklet/local/docklet-storage"):
    storage = env.getenv("STORAGE")
    if storage == "file":
        if not os.path.exists(file_path):
            logger.error("%s not found, unable to recover VG" % file_path)
            return False
        # recover mountpoint
        Ret = sys_run("losetup /dev/loop0")
        if Ret.returncode != 0:
            Ret = sys_run("losetup /dev/loop0 " + file_path)
            if Ret.returncode != 0:
                logger.error("losetup failed:%s" % Ret.stdout.decode('utf-8'))
                return False
            time.sleep(1)
        # recover vg
        Ret = sys_run("vgdisplay " + group_name)
        if Ret.returncode != 0:
            Ret = sys_run("vgcreate %s /dev/loop0" % group_name)
            if Ret.returncode != 0:
                logger.error("create VG %s failed:%s" % (group_name, Ret.stdout.decode('utf-8')))
                return False
        logger.info("recover VG %s success" % group_name)
    elif storage == "disk":
        disk = env.getenv("DISK")
        if disk is None:
            logger.error("use disk for storage but no physical disk is configured")
            return False
        # recover vg
        Ret = sys_run("vgdisplay " + group_name)
        if Ret.returncode != 0:
            Ret = sys_run("vgcreate %s %s" % (group_name, disk))
            if Ret.returncode != 0:
                logger.error("create VG %s failed:%s" % (group_name, Ret.stdout.decode('utf-8')))
                return False
        logger.info("recover VG %s success" % group_name)
def start(self):
    # start collector
    self.con_collector.start()
    self.hosts_collector.start()
    logger.info("Monitor Collector has been started.")
    # the worker changes its state itself, independently from the master
    if self.workertype == "normal":
        self.etcd.setkey("machines/runnodes/" + self.addr, "work")
        publicIP = env.getenv("PUBLIC_IP")
        self.etcd.setkey("machines/publicIP/" + self.addr, publicIP)
        self.thread_sendheartbeat = threading.Thread(target=self.sendheartbeat)
        self.thread_sendheartbeat.start()
    # start serving for rpc
    logger.info("begins to work")
    self.rpcserver.serve_forever()
def update(*args, **kwargs):
    try:
        if ('user_group' not in kwargs):
            return {"success": 'false', "reason": "Cannot get user_group"}
        user_group = kwargs['user_group']
        if (not ((user_group == 'admin') or (user_group == 'root'))):
            return {"success": 'false', "reason": 'Unauthorized Action'}
        newSetting = kwargs['newSetting']
        settingPath = env.getenv('FS_PREFIX') + '/local/settings.conf'
        settingText = json.dumps(newSetting)
        settingFile = open(settingPath, 'w')
        settingFile.write(settingText)
        settingFile.close()
        args[0].setting = newSetting
        return {'success': 'true'}
    except:
        return {'success': 'false'}
def create_container(self, instanceid, username, image, lxcname, quota):
    # acquire ip
    [status, ip] = self.acquire_ip()
    if not status:
        return [False, ip]
    # prepare image and filesystem
    status = self.imgmgr.prepareFS(username, image, lxcname, str(quota.disk))
    if not status:
        self.release_ip(ip)
        return [False, "Create container for batch failed when preparing filesystem"]
    rootfs = "/var/lib/lxc/%s/rootfs" % lxcname
    if not os.path.isdir("%s/global/users/%s" % (self.fspath, username)):
        path = env.getenv('DOCKLET_LIB')
        subprocess.call([path + "/master/userinit.sh", username])
        logger.info("user %s directory not found, create it" % username)
    sys_run("mkdir -p /var/lib/lxc/%s" % lxcname)
    logger.info("generate config file for %s" % lxcname)

    def config_prepare(content):
        content = content.replace("%ROOTFS%", rootfs)
        content = content.replace("%HOSTNAME%", "batch-%s" % str(instanceid))
        content = content.replace("%CONTAINER_MEMORY%", str(quota.memory))
        content = content.replace("%CONTAINER_CPU%", str(quota.cpu * 100000))
        content = content.replace("%FS_PREFIX%", self.fspath)
        content = content.replace("%LXCSCRIPT%", env.getenv("LXC_SCRIPT"))
        content = content.replace("%USERNAME%", username)
        content = content.replace("%LXCNAME%", lxcname)
        content = content.replace("%IP%", ip)
        content = content.replace("%GATEWAY%", self.cons_gateway)
        return content

    logger.info(self.confpath)
    conffile = open(self.confpath + "/container.batch.conf", 'r')
    conftext = conffile.read()
    conffile.close()
    conftext = config_prepare(conftext)
    conffile = open("/var/lib/lxc/%s/config" % lxcname, 'w')
    conffile.write(conftext)
    conffile.close()
    return [True, ip]
def __init__(self):
    rpc_pb2_grpc.WorkerServicer.__init__(self)
    etcdaddr = env.getenv("ETCD")
    logger.info("using ETCD %s" % etcdaddr)
    clustername = env.getenv("CLUSTER_NAME")
    logger.info("using CLUSTER_NAME %s" % clustername)
    # init etcdlib client
    try:
        self.etcdclient = etcdlib.Client(etcdaddr, prefix=clustername)
    except Exception:
        logger.error("connect etcd failed, maybe etcd address not correct...")
        sys.exit(1)
    else:
        logger.info("etcd connected")
    # get master ip and report port
    [success, masterip] = self.etcdclient.getkey("service/master")
    if not success:
        logger.error("Fail to get master ip address.")
        sys.exit(1)
    else:
        self.master_ip = masterip
        logger.info("Get master ip address: %s" % (self.master_ip))
    self.master_port = env.getenv('BATCH_MASTER_PORT')
    # get worker ip
    self.worker_ip = getip(env.getenv('NETWORK_DEVICE'))
    logger.info("Worker ip is :%s" % self.worker_ip)
    self.imgmgr = imagemgr.ImageMgr()
    self.fspath = env.getenv('FS_PREFIX')
    self.confpath = env.getenv('DOCKLET_CONF')
    self.rm_all_batch_containers()
    self.taskmsgs = []
    self.msgslock = threading.Lock()
    self.report_interval = 2
    self.lock = threading.Lock()
    self.mount_lock = threading.Lock()
    self.gpu_lock = threading.Lock()
    self.gpu_status = {}
    gpus = gputools.get_gpu_status()
    for gpu in gpus:
        self.gpu_status[gpu['id']] = ""
    self.start_report()
    logger.info('TaskWorker init success')
def init_new():
    Free_Ports_str = env.getenv("ALLOCATED_PORTS")
    global free_ports
    #logger.info(Free_Ports_str)
    portsranges = Free_Ports_str.split(',')
    #logger.info(portsranges)
    for portsrange in portsranges:
        portsrange = portsrange.strip().split('-')
        start = int(portsrange[0])
        end = int(portsrange[1])
        if end < start or end > 65535 or start < 1:
            return [False, "Illegal port ranges."]
        i = start
        #logger.info(str(start)+" "+str(end))
        while i <= end:
            free_ports[i] = True
            i += 1
    #logger.info(free_ports[10001])
    return [True, ""]
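# Hedged end-to-end sketch of the port-mapping pool (names and addresses illustrative,
# not from the codebase): init_new() fills free_ports from the ALLOCATED_PORTS ranges,
# acquire_port_mapping() reserves a host port and installs the DNAT rule, and
# release_port_mapping() undoes both.
[ok, msg] = init_new()
if ok:
    [ok, host_port] = acquire_port_mapping("vnode-1", "192.168.1.2", "8888")
    if ok:
        # host_port now holds the public port forwarded to 192.168.1.2:8888
        release_port_mapping("vnode-1", "192.168.1.2", "8888")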