def do_containers(self):
    sections = []
    loads = []

    iac = []
    ac = []
    sections.append(["Active", ac])
    sections.append(["Inactive", iac])

    delete_id = self.get_argument("delete_id", '')
    stop_id = self.get_argument("stop_id", '')
    stop_all = (self.get_argument('stop_all', None) is not None)

    if stop_all:
        all_containers = JBoxContainer.DCKR.containers(all=False)
        for c in all_containers:
            cont = JBoxContainer(c['Id'])
            cname = cont.get_name()
            if cname is None:
                self.log_info("Admin: Not stopping unknown " + cont.debug_str())
            elif cname not in self.config("protected_docknames"):
                cont.stop()
    elif stop_id != '':
        cont = JBoxContainer(stop_id)
        cont.stop()
    elif delete_id != '':
        cont = JBoxContainer(delete_id)
        cont.delete()

    # list them all again (in case we deleted some); all=True includes stopped containers
    jsonobj = JBoxContainer.DCKR.containers(all=True)
    for c in jsonobj:
        o = dict()
        o["Id"] = c["Id"][0:12]
        o["Status"] = c["Status"]
        if ("Names" in c) and (c["Names"] is not None):
            o["Name"] = c["Names"][0]
        else:
            o["Name"] = "/None"
        if (c["Ports"] is None) or (c["Ports"] == []):
            iac.append(o)
        else:
            ac.append(o)

    # gather cluster load statistics
    average_load = CloudHelper.get_cluster_average_stats('Load')
    if average_load is not None:
        loads.append({'instance': 'Average', 'load': average_load})

    machine_loads = CloudHelper.get_cluster_stats('Load')
    if machine_loads is not None:
        for n, v in machine_loads.iteritems():
            loads.append({'instance': n, 'load': v})

    return sections, loads
@staticmethod
def do_housekeeping():
    server_delete_timeout = JBox.cfg['expire']
    JBoxContainer.maintain(max_timeout=server_delete_timeout,
                           inactive_timeout=JBox.cfg['inactivity_timeout'],
                           protected_names=JBox.cfg['protected_docknames'])
    if JBox.cfg['scale_down'] and (JBoxContainer.num_active() == 0) and \
            (JBoxContainer.num_stopped() == 0) and CloudHelper.should_terminate():
        JBox.log_info("terminating to scale down")
        CloudHelper.terminate_instance()
def __init__(self):
    cfg = JBox.cfg = read_config()
    dckr = docker.Client()
    cloud_cfg = cfg['cloud_host']

    JBoxHandler.configure(cfg)

    JBoxDB.configure(cfg)
    if 'jbox_users_v2' in cloud_cfg:
        JBoxUserV2.NAME = cloud_cfg['jbox_users_v2']
    if 'jbox_invites' in cloud_cfg:
        JBoxInvite.NAME = cloud_cfg['jbox_invites']
    if 'jbox_accounting_v2' in cloud_cfg:
        JBoxAccountingV2.NAME = cloud_cfg['jbox_accounting_v2']

    CloudHelper.configure(has_s3=cloud_cfg['s3'],
                          has_dynamodb=cloud_cfg['dynamodb'],
                          has_cloudwatch=cloud_cfg['cloudwatch'],
                          has_autoscale=cloud_cfg['autoscale'],
                          has_route53=cloud_cfg['route53'],
                          scale_up_at_load=cloud_cfg['scale_up_at_load'],
                          scale_up_policy=cloud_cfg['scale_up_policy'],
                          autoscale_group=cloud_cfg['autoscale_group'],
                          route53_domain=cloud_cfg['route53_domain'],
                          region=cloud_cfg['region'],
                          install_id=cloud_cfg['install_id'])

    backup_location = os.path.expanduser(cfg['backup_location'])
    user_home_img = os.path.expanduser(cfg['user_home_image'])
    mnt_location = os.path.expanduser(cfg['mnt_location'])
    backup_bucket = cloud_cfg['backup_bucket']
    make_sure_path_exists(backup_location)

    JBoxContainer.configure(dckr, cfg['docker_image'],
                            cfg['mem_limit'], cfg['cpu_limit'], cfg['disk_limit'],
                            [os.path.join(mnt_location, '${DISK_ID}')],
                            mnt_location, backup_location, user_home_img,
                            cfg['numlocalmax'], cfg['numdisksmax'],
                            backup_bucket=backup_bucket)

    self.application = tornado.web.Application([
        (r"/", MainHandler),
        (r"/hostlaunchipnb/", AuthHandler),
        (r"/hostadmin/", AdminHandler),
        (r"/ping/", PingHandler),
        (r"/cors/", CorsHandler)
    ])
    cookie_secret = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in xrange(32))
    self.application.settings["cookie_secret"] = cookie_secret
    self.application.settings["google_oauth"] = cfg["google_oauth"]
    self.application.listen(cfg["port"])
    self.ioloop = tornado.ioloop.IOLoop.instance()

    # run container maintenance every 5 minutes
    run_interval = 5 * 60 * 1000
    self.log_info("Container maintenance every " + str(run_interval / (60 * 1000)) + " minutes")
    self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping, run_interval, self.ioloop)
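# A typical entry point for the server constructed above (hypothetical; the
# actual launcher script is not part of these snippets):
if __name__ == "__main__":
    JBox().run()  # registers DNS, publishes stats, starts the housekeeping timer and the IOLoop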
def chk_and_launch_docker(self, sessname, creds, authtok, user_id):
    cont = JBoxContainer.get_by_name(sessname)
    nhops = int(self.get_argument('h', 0))
    self.log_debug("got hop " + repr(nhops) + " for session " + repr(sessname))
    self.log_debug("have existing container for " + repr(sessname) + ": " + repr(cont is not None))
    if cont is not None:
        self.log_debug("container running: " + str(cont.is_running()))

    if ((cont is None) or (not cont.is_running())) and (not CloudHelper.should_accept_session()):
        if cont is not None:
            cont.backup()
            cont.delete()
        self.clear_container_cookies()
        self.set_header('Connection', 'close')
        self.request.connection.no_keep_alive = True
        if nhops > self.config('numhopmax', 0):
            self.rendertpl("index.tpl", cfg=self.config(), state=self.state(
                error="Maximum number of JuliaBox instances are active. Please try again after some time.",
                success=''))
        else:
            self.redirect('/?h=' + str(nhops + 1))
    else:
        cont = JBoxContainer.launch_by_name(sessname, user_id, True)
        (shellport, uplport, ipnbport) = cont.get_host_ports()
        sign = signstr(sessname + str(shellport) + str(uplport) + str(ipnbport), self.config("sesskey"))

        self.set_container_cookies({
            "sessname": sessname,
            "hostshell": shellport,
            "hostupload": uplport,
            "hostipnb": ipnbport,
            "sign": sign
        })
        self.set_lb_tracker_cookie()
        self.rendertpl("ipnbsess.tpl", sessname=sessname, cfg=self.config(),
                       creds=creds, authtok=authtok, user_id=user_id)
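# For illustration: how a subsequent request could be validated against the
# cookies set above, assuming signstr is a deterministic signing helper (it is
# defined elsewhere in the project). verify_session_cookies is hypothetical.
def verify_session_cookies(handler):
    payload = handler.get_cookie("sessname") + handler.get_cookie("hostshell") + \
              handler.get_cookie("hostupload") + handler.get_cookie("hostipnb")
    return handler.get_cookie("sign") == signstr(payload, handler.config("sesskey"))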
def __init__(self):
    dckr = docker.Client()
    cfg = read_config()

    backup_location = os.path.expanduser(cfg['backup_location'])
    user_home_img = os.path.expanduser(cfg['user_home_image'])
    mnt_location = os.path.expanduser(cfg['mnt_location'])
    cloud_cfg = cfg['cloud_host']
    backup_bucket = cloud_cfg['backup_bucket']
    make_sure_path_exists(backup_location)

    CloudHelper.configure(has_s3=cloud_cfg['s3'],
                          has_dynamodb=cloud_cfg['dynamodb'],
                          has_cloudwatch=cloud_cfg['cloudwatch'],
                          has_autoscale=cloud_cfg['autoscale'],
                          has_route53=cloud_cfg['route53'],
                          scale_up_at_load=cloud_cfg['scale_up_at_load'],
                          scale_up_policy=cloud_cfg['scale_up_policy'],
                          autoscale_group=cloud_cfg['autoscale_group'],
                          route53_domain=cloud_cfg['route53_domain'],
                          region=cloud_cfg['region'],
                          install_id=cloud_cfg['install_id'])

    JBoxContainer.configure(dckr, cfg['docker_image'],
                            cfg['mem_limit'], cfg['cpu_limit'], cfg['disk_limit'],
                            [os.path.join(mnt_location, '${DISK_ID}')],
                            mnt_location, backup_location, user_home_img,
                            cfg['numlocalmax'], cfg['numdisksmax'],
                            backup_bucket=backup_bucket)

    # back up user files every half of delete_stopped_timeout, but at least every 3 minutes.
    # note: the configured expiry time must be at least twice this interval.
    self.run_interval = int(cfg['delete_stopped_timeout']) / 2
    if self.run_interval < 3 * 60:
        self.run_interval = 3 * 60

    self.delete_stopped_timeout = int(cfg['delete_stopped_timeout'])
    self.log_info("Backup interval: " + str(self.run_interval / 60) + " minutes")
    self.log_info("Stopped containers will be deleted after " + str(self.delete_stopped_timeout / 60) + " minutes")
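# For reference, a minimal configuration covering the keys read above. All
# values are illustrative only; read_config() and the real file format live
# elsewhere in the project.
SAMPLE_CFG = {
    'backup_location': '~/.juliabox/backups',
    'user_home_image': '~/.juliabox/user_home.tar.gz',
    'mnt_location': '~/.juliabox/mnt',
    'docker_image': 'juliabox/juliabox',
    'mem_limit': 1000000000,
    'cpu_limit': 1024,
    'disk_limit': 2000000000,
    'numlocalmax': 10,
    'numdisksmax': 30,
    'delete_stopped_timeout': 1800,
    'cloud_host': {
        's3': True, 'dynamodb': True, 'cloudwatch': True,
        'autoscale': False, 'route53': False,
        'scale_up_at_load': 70, 'scale_up_policy': '',
        'autoscale_group': '', 'route53_domain': '',
        'region': 'us-east-1', 'install_id': 'JuliaBox',
        'backup_bucket': 'juliabox-backups'
    }
}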
def backup(self):
    JBoxContainer.log_info("Backing up " + self.debug_str() + " at " + str(JBoxContainer.BACKUP_LOC))
    cname = self.get_name()
    if cname is None:
        return

    bkup_file = os.path.join(JBoxContainer.BACKUP_LOC, cname[1:] + ".tar.gz")

    # find the timestamp of the latest backup (S3 copy preferred over local)
    if not self.is_running():
        k = JBoxContainer.pull_from_s3(bkup_file, True)
        bkup_file_mtime = None
        if k is not None:
            bkup_file_mtime = JBoxContainer.parse_iso_time(k.get_metadata('backup_time'))
        elif os.path.exists(bkup_file):
            bkup_file_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(bkup_file), pytz.utc) + \
                              datetime.timedelta(seconds=JBoxContainer.LOCAL_TZ_OFFSET)

        if bkup_file_mtime is not None:
            tstart = self.time_started()
            tstop = self.time_finished()
            tcomp = tstart if ((tstop is None) or (tstart > tstop)) else tstop
            if tcomp <= bkup_file_mtime:
                JBoxContainer.log_info("Already backed up " + self.debug_str())
                return

    disk_ids_used = self.get_disk_ids_used()
    if len(disk_ids_used) == 0:
        JBoxContainer.log_info("No disks to back up")
        return
    if len(disk_ids_used) > 1:
        JBoxContainer.log_info("Cannot back up more than one disk per user yet. Backing up the first disk.")

    disk_id_used = disk_ids_used[0]
    disk_path = os.path.join(JBoxContainer.FS_LOC, str(disk_id_used))
    bkup_tar = tarfile.open(bkup_file, 'w:gz')

    for f in os.listdir(disk_path):
        if f.startswith('.') and (f in ['.julia', '.ipython']):
            # skip the .julia and .ipython folders
            continue
        full_path = os.path.join(disk_path, f)
        bkup_tar.add(full_path, os.path.join('juser', f))
    bkup_tar.close()
    os.chmod(bkup_file, 0666)
    ensure_delete(disk_path)

    # Upload to S3 if so configured. Delete the local copy if successful.
    bkup_file_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(bkup_file), pytz.utc) + \
                      datetime.timedelta(seconds=JBoxContainer.LOCAL_TZ_OFFSET)
    if JBoxContainer.BACKUP_BUCKET is not None:
        if CloudHelper.push_file_to_s3(JBoxContainer.BACKUP_BUCKET, bkup_file,
                                       metadata={'backup_time': bkup_file_mtime.isoformat()}) is not None:
            os.remove(bkup_file)
            JBoxContainer.log_info("Moved backup to S3 " + self.debug_str())
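# A quick stdlib-only way to sanity-check an archive produced by backup();
# entries were added under the 'juser' prefix:
import tarfile

def list_backup_contents(bkup_file):
    bkup_tar = tarfile.open(bkup_file, 'r:gz')
    try:
        return bkup_tar.getnames()  # e.g. ['juser/notebook1.ipynb', ...]
    finally:
        bkup_tar.close()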
def run(self):
    try:
        CloudHelper.deregister_instance_dns()
    except:
        CloudHelper.log_info("No prior dns registration found for the instance")
    CloudHelper.register_instance_dns()
    JBoxContainer.publish_container_stats()
    self.ct.start()
    self.ioloop.start()
def get(self):
    args = self.get_argument('m', default=None)
    if args is not None:
        args = json.loads(decrypt(base64.b64decode(args), self.config(key='sesskey')))

    if args is not None:
        self.log_debug("setting cookies")
        for cname in ['sessname', 'hostshell', 'hostupload', 'hostipnb', 'sign', 'juliabox']:
            self.set_cookie(cname, args[cname])
        self.set_status(status_code=204)
        self.finish()
    else:
        args = dict()
        for cname in ['sessname', 'hostshell', 'hostupload', 'hostipnb', 'sign', 'juliabox']:
            args[cname] = self.get_cookie(cname)
        args = tornado.escape.url_escape(base64.b64encode(encrypt(json.dumps(args), self.config(key='sesskey'))))
        url = "//" + CloudHelper.notebook_websocket_hostname() + "/cors/?m=" + args
        self.log_debug("redirecting to " + url)
        self.redirect(url)
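# The handler above relays cookies across domains by packing them into the
# 'm' query parameter. A minimal round-trip sketch of the encoding layers,
# with identity stand-ins for the project's encrypt/decrypt helpers:
import base64
import json
import tornado.escape

def pack(cookies, encrypt_fn):    # what the redirect branch builds
    return tornado.escape.url_escape(base64.b64encode(encrypt_fn(json.dumps(cookies))))

def unpack(m, decrypt_fn):        # what the cookie-setting branch reverses
    return json.loads(decrypt_fn(base64.b64decode(m)))

cookies = {'sessname': 'demo', 'sign': 'dummy'}
m = tornado.escape.url_unescape(pack(cookies, lambda s: s))  # get_argument un-escapes for us
assert unpack(m, lambda s: s) == cookies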
def set_lb_tracker_cookie(self):
    self.set_cookie('lb', signstr(CloudHelper.instance_id(), self.config('sesskey')), expires_days=30)
@classmethod
def conn(cls):
    if JBoxDB.CONN is None:
        JBoxDB.CONN = CloudHelper.connect_dynamodb()
        cls.log_info("DB connected: " + str(JBoxDB.CONN is not None))
    return JBoxDB.CONN
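# CloudHelper.connect_dynamodb() is defined elsewhere; with boto it would
# plausibly be a thin wrapper along these lines (a sketch, not the actual code):
import boto.dynamodb2

def connect_dynamodb(region='us-east-1'):
    # expected to return None when DynamoDB support is disabled in config
    return boto.dynamodb2.connect_to_region(region)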
def setup_instance_config(disk_path):
    nbconfig = os.path.join(disk_path, '.ipython/profile_julia/ipython_notebook_config.py')
    with open(nbconfig, "a") as nbconfig_file:
        nbconfig_file.write("c.NotebookApp.websocket_url = 'ws://" +
                            CloudHelper.instance_public_hostname() + "'\n")
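# After the append above, the profile's config file ends with a line such as
# (hostname illustrative):
#
#   c.NotebookApp.websocket_url = 'ws://ec2-1-2-3-4.compute-1.amazonaws.com'
#
# which makes the IPython notebook open its websockets against the instance's
# public hostname rather than the proxied one.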
@staticmethod
def pull_from_s3(local_file, metadata_only=False):
    if JBoxContainer.BACKUP_BUCKET is None:
        return None
    return CloudHelper.pull_file_from_s3(JBoxContainer.BACKUP_BUCKET, local_file, metadata_only=metadata_only)
@staticmethod
def publish_container_stats():
    """Publish custom CloudWatch statistics. Used for status monitoring and auto scaling."""
    nactive = JBoxContainer.num_active()
    CloudHelper.publish_stats("NumActiveContainers", "Count", nactive)

    # smooth out spikes by averaging the current CPU reading with the previous one
    curr_cpu_used_pct = psutil.cpu_percent()
    last_cpu_used_pct = curr_cpu_used_pct if JBoxContainer.LAST_CPU_PCT is None else JBoxContainer.LAST_CPU_PCT
    JBoxContainer.LAST_CPU_PCT = curr_cpu_used_pct
    cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct) / 2)

    mem_used_pct = psutil.virtual_memory().percent
    CloudHelper.publish_stats("MemUsed", "Percent", mem_used_pct)

    # track usage of the worst-filled partition, excluding user disks under FS_LOC,
    # relative to the level measured at startup
    disk_used_pct = 0
    for x in psutil.disk_partitions():
        if x.mountpoint.startswith(JBoxContainer.FS_LOC):
            continue
        try:
            disk_used_pct = max(psutil.disk_usage(x.mountpoint).percent, disk_used_pct)
        except:
            pass  # some mount points may not be accessible; ignore them
    if JBoxContainer.INITIAL_DISK_USED_PCT is None:
        JBoxContainer.INITIAL_DISK_USED_PCT = disk_used_pct
    disk_used_pct = max(0, (disk_used_pct - JBoxContainer.INITIAL_DISK_USED_PCT))
    CloudHelper.publish_stats("DiskUsed", "Percent", disk_used_pct)

    cont_load_pct = min(100, max(0, nactive * 100 / JBoxContainer.MAX_CONTAINERS))
    CloudHelper.publish_stats("ContainersUsed", "Percent", cont_load_pct)

    disk_ids_used_pct = JBoxContainer.disk_ids_used_pct()
    CloudHelper.publish_stats("DiskIdsUsed", "Percent", disk_ids_used_pct)

    # overall load is the most constrained of all tracked resources
    overall_load_pct = max(cont_load_pct, disk_used_pct, mem_used_pct, cpu_used_pct, disk_ids_used_pct)
    CloudHelper.publish_stats("Load", "Percent", overall_load_pct)
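# CloudHelper.publish_stats() is defined elsewhere; with boto's CloudWatch API
# it plausibly reduces to something like this sketch (the 'JuliaBox' namespace
# and function name are assumptions):
import boto.ec2.cloudwatch

def publish_stats(name, unit, value, region='us-east-1'):
    cw = boto.ec2.cloudwatch.connect_to_region(region)
    cw.put_metric_data(namespace='JuliaBox', name=name, unit=unit, value=value,
                       dimensions={'InstanceID': CloudHelper.instance_id()})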