class VPSMgr(object):
    """VPS manager for one host.

    All exceptions should be caught and logged inside this class.
    """

    VERSION = 1

    def __init__(self):
        """Set up loggers, the command dispatch table and the periodic timers."""
        self.logger = Log("vps_mgr", config=conf)
        self.logger_err = Log("vps_mgr_err", config=conf)
        self.logger_misc = Log("misc", config=conf)
        self.host_id = conf.HOST_ID

        # Handlers are looked up on the class rather than on the instance.
        klass = self.__class__
        self.handlers = {
            Cmd.OPEN: klass.vps_open,
            Cmd.REBOOT: klass.vps_reboot,
            Cmd.CLOSE: klass.vps_close,
            Cmd.OS: klass.vps_reinstall_os,
        }

        self.timer = TimerEvents(time.time, self.logger_misc)
        assert conf.NETFLOW_COLLECT_INV > 0
        self.timer.add_timer(conf.NETFLOW_COLLECT_INV, self.send_netflow)
        twelve_hours = 12 * 3600
        self.timer.add_timer(twelve_hours, self.refresh_host_space)

        self.workers = []
        self.running = False

    def get_client(self):
        """Return whatever the module-level ``get_client`` yields for ``VPS``."""
        return get_client(VPS)

    def send_netflow(self):
        """Read per-interface counters and turn them into ``NetFlow`` records.

        Only interfaces named ``vps<N>`` with ``N > 0`` are considered.
        Failures while reading or parsing are logged to the misc logger and
        end the call early.
        """
        try:
            proc_stats = netflow.read_proc()
        except Exception as e:
            self.logger_misc.exception(
                "cannot read netflow data from proc: %s" % (str(e)))
            return

        ts = time.time()
        netflow_list = []
        try:
            for ifname, counters in proc_stats.iteritems():
                matched = re.match(r"^vps(\d+)$", ifname)
                if matched is None:
                    continue
                vps_id = int(matched.group(1))
                if vps_id > 0:
                    # direction of vps bridged network interface needs to be
                    # reversed
                    netflow_list.append(
                        NetFlow(vps_id, rx=counters[1], tx=counters[0]))
        except Exception as e:
            self.logger_misc.exception(
                "netflow data format error: %s" % (str(e)))
            return
class VPSMgr(object):
    """VPS manager for one host.

    All exceptions should be caught and logged inside this class.
    """

    VERSION = 1

    def __init__(self):
        """Set up loggers, the command dispatch table and the periodic timers."""
        self.logger = Log("vps_mgr", config=conf)
        self.logger_err = Log("vps_mgr_err", config=conf)
        self.logger_misc = Log("misc", config=conf)
        self.host_id = conf.HOST_ID

        # Handlers are looked up on the class rather than on the instance.
        klass = self.__class__
        self.handlers = {
            Cmd.OPEN: klass.vps_open,
            Cmd.REBOOT: klass.vps_reboot,
            Cmd.CLOSE: klass.vps_close,
            Cmd.OS: klass.vps_reinstall_os,
        }

        self.timer = TimerEvents(time.time, self.logger_misc)
        assert conf.NETFLOW_COLLECT_INV > 0
        self.timer.add_timer(conf.NETFLOW_COLLECT_INV, self.send_netflow)
        twelve_hours = 12 * 3600
        self.timer.add_timer(twelve_hours, self.refresh_host_space)

        self.workers = []
        self.running = False

    def get_client(self):
        """Return whatever the module-level ``get_client`` yields for ``VPS``."""
        return get_client(VPS)

    def send_netflow(self):
        """Read per-interface counters and turn them into ``NetFlow`` records.

        Only interfaces named ``vps<N>`` with ``N > 0`` are considered.
        Failures while reading or parsing are logged to the misc logger and
        end the call early.
        """
        try:
            proc_stats = netflow.read_proc()
        except Exception as e:
            self.logger_misc.exception(
                "cannot read netflow data from proc: %s" % (str(e)))
            return

        ts = time.time()
        netflow_list = []
        try:
            for ifname, counters in proc_stats.iteritems():
                matched = re.match(r"^vps(\d+)$", ifname)
                if matched is None:
                    continue
                vps_id = int(matched.group(1))
                if vps_id > 0:
                    # direction of vps bridged network interface needs to be
                    # reversed
                    netflow_list.append(
                        NetFlow(vps_id, rx=counters[1], tx=counters[0]))
        except Exception as e:
            self.logger_misc.exception(
                "netflow data format error: %s" % (str(e)))
            return
class VPSMgr(object):
    """VPS manager for one host.

    All exceptions should be caught and logged inside this class.
    """

    VERSION = 1

    def __init__(self):
        """Set up loggers, the command dispatch table, locks and timers."""
        self.logger = Log("vps_mgr", config=conf)
        self.logger_net = Log("vps_mgr_net", config=conf)
        self.logger_misc = Log("misc", config=conf)
        self.logger_debug = Log("debug", config=conf)
        self.host_id = conf.HOST_ID
        self.vpsops = VPSOps(self.logger)
        # Handlers are looked up on the class rather than on the instance.
        self.handlers = {
            CMD.OPEN: self.__class__.vps_open,
            CMD.REBOOT: self.__class__.vps_reboot,
            CMD.CLOSE: self.__class__.vps_close,
            CMD.OS: self.__class__.vps_reinstall_os,
            CMD.UPGRADE: self.__class__.vps_upgrade,
            CMD.BANDWIDTH: self.__class__.vps_set_bandwidth,
            CMD.RM: self.__class__.vps_delete,
            CMD.PRE_SYNC: self.__class__.vps_hot_sync,
            CMD.MIGRATE: self.__class__.vps_migrate,
            CMD.RESET_PW: self.__class__.vps_reset_pw,
        }
        # _locker guards _vps_locker, which maps vps_id -> the cmd holding it.
        self._locker = threading.Lock()
        self._vps_locker = dict()
        self.xenstat = XenStat()
        self.timer = TimerEvents(time.time, self.logger_misc)
        assert conf.MONITOR_COLLECT_INV > 0
        # Previous samples; None until the first successful monitor_vps run.
        self.last_netflow = None
        self.last_diskstat = None
        self.monitor_inv = conf.MONITOR_COLLECT_INV
        self.last_monitor_ts = None
        self.timer.add_timer(conf.MONITOR_COLLECT_INV, self.monitor_vps)
        self.timer.add_timer(12 * 3600, self.refresh_host_space)
        self.workers = []
        self.running = False

    def _try_lock_vps(self, cmd, vps_id):
        """Try to reserve ``vps_id`` for ``cmd`` without waiting.

        Returns True when the reservation was recorded, False when another
        command already holds the VPS (the conflict is written to the debug
        log).
        """
        with self._locker:
            if vps_id not in self._vps_locker:
                self._vps_locker[vps_id] = cmd
                return True
            holder = self._vps_locker[vps_id]
        # Log outside the critical section: a failing logger must never leave
        # the mutex held.
        self.logger_debug.info(
            "CMD %s try to lock vps%s failed: locked by CMD %s" % (
                CMD._get_name(cmd), vps_id, CMD._get_name(holder)))
        return False

    def _unlock_vps(self, cmd, vps_id):
        """Drop the reservation on ``vps_id`` if it is held by ``cmd``.

        A reservation held by a different command, or no reservation at all,
        is left untouched.
        """
        with self._locker:
            if self._vps_locker.get(vps_id) == cmd:
                self._vps_locker.pop(vps_id, None)

    def rpc_connect(self):
        """Create a ``SAAS_Client`` for this host, connect it and return it."""
        rpc = SAAS_Client(self.host_id, self.logger_debug)
        rpc.connect()
        return rpc

    @staticmethod
    def _append_io_stats(payload, prefix, ts, stats):
        """Append the five IO metrics of one device under ``prefix``."""
        read_ops, read_byte, write_ops, write_byte, util = stats
        payload.append(prefix + ".ops.read", ts, read_ops)
        payload.append(prefix + ".ops.write", ts, write_ops)
        payload.append(prefix + ".traffic.read", ts, read_byte)
        payload.append(prefix + ".traffic.write", ts, write_byte)
        payload.append(prefix + ".util", ts, util)

    def _append_host_netflow(self, payload, label, inf, net_result, ts,
                             t_elapse):
        """Append traffic and packet rates of host interface ``inf``.

        ``label`` is the metric name component ("ext" or "int").
        """
        v = net_result.get(inf)
        last_v = self.last_netflow.get(inf)
        # presumably byte counters scaled to bits per second -- TODO confirm
        _in = fix_flow((v[0] - last_v[0]) * 8.0 / t_elapse)
        _out = fix_flow((v[1] - last_v[1]) * 8.0 / t_elapse)
        _in_pp = (v[2] - last_v[2]) / t_elapse
        _out_pp = (v[3] - last_v[3]) / t_elapse
        prefix = "host.netflow.%d.%s" % (self.host_id, label)
        payload.append(prefix + ".in", ts, _in)
        payload.append(prefix + ".out", ts, _out)
        # Negative packet rates are reported as 0.
        payload.append(prefix + "_pp.in", ts, _in_pp if _in_pp > 0 else 0)
        payload.append(prefix + "_pp.out", ts, _out_pp if _out_pp > 0 else 0)

    def monitor_vps(self):
        """Sample CPU, network and disk counters into a ``CarbonPayload``.

        Rates are computed against the previous sample (``last_netflow``,
        ``last_diskstat``, ``last_monitor_ts``), which is replaced once the
        whole domain loop has succeeded.  Any exception is logged to the misc
        logger and ends the call.

        NOTE(review): the payload is only built here; no code that sends it
        is visible in this part of the file.
        """
        try:
            net_result = netflow.read_proc()
            disk_devs = glob.glob("/dev/main/vps*")
            has_main_disk = 'MAIN_DISK' in dir(conf)
            if has_main_disk:
                disk_devs.append(conf.MAIN_DISK)
            disk_result = diskstat.read_stat(disk_devs)
        except Exception as e:
            self.logger_misc.exception(
                "cannot read netflow data from proc: %s" % (str(e)))
            return
        ts = time.time()
        dom_map = XenStore.domain_name_id_map()
        dom_names = dom_map.keys()
        self.xenstat.run(dom_names)
        payload = CarbonPayload()
        try:
            payload.append("host.cpu.%s.all" % (self.host_id), ts,
                           self.xenstat.total_cpu)
            for dom_name in dom_names:
                om = re.match(r"^vps(\d+)$", dom_name)
                if not om:
                    # dom0 (every domain not named vps<N> takes this branch)
                    dom_cpu = self.xenstat.dom_dict.get(dom_name)
                    if dom_cpu:
                        payload.append("host.cpu.%s.dom0" % (self.host_id),
                                       dom_cpu['ts'], dom_cpu['cpu_avg'])
                    if has_main_disk and self.last_diskstat:
                        t_elapse = ts - self.last_monitor_ts
                        stats = diskstat.cal_stat(
                            disk_result.get(conf.MAIN_DISK),
                            self.last_diskstat.get(conf.MAIN_DISK),
                            t_elapse)
                        read_ops, read_byte, write_ops, write_byte, util = \
                            stats
                        self._append_io_stats(
                            payload, "host.io.%d" % (self.host_id), ts,
                            (read_ops, read_byte, write_ops, write_byte,
                             util))
                        # Debug output on stdout, kept from the original.
                        print("%s %s %s %s %s %s" % (
                            conf.MAIN_DISK, read_ops, write_ops, read_byte,
                            write_byte, util))
                    if self.last_netflow:
                        t_elapse = ts - self.last_monitor_ts
                        self._append_host_netflow(
                            payload, "ext", conf.EXT_INF, net_result, ts,
                            t_elapse)
                        self._append_host_netflow(
                            payload, "int", conf.INT_INF, net_result, ts,
                            t_elapse)
                else:
                    vps_id = int(om.group(1))
                    xv = self.vpsops.load_vps_meta(vps_id)
                    dom_cpu = self.xenstat.dom_dict.get(dom_name)
                    if dom_cpu:
                        payload.append("vps.cpu.%s" % (vps_id),
                                       dom_cpu['ts'], dom_cpu['cpu_avg'])
                    if not self.last_netflow or not self.last_diskstat:
                        # NOTE(review): this leaves the whole domain loop, so
                        # remaining domains are skipped for this sample;
                        # `continue` may have been intended -- confirm.
                        break
                    # net
                    ifname = dom_name
                    vif = xv.vifs.get(ifname)
                    v = net_result.get(ifname)
                    last_v = self.last_netflow.get(ifname)
                    t_elapse = ts - self.last_monitor_ts
                    if v and last_v:
                        # direction of vps bridged network interface needs to
                        # be reversed
                        _in = fix_flow((v[1] - last_v[1]) * 8.0 / t_elapse)
                        _out = fix_flow((v[0] - last_v[0]) * 8.0 / t_elapse)
                        # Clamp to the configured bandwidth when there is
                        # one; a missing vif entry means no clamp instead of
                        # an AttributeError that aborts the whole sample.
                        cap = None
                        if vif is not None and vif.bandwidth:
                            cap = vif.bandwidth * 1024 * 1024
                        if cap and cap < _in:
                            _in = cap
                        if cap and cap < _out:
                            _out = cap
                        payload.append("vps.netflow.%d.in" % (vps_id), ts,
                                       _in)
                        payload.append("vps.netflow.%d.out" % (vps_id), ts,
                                       _out)
                        # Parenthesised so that an unset/zero threshold
                        # disables the warning for both directions.
                        if conf.LARGE_NETFLOW and (
                                _in >= conf.LARGE_NETFLOW or
                                _out >= conf.LARGE_NETFLOW):
                            self.logger_misc.warn(
                                "%s in: %.3f mbps, out: %.3f mbps" %
                                (ifname, _in / 1024.0 / 1024.0,
                                 _out / 1024.0 / 1024.0))
                    # disk
                    if conf.USE_LVM and self.last_diskstat:
                        for disk in xv.data_disks.values():
                            v = disk_result.get(disk.dev)
                            last_v = self.last_diskstat.get(disk.dev)
                            if not last_v:
                                continue
                            stats = diskstat.cal_stat(v, last_v, t_elapse)
                            # Debug output on stdout, kept from the original.
                            print(disk.xen_dev)
                            self._append_io_stats(
                                payload,
                                "vps.io.%d.%s" % (vps_id, disk.xen_dev),
                                ts, stats)
                        v = disk_result.get(xv.swap_store.dev)
                        last_v = self.last_diskstat.get(xv.swap_store.dev)
                        if v and last_v:
                            stats = diskstat.cal_stat(v, last_v, t_elapse)
                            self._append_io_stats(
                                payload, "vps.io.%d.swap" % (vps_id), ts,
                                stats)
            self.last_netflow = net_result
            self.last_diskstat = disk_result
            self.last_monitor_ts = ts
        except Exception as e:
            self.logger_misc.exception(e)
            return
class VPSMgr(object):
    """VPS manager for one host.

    All exceptions should be caught and logged inside this class.
    """

    VERSION = 1

    def __init__(self):
        """Set up loggers, the command dispatch table, locks and timers."""
        self.logger = Log("vps_mgr", config=conf)
        self.logger_net = Log("vps_mgr_net", config=conf)
        self.logger_misc = Log("misc", config=conf)
        self.logger_debug = Log("debug", config=conf)
        self.host_id = conf.HOST_ID
        self.vpsops = VPSOps(self.logger)
        # Handlers are looked up on the class rather than on the instance.
        self.handlers = {
            CMD.OPEN: self.__class__.vps_open,
            CMD.REBOOT: self.__class__.vps_reboot,
            CMD.CLOSE: self.__class__.vps_close,
            CMD.OS: self.__class__.vps_reinstall_os,
            CMD.UPGRADE: self.__class__.vps_upgrade,
            CMD.BANDWIDTH: self.__class__.vps_set_bandwidth,
            CMD.RM: self.__class__.vps_delete,
            CMD.PRE_SYNC: self.__class__.vps_hot_sync,
            CMD.MIGRATE: self.__class__.vps_migrate,
            CMD.RESET_PW: self.__class__.vps_reset_pw,
        }
        # _locker guards _vps_locker, which maps vps_id -> the cmd holding it.
        self._locker = threading.Lock()
        self._vps_locker = dict()
        self.xenstat = XenStat()
        self.timer = TimerEvents(time.time, self.logger_misc)
        assert conf.MONITOR_COLLECT_INV > 0
        # Previous samples; None until the first successful monitor_vps run.
        self.last_netflow = None
        self.last_diskstat = None
        self.monitor_inv = conf.MONITOR_COLLECT_INV
        self.last_monitor_ts = None
        self.timer.add_timer(conf.MONITOR_COLLECT_INV, self.monitor_vps)
        self.timer.add_timer(12 * 3600, self.refresh_host_space)
        self.workers = []
        self.running = False

    def _try_lock_vps(self, cmd, vps_id):
        """Try to reserve ``vps_id`` for ``cmd`` without waiting.

        Returns True when the reservation was recorded, False when another
        command already holds the VPS (the conflict is written to the debug
        log).
        """
        with self._locker:
            if vps_id not in self._vps_locker:
                self._vps_locker[vps_id] = cmd
                return True
            holder = self._vps_locker[vps_id]
        # Log outside the critical section: a failing logger must never leave
        # the mutex held.
        self.logger_debug.info(
            "CMD %s try to lock vps%s failed: locked by CMD %s" % (
                CMD._get_name(cmd), vps_id, CMD._get_name(holder)))
        return False

    def _unlock_vps(self, cmd, vps_id):
        """Drop the reservation on ``vps_id`` if it is held by ``cmd``.

        A reservation held by a different command, or no reservation at all,
        is left untouched.
        """
        with self._locker:
            if self._vps_locker.get(vps_id) == cmd:
                self._vps_locker.pop(vps_id, None)

    def rpc_connect(self):
        """Create a ``SAAS_Client`` for this host, connect it and return it."""
        rpc = SAAS_Client(self.host_id, self.logger_debug)
        rpc.connect()
        return rpc

    @staticmethod
    def _append_io_stats(payload, prefix, ts, stats):
        """Append the five IO metrics of one device under ``prefix``."""
        read_ops, read_byte, write_ops, write_byte, util = stats
        payload.append(prefix + ".ops.read", ts, read_ops)
        payload.append(prefix + ".ops.write", ts, write_ops)
        payload.append(prefix + ".traffic.read", ts, read_byte)
        payload.append(prefix + ".traffic.write", ts, write_byte)
        payload.append(prefix + ".util", ts, util)

    def _append_host_netflow(self, payload, label, inf, net_result, ts,
                             t_elapse):
        """Append traffic and packet rates of host interface ``inf``.

        ``label`` is the metric name component ("ext" or "int").
        """
        v = net_result.get(inf)
        last_v = self.last_netflow.get(inf)
        # presumably byte counters scaled to bits per second -- TODO confirm
        _in = fix_flow((v[0] - last_v[0]) * 8.0 / t_elapse)
        _out = fix_flow((v[1] - last_v[1]) * 8.0 / t_elapse)
        _in_pp = (v[2] - last_v[2]) / t_elapse
        _out_pp = (v[3] - last_v[3]) / t_elapse
        prefix = "host.netflow.%d.%s" % (self.host_id, label)
        payload.append(prefix + ".in", ts, _in)
        payload.append(prefix + ".out", ts, _out)
        # Negative packet rates are reported as 0.
        payload.append(prefix + "_pp.in", ts, _in_pp if _in_pp > 0 else 0)
        payload.append(prefix + "_pp.out", ts, _out_pp if _out_pp > 0 else 0)

    def monitor_vps(self):
        """Sample CPU, network and disk counters into a ``CarbonPayload``.

        Rates are computed against the previous sample (``last_netflow``,
        ``last_diskstat``, ``last_monitor_ts``), which is replaced once the
        whole domain loop has succeeded.  Any exception is logged to the misc
        logger and ends the call.

        NOTE(review): the payload is only built here; no code that sends it
        is visible in this part of the file.
        """
        try:
            net_result = netflow.read_proc()
            disk_devs = glob.glob("/dev/main/vps*")
            has_main_disk = 'MAIN_DISK' in dir(conf)
            if has_main_disk:
                disk_devs.append(conf.MAIN_DISK)
            disk_result = diskstat.read_stat(disk_devs)
        except Exception as e:
            self.logger_misc.exception(
                "cannot read netflow data from proc: %s" % (str(e)))
            return
        ts = time.time()
        dom_map = XenStore.domain_name_id_map()
        dom_names = dom_map.keys()
        self.xenstat.run(dom_names)
        payload = CarbonPayload()
        try:
            payload.append("host.cpu.%s.all" % (self.host_id), ts,
                           self.xenstat.total_cpu)
            for dom_name in dom_names:
                om = re.match(r"^vps(\d+)$", dom_name)
                if not om:
                    # dom0 (every domain not named vps<N> takes this branch)
                    dom_cpu = self.xenstat.dom_dict.get(dom_name)
                    if dom_cpu:
                        payload.append("host.cpu.%s.dom0" % (self.host_id),
                                       dom_cpu['ts'], dom_cpu['cpu_avg'])
                    if has_main_disk and self.last_diskstat:
                        t_elapse = ts - self.last_monitor_ts
                        stats = diskstat.cal_stat(
                            disk_result.get(conf.MAIN_DISK),
                            self.last_diskstat.get(conf.MAIN_DISK),
                            t_elapse)
                        read_ops, read_byte, write_ops, write_byte, util = \
                            stats
                        self._append_io_stats(
                            payload, "host.io.%d" % (self.host_id), ts,
                            (read_ops, read_byte, write_ops, write_byte,
                             util))
                        # Debug output on stdout, kept from the original.
                        print("%s %s %s %s %s %s" % (
                            conf.MAIN_DISK, read_ops, write_ops, read_byte,
                            write_byte, util))
                    if self.last_netflow:
                        t_elapse = ts - self.last_monitor_ts
                        self._append_host_netflow(
                            payload, "ext", conf.EXT_INF, net_result, ts,
                            t_elapse)
                        self._append_host_netflow(
                            payload, "int", conf.INT_INF, net_result, ts,
                            t_elapse)
                else:
                    vps_id = int(om.group(1))
                    xv = self.vpsops.load_vps_meta(vps_id)
                    dom_cpu = self.xenstat.dom_dict.get(dom_name)
                    if dom_cpu:
                        payload.append("vps.cpu.%s" % (vps_id),
                                       dom_cpu['ts'], dom_cpu['cpu_avg'])
                    if not self.last_netflow or not self.last_diskstat:
                        # NOTE(review): this leaves the whole domain loop, so
                        # remaining domains are skipped for this sample;
                        # `continue` may have been intended -- confirm.
                        break
                    # net
                    ifname = dom_name
                    vif = xv.vifs.get(ifname)
                    v = net_result.get(ifname)
                    last_v = self.last_netflow.get(ifname)
                    t_elapse = ts - self.last_monitor_ts
                    if v and last_v:
                        # direction of vps bridged network interface needs to
                        # be reversed
                        _in = fix_flow((v[1] - last_v[1]) * 8.0 / t_elapse)
                        _out = fix_flow((v[0] - last_v[0]) * 8.0 / t_elapse)
                        # Clamp to the configured bandwidth when there is
                        # one; a missing vif entry means no clamp instead of
                        # an AttributeError that aborts the whole sample.
                        cap = None
                        if vif is not None and vif.bandwidth:
                            cap = vif.bandwidth * 1024 * 1024
                        if cap and cap < _in:
                            _in = cap
                        if cap and cap < _out:
                            _out = cap
                        payload.append("vps.netflow.%d.in" % (vps_id), ts,
                                       _in)
                        payload.append("vps.netflow.%d.out" % (vps_id), ts,
                                       _out)
                        # Parenthesised so that an unset/zero threshold
                        # disables the warning for both directions.
                        if conf.LARGE_NETFLOW and (
                                _in >= conf.LARGE_NETFLOW or
                                _out >= conf.LARGE_NETFLOW):
                            self.logger_misc.warn(
                                "%s in: %.3f mbps, out: %.3f mbps" %
                                (ifname, _in / 1024.0 / 1024.0,
                                 _out / 1024.0 / 1024.0))
                    # disk
                    if conf.USE_LVM and self.last_diskstat:
                        for disk in xv.data_disks.values():
                            v = disk_result.get(disk.dev)
                            last_v = self.last_diskstat.get(disk.dev)
                            if not last_v:
                                continue
                            stats = diskstat.cal_stat(v, last_v, t_elapse)
                            # Debug output on stdout, kept from the original.
                            print(disk.xen_dev)
                            self._append_io_stats(
                                payload,
                                "vps.io.%d.%s" % (vps_id, disk.xen_dev),
                                ts, stats)
                        v = disk_result.get(xv.swap_store.dev)
                        last_v = self.last_diskstat.get(xv.swap_store.dev)
                        if v and last_v:
                            stats = diskstat.cal_stat(v, last_v, t_elapse)
                            self._append_io_stats(
                                payload, "vps.io.%d.swap" % (vps_id), ts,
                                stats)
            self.last_netflow = net_result
            self.last_diskstat = disk_result
            self.last_monitor_ts = ts
        except Exception as e:
            self.logger_misc.exception(e)
            return