def __init__(self, name, cleanup=None, reset_running=False,
             initial_submit=False, max_threads=None,
             preserve_order=False, max_faults=None, mrt_limit=None):
    self.logger = PrefixLoggerAdapter(logger, name)
    self.name = name
    self.job_classes = {}
    self.collection_name = self.COLLECTION_BASE + self.name
    self.collection = get_db()[self.collection_name]
    self.active_mrt = {}  # ReduceTask -> Job instance
    self.cleanup_callback = cleanup
    self.reset_running = reset_running
    self.ignored = []
    self.initial_submit = initial_submit
    self.initial_submit_next_check = {}  # job class -> timestamp
    self.max_threads = max_threads
    self.preserve_order = preserve_order
    self.max_faults = max_faults
    self.mrt_limit = mrt_limit
    self.mrt_overload = False
    self.running_lock = threading.Lock()
    self.running_count = defaultdict(int)  # Group -> Count
    self.log_jobs = None
    self.metrics = MetricsHub(
        "noc.scheduler.%s" % name,
        "jobs.count", "jobs.success", "jobs.failed",
        "jobs.dereference.count", "jobs.dereference.success",
        "jobs.dereference.failed", "jobs.time"
    )
def __init__(self, name, local=False):
    self.logger = PrefixLoggerAdapter(logger, name)
    if name not in self.COLLECTIONS:
        self.logger.error("Invalid collection '%s'", name)
        raise ValueError("Invalid collection '%s'" % name)
    m, c = name.split(".", 1)
    self.module = m
    self.cname = name
    self.name = c
    self.local = local
    self.doc = self.COLLECTIONS[name]
    self.items = {}  # uuid -> CollectionItem
    self.changed = False
    self.ref_cache = {}
    self.partial = set()
    if hasattr(self.doc, "name"):
        # Use .name field when present
        self.get_name = attrgetter("name")
    else:
        # Or first unique field otherwise
        uname = None
        for spec in self.doc._meta["index_specs"]:
            if spec["unique"] and len(spec["fields"]) == 1:
                uname = spec["fields"][0][0]
        if not uname:
            self.logger.error("Cannot find unique index")
            raise ValueError("No unique index")
        self.get_name = attrgetter(uname)
    self.translations = self.TRANSLATIONS.get(
        name, self.TRANSLATIONS[None])
def __init__(self, factory, socket=None):
    self.logger = PrefixLoggerAdapter(logger, self.get_label())
    self.factory = factory
    self.socket = socket
    self.start_time = time.time()
    self.last_read = self.start_time + 100  # @todo: Meaningful value
    self.name = None
    self.closing = False  # In closing state
    self.stale = False  # Closed as stale
    self.ttl = self.TTL
    self.set_timeout(self.TTL)
    self.factory.register_socket(self)
    if socket:
        self.set_status(r=True)
def __init__(self, object):
    self.object = object
    self.logger = PrefixLoggerAdapter(logger, self.object.name)
    self.env = None
    self.templates = {}  # fact class -> template
    self.fcls = {}  # template -> Fact class
    self.facts = {}  # Index -> Fact
    self.rn = 0  # Rule number
    self.config = None  # Cached config
    self.interface_ranges = None
    with self.ILOCK:
        self.AC_POLICY_VIOLATION = AlarmClass.objects.filter(
            name="Config | Policy Violation").first()
        if not self.AC_POLICY_VIOLATION:
            logger.error(
                "Alarm class 'Config | Policy Violation' is not found. "
                "Alarms cannot be raised")
class Engine(object):
    ILOCK = threading.Lock()
    AC_POLICY_VIOLATION = None

    def __init__(self, object):
        self.object = object
        self.logger = PrefixLoggerAdapter(logger, self.object.name)
        self.env = None
        self.templates = {}  # fact class -> template
        self.fcls = {}  # template -> Fact class
        self.facts = {}  # Index -> Fact
        self.rn = 0  # Rule number
        self.config = None  # Cached config
        self.interface_ranges = None
        with self.ILOCK:
            self.AC_POLICY_VIOLATION = AlarmClass.objects.filter(
                name="Config | Policy Violation").first()
            if not self.AC_POLICY_VIOLATION:
                logger.error(
                    "Alarm class 'Config | Policy Violation' is not found. "
                    "Alarms cannot be raised")

    def get_template(self, fact):
        if fact.cls not in self.templates:
            self.logger.debug("Creating template %s", fact.cls)
            self.templates[fact.cls] = self.env.BuildTemplate(
                fact.cls, fact.get_template())
            self.fcls[fact.cls] = fact.__class__
            self.logger.debug("Define template %s",
                              self.templates[fact.cls].PPForm())
        return self.templates[fact.cls]

    def get_rule_number(self):
        return self.rn

    def assert_fact(self, fact):
        f = self.get_template(fact).BuildFact()
        f.AssignSlotDefaults()
        for k, v in fact.iter_factitems():
            if v is None or v == [] or v == tuple():
                continue
            if isinstance(v, basestring):
                v = v.replace("\n", "\\n")
            f.Slots[k] = v
        try:
            f.Assert()
        except clips.ClipsError, why:
            self.logger.error("Could not assert: %s", f.PPForm())
            self.logger.error(
                "CLIPS Error: %s\n%s",
                why, clips.ErrorStream.Read()
            )
            return
        self.facts[f.Index] = fact
        self.logger.debug("Assert %s", f.PPForm())
def __init__(self, name, is_superuser, enabled, user, uid, group, gid,
             instance_id, config_path):
    self.logger = PrefixLoggerAdapter(logger, "%s#%s" % (name, instance_id))
    self.logger.info("Reading config")
    self.instance_id = instance_id
    self.name = name
    self.config_path = config_path
    self.config = ConfigParser.SafeConfigParser()
    self.config.read("etc/%s.defaults" % name)
    self.config.read(config_path)
    self.enabled = enabled
    self.pid = None
    self.pidfile = self.config.get("main", "pidfile")\
        .replace("{{instance}}", self.instance_id)
    self.is_superuser = is_superuser
    self.user = user
    self.uid = uid
    self.group = group
    self.gid = gid
def __init__(self, name="pool", metrics_prefix=None, start_threads=1, max_threads=10, min_spare=1, max_spare=1, backlog=0): if min_spare > max_spare: raise ValueError("min_spare (%d) must not be greater" " than max_spare (%d)" % (min_spare, max_spare)) if start_threads > max_threads: raise ValueError("start_threads (%d) must not be greater" " than max_threads (%d)" % (start_threads, max_threads)) self.logger = PrefixLoggerAdapter(logger, name) self.name = name if not metrics_prefix: metrics_prefix = "noc" metrics_prefix += "pool.%s" % name self.metrics = MetricsHub( metrics_prefix, "threads.running", "threads.idle", "queue.len" ) self.start_threads = start_threads self.max_threads = max_threads self.min_spare = min_spare self.max_spare = max_spare self.backlog = backlog if backlog else max_threads self.t_lock = Lock() self.threads = set() self.queue = Queue(backlog) self.stopping = False self.stopped = Event() self.n_idle = 0 self.idle_lock = Lock() self.logger.info("Running thread pool '%s'", self.name) self.set_idle(None)
def __init__(self, scheduler, key=None, data=None, schedule=None):
    self.scheduler = scheduler
    self.key = key
    self.data = data or {}
    self.schedule = schedule or {}
    self.object = None  # Set by dereference()
    self.started = None  # Timestamp
    self._log = []
    self.on_complete = []  # List of (job_name, key) to launch on complete
    self.to_log = scheduler and scheduler.to_log_jobs
    self.job_log = []
    self.logger = PrefixLoggerAdapter(
        logger, "%s][%s][%s" % (
            self.scheduler.name, self.name, self.get_display_key()))
    if scheduler.to_log_jobs:
        self.logger = TeeLoggerAdapter(self.logger, self.job_log)
def configure(self, uuid, handler, interval, metrics, config,
              managed_object, **kwargs):
    if not self.uuid:
        self.logger = PrefixLoggerAdapter(logger, uuid)
    self.uuid = uuid
    self.handler_name = handler
    nh = probe_registry.get_handler(handler)
    if nh != self.handler:
        self.handler = nh
        self.probe = nh.im_class(self.daemon, self)
    if interval != self.interval:
        # Change offset
        self.offset = interval * random.random()
        self.interval = interval
        self.next_run = self.get_next_run()
        if not self.running:
            self.daemon.reschedule(self)
    self.config = config
    # Apply metrics
    if self.metrics != metrics:
        self.metrics = metrics
        c = set(self.mdata)
        n = set(m["metric_type"] for m in metrics)
        # Remove metrics
        for m in c - n:
            del self.mdata[m]
        # Create metrics
        for m in n - c:
            self.mdata[m] = Metric(self.daemon)
        # Configure metrics
        for m in metrics:
            m["managed_object"] = managed_object
            self.mdata[m["metric_type"]].configure(**m)
        if len(metrics) == 1:
            self.default_metric_type = metrics[0]["metric_type"]
        else:
            self.default_metric_type = None
class Probe(object):
    __metaclass__ = ProbeBase
    # Form class JS file name
    # Human-readable probe title.
    # Meaningful only for human-configurable probes
    TITLE = None
    # Human-readable description
    # Meaningful only for human-configurable probes
    DESCRIPTION = None
    # Human-readable tags for plugin classification.
    # List of strings
    # Meaningful only for human-configurable probes
    TAGS = []
    # Either list of field configuration or
    # string containing full JS class name
    # Meaningful only for human-configurable probes
    CONFIG_FORM = None

    SNMP_v2c = noc.lib.snmp.consts.SNMP_v2c

    INVALID_OID_TTL = 3600

    def __init__(self, daemon, task):
        self.daemon = daemon
        self.task = task
        self.missed_oids = {}  # oid -> expire time
        self.logger = PrefixLoggerAdapter(
            logging.getLogger(self.__module__), self.task.uuid)

    def disable(self):
        raise NotImplementedError()

    def is_missed_oid(self, oid):
        t = self.missed_oids.get(oid)
        if t:
            if t > time.time():
                return True
            else:
                del self.missed_oids[oid]
        return False

    def set_missed_oid(self, oid):
        self.logger.info("Disabling missed oid %s", oid)
        self.missed_oids[oid] = time.time() + self.INVALID_OID_TTL

    def set_convert(self, metric, convert=None, scale=None):
        """
        Change metric conversions
        """
        self.task.set_metric_convert(metric, convert, scale)

    def snmp_get(self, oids, address, port=161,
                 community="public", version=SNMP_v2c):
        """
        Perform SNMP request to one or more OIDs.
        oids can be a string or a dict.
        When oids is a string, returns the value.
        When oids is a dict of <metric type>: oid,
        returns a dict of <metric type>: value.
        """
        if isinstance(oids, basestring):
            if self.is_missed_oid(oids):
                return None  # Missed oid
        elif isinstance(oids, dict):
            oids = dict((k, v) for k, v in oids.iteritems()
                        if not self.is_missed_oid(v))
            if not oids:
                return None  # All oids are missed
        try:
            result = self.daemon.io.snmp_get(oids, address, port,
                                             community=community,
                                             version=version)
        except SNMPError, why:
            if why.code == NO_SUCH_NAME:
                # Disable invalid oid
                self.set_missed_oid(why.oid)
            return None
        if isinstance(result, dict):
            for k in result:
                if result[k] is None:
                    # Mark the unanswered oid as missed
                    self.set_missed_oid(oids[k])
        return result
def __init__(self, daemon, name):
    self.daemon = daemon
    self.name = name
    self.logger = PrefixLoggerAdapter(daemon.logger, name)
    self.logger.info("Starting %s (%s)", name, self.type)
    self.cmd_queue = []
class DaemonData(object):
    """
    Daemon wrapper
    """
    def __init__(self, name, is_superuser, enabled, user, uid, group, gid,
                 instance_id, config_path):
        self.logger = PrefixLoggerAdapter(logger,
                                          "%s#%s" % (name, instance_id))
        self.logger.info("Reading config")
        self.instance_id = instance_id
        self.name = name
        self.config_path = config_path
        self.config = ConfigParser.SafeConfigParser()
        self.config.read("etc/%s.defaults" % name)
        self.config.read(config_path)
        self.enabled = enabled
        self.pid = None
        self.pidfile = self.config.get("main", "pidfile")\
            .replace("{{instance}}", self.instance_id)
        self.is_superuser = is_superuser
        self.user = user
        self.uid = uid
        self.group = group
        self.gid = gid

    def __repr__(self):
        return "<DaemonData %s>" % self.name

    def launch(self):
        """
        Launch daemon
        """
        logger.info("Launching")
        try:
            pid = os.fork()
        except OSError, e:
            self.logger.error("Fork failed: %s(%s)", e.strerror, e.errno)
            return
        if pid:
            self.pid = pid
            self.logger.info("Daemon started as PID %d", self.pid)
        else:
            # Run child
            try:
                if self.group:
                    os.setgid(self.gid)
                    os.setegid(self.gid)
                if self.user:
                    os.setuid(self.uid)
                    os.seteuid(self.uid)
                    # Set up EGG Cache to prevent permissions problem
                    # in python 2.6
                    os.environ["PYTHON_EGG_CACHE"] = \
                        "/tmp/.egg-cache%d" % self.uid
                    # Adjust HOME and USER environment variables
                    os.environ["USER"] = self.user
                    os.environ["HOME"] = pwd.getpwuid(self.uid).pw_dir
                os.execv(sys.executable, [
                    sys.executable,
                    "./scripts/%s.py" % self.name,
                    "launch",
                    "-c", self.config_path,
                    "-i", self.instance_id
                ])
            except OSError, e:
                self.logger.error("OS Error: %s(%s)", e.strerror, e.errno)
                sys.exit(1)
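A minimal launch sketch for the wrapper above (illustrative, not from the original source): the daemon name, uid/gid and config paths are hypothetical, and the referenced defaults/config files are assumed to exist and define the main/pidfile option.

# Hypothetical values; assumes etc/noc-example.defaults and the given
# config file exist and contain a [main] pidfile option.
d = DaemonData(
    "noc-example", is_superuser=False, enabled=True,
    user="noc", uid=1000, group="noc", gid=1000,
    instance_id="0", config_path="etc/noc-example.conf"
)
d.launch()  # forks; parent keeps d.pid, child execs ./scripts/noc-example.py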
class Scheduler(object):
    COLLECTION_BASE = "noc.schedules."

    ATTR_TS = "ts"
    ATTR_CLASS = "jcls"
    ATTR_STATUS = "s"
    ATTR_TIMEOUT = "timeout"
    ATTR_KEY = "key"
    ATTR_DATA = "data"
    ATTR_SCHEDULE = "schedule"
    ATTR_LAST = "last"  # last run
    ATTR_LAST_STATUS = "ls"  # last completion status
    ATTR_LAST_DURATION = "ldur"  # last job duration
    ATTR_LAST_SUCCESS = "st"  # last success timestamp
    ATTR_RUNS = "runs"  # Number of runs
    ATTR_TRACEBACK = "tb"  # Last error traceback
    ATTR_LOG = "log"  # Job log
    ATTR_FAULTS = "f"  # Amount of sequential faults

    # ATTR_STATUS values
    S_WAIT = "W"  # Waiting to run
    S_RUN = "R"  # Running
    S_STOP = "S"  # Stopped by operator
    S_DISABLED = "D"  # Disabled by system

    JobExists = JobExists

    IGNORE_MRT_CODES = set([
        12,  # ERR_OVERLOAD
        15,  # ERR_ACTIVATOR_NOT_AVAILABLE
        16,  # ERR_DOWN
        18,  # ERR_ACTIVATOR_LOST
        24,  # ERR_SHARD_IS_DOWN
    ])

    def __init__(self, name, cleanup=None, reset_running=False,
                 initial_submit=False, max_threads=None,
                 preserve_order=False, max_faults=None, mrt_limit=None):
        self.logger = PrefixLoggerAdapter(logger, name)
        self.name = name
        self.job_classes = {}
        self.collection_name = self.COLLECTION_BASE + self.name
        self.collection = get_db()[self.collection_name]
        self.active_mrt = {}  # ReduceTask -> Job instance
        self.cleanup_callback = cleanup
        self.reset_running = reset_running
        self.ignored = []
        self.initial_submit = initial_submit
        self.initial_submit_next_check = {}  # job class -> timestamp
        self.max_threads = max_threads
        self.preserve_order = preserve_order
        self.max_faults = max_faults
        self.mrt_limit = mrt_limit
        self.mrt_overload = False
        self.running_lock = threading.Lock()
        self.running_count = defaultdict(int)  # Group -> Count
        self.log_jobs = None
        self.metrics = MetricsHub(
            "noc.scheduler.%s" % name,
            "jobs.count", "jobs.success", "jobs.failed",
            "jobs.dereference.count", "jobs.dereference.success",
            "jobs.dereference.failed", "jobs.time"
        )

    def ensure_indexes(self):
        if self.preserve_order:
            k = [("ts", 1), ("_id", 1)]
        else:
            k = [("ts", 1)]
        self.logger.debug("Checking indexes: %s",
                          ", ".join(x[0] for x in k))
        self.collection.ensure_index(k)
        self.logger.debug("Checking indexes: jcls, key")
        self.collection.ensure_index([("jcls", 1), ("key", 1)])
        self.logger.debug("Checking indexes: s, ts, jcls")
        self.collection.ensure_index([("s", 1), ("ts", 1), ("jcls", 1)])
        self.logger.debug("Checking indexes: key, s")
        self.collection.ensure_index([("s", 1), ("key", 1)])
        self.logger.debug("Indexes are ready")

    def debug(self, msg):
        warnings.warn("Using deprecated Scheduler.debug() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.debug(msg)

    def info(self, msg):
        warnings.warn("Using deprecated Scheduler.info() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.info(msg)

    def error(self, msg):
        warnings.warn("Using deprecated Scheduler.error() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.error(msg)

    def register_job_class(self, cls):
        if not cls.name:
            return  # Abstract classes
        s = " (ignored)" if cls.ignored else ""
        self.logger.info("Registering job class: %s%s", cls.name, s)
        self.job_classes[cls.name] = cls
        # Set up ignored jobs
        if cls.ignored:
            self.ignored += [cls.name]
        else:
            # Initialize job class
            cls.initialize(self)
            # Register initial submit handlers
            if (self.initial_submit and
                    hasattr(cls, "initial_submit") and
                    callable(cls.initial_submit) and
                    hasattr(cls, "initial_submit_interval")):
                self.initial_submit_next_check[cls] = time.time()

    def register_all(self, path, exclude=None):
        """
        Register all Job classes defined within directory

        :param path:
        :return:
        """
        exclude = exclude or []
        if not os.path.isdir(path):
            raise ValueError("'%s' must be a directory" % path)
        mr = "noc.%s." % ".".join(path.split(os.sep))
        for f in os.listdir(path):
            if f in exclude or not f.endswith(".py"):
                continue
            mn = mr + f[:-3]  # Full module name
            m = __import__(mn, {}, {}, "*")
            for on in dir(m):
                o = getattr(m, on)
                if (inspect.isclass(o) and
                        issubclass(o, Job) and
                        o.__module__.startswith(mn)):
                    self.register_job_class(o)

    def get_job_class(self, name):
        return self.job_classes[name]

    def submit(self, job_name, key=None, data=None, schedule=None, ts=None):
        """
        Submit new job
        """
        if ts is None:
            ts = datetime.datetime.now()
        elif type(ts) in (int, long, float):
            ts = (datetime.datetime.now() +
                  datetime.timedelta(seconds=ts))
        # Check the job does not exist yet
        if key is not None:
            if self.collection.find_one({
                self.ATTR_CLASS: job_name,
                self.ATTR_KEY: key
            }):
                raise JobExists()
        # Submit job
        id = self.collection.insert({
            self.ATTR_TS: ts,
            self.ATTR_CLASS: job_name,
            self.ATTR_STATUS: self.S_WAIT,
            self.ATTR_KEY: key,
            self.ATTR_DATA: data,
            self.ATTR_SCHEDULE: schedule
        }, manipulate=True, safe=True)
        self.logger.info("Scheduling job %s(%s) id=%s at %s",
                         job_name, key, id, ts)

    def remove_job(self, job_name, key):
        self.logger.info("Removing job %s(%s)", job_name, key)
        self.collection.remove({
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }, safe=True)

    def reschedule_job(self, job_name, key, ts, status=None,
                       duration=None, last_status=None, tb=None,
                       log=None, update_runs=False, skip_running=False,
                       faults=None):
        self.logger.info("Rescheduling job %s(%s) to %s%s",
                         job_name, key, ts,
                         " status=%s" % status if status else "")
        q = {
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }
        if skip_running:
            q[self.ATTR_STATUS] = self.S_WAIT
        s = {
            self.ATTR_TS: ts,
            self.ATTR_TRACEBACK: tb,
            self.ATTR_LOG: log or []
        }
        if status:
            s[self.ATTR_STATUS] = status
        if last_status:
            s[self.ATTR_LAST_STATUS] = last_status
            if last_status == Job.S_SUCCESS:
                s[self.ATTR_LAST_SUCCESS] = datetime.datetime.now()
        if duration is not None:
            s[self.ATTR_LAST_DURATION] = duration
        if faults is not None:
            s[self.ATTR_FAULTS] = faults
        op = {"$set": s}
        if update_runs:
            op["$inc"] = {self.ATTR_RUNS: 1}
        self.collection.update(q, op, safe=True)

    def set_job_status(self, job_name, key, status):
        self.logger.info("Changing %s(%s) status to %s",
                         job_name, key, status)
        self.collection.update({
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }, {
            "$set": {self.ATTR_STATUS: status}
        }, safe=True)

    def run_job(self, job):
        """
        Begin job execution

        :param job:
        :return:
        """
        # Dereference job
        self.metrics.jobs_dereference_count += 1
        if not job.dereference():
            self.logger.info("Cannot dereference job %s(%s). Removing",
                             job.name, job.key)
            self.remove_job(job.name, job.key)
            self.metrics.jobs_dereference_failed += 1
            return
        self.metrics.jobs_dereference_success += 1
        # Check threaded jobs limit
        if job.threaded and self.max_threads:
            if threading.active_count() >= self.max_threads:
                return
        # Check job can be run
        job.started = time.time()
        if not job.can_run():
            job.logger.debug("Deferred")
            self._complete_job(job, job.S_DEFERRED, None)
            return
        # Change status
        s = "threaded " if job.threaded else ""
        job.logger.info("Running %sjob", s)
        self.collection.update({
            self.ATTR_CLASS: job.name,
            self.ATTR_KEY: job.key
        }, {"$set": {
            self.ATTR_STATUS: self.S_RUN,
            self.ATTR_LAST: datetime.datetime.fromtimestamp(job.started)
        }})
        #
        if job.map_task:
            if job.beef and job.key in job.beef:
                # Do not run job, provide beef instead
                self._run_job_handler(
                    job, object=job.get_managed_object(),
                    result=job.beef[job.key])
            else:
                job.logger.info("Running script %s", job.map_task)
                # Run in MRT mode
                t = ReduceTask.create_task(
                    job.get_managed_object(),  # Managed object is in key
                    None, {},
                    job.map_task, job.get_map_task_params()
                )
                self.active_mrt[t] = job
        else:
            self._run_job_handler(job)

    def _run_job_handler(self, job, **kwargs):
        if job.threaded:
            t = threading.Thread(
                target=self._job_wrapper,
                args=(job,), kwargs=kwargs
            )
            t.daemon = True
            t.start()
        else:
            return self._job_wrapper(job, **kwargs)

    def _job_wrapper(self, job, **kwargs):
        tb = None
        t0 = time.time()
        job.logger.info("Running job handler")
        try:
            r = job.handler(**kwargs)
        except Exception:
            # error_report()
            tb = get_traceback()
            job.error(tb)
            job.on_exception()
            s = job.S_EXCEPTION
        else:
            if r:
                job.logger.info("Job completed successfully (%.2fms)",
                                (time.time() - t0) * 1000)
                job.on_success()
                s = job.S_SUCCESS
            else:
                job.logger.info("Job failed (%fsec)",
                                time.time() - t0)
                job.on_failure()
                s = job.S_FAILED
        self._complete_job(job, s, tb)

    def _complete_job(self, job, status, tb):
        self.metrics.jobs_time.timer(self.name, job.name, job.key).log(
            job.started, time.time(), status)
        if self.to_log_jobs:
            path = os.path.join(self.log_jobs, job.name, str(job.key))
            safe_rewrite(path, job.get_job_log())
        group = job.get_group()
        if group is not None:
            with self.running_lock:
                self.running_count[group] -= 1
                if not self.running_count[group]:
                    del self.running_count[group]
        on_complete = job.on_complete
        t = job.get_schedule(status)
        if t is None:
            # Unschedule job
            self.remove_job(job.name, job.key)
        else:
            # Reschedule job
            t1 = time.time()
            if self.max_faults and status in (Job.S_FAILED, Job.S_EXCEPTION):
                code = None
                if type(tb) == dict:
                    code = tb.get("code")
                if code in self.IGNORE_MRT_CODES:
                    fc = None  # Ignore temporary errors
                    next_status = self.S_WAIT
                else:
                    # Get fault count
                    fc = self.get_faults(job.name, job.key) + 1
                    if fc >= self.max_faults:
                        # Disable job
                        next_status = self.S_DISABLED
                        self.logger.info(
                            "Disabling job %s(%s) due to %d sequential faults",
                            job.name, job.key, fc)
                    else:
                        next_status = self.S_WAIT
            else:
                next_status = self.S_WAIT
                fc = 0
            self.reschedule_job(
                job.name, job.key, t,
                status=next_status,
                last_status=status,
                duration=t1 - job.started,  # @todo: maybe error
                tb=tb,
                update_runs=True,
                faults=fc
            )
        # Reschedule jobs which must be executed on complete
        for job_name, key in on_complete:
            ts = datetime.datetime.now()
            self.reschedule_job(job_name, key, ts, skip_running=True)

    def complete_mrt_job(self, t):
        job = self.active_mrt.pop(t)
        for m in t.maptask_set.all():
            if m.status == "C":
                self._run_job_handler(job, object=m.managed_object,
                                      result=m.script_result)
            else:
                self.logger.info("Job %s(%s) is failed",
                                 job.name, job.get_display_key())
                self._complete_job(job, job.S_FAILED, m.script_result)
        t.delete()

    def iter_pending_jobs(self):
        """
        Iterate pending jobs
        """
        q = {
            self.ATTR_TS: {"$lte": datetime.datetime.now()},
            self.ATTR_STATUS: self.S_WAIT
        }
        if self.ignored:
            q[self.ATTR_CLASS] = {"$nin": self.ignored}
        # Get remaining pending tasks
        qs = self.collection.find(q)
        if self.preserve_order:
            qs = qs.sort([(self.ATTR_TS, 1), ("_id", 1)])
        else:
            qs = qs.sort(self.ATTR_TS)
        try:
            for job in qs.batch_size(100):
                yield job
        except pymongo.errors.CursorNotFound:
            self.logger.info("Server cursor timed out. "
                             "Waiting for next cycle")
        except pymongo.errors.OperationFailure, why:
            self.logger.error("Operation failure: %s", why)
            self.logger.error("Trying to recover")
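A short usage sketch for the scheduler above (illustrative, not part of the original source): the scheduler name, job class name and key are hypothetical, and a configured MongoDB connection is assumed behind get_db().

# Hypothetical scheduler and job names; requires a working get_db() backend.
scheduler = Scheduler("example", max_threads=10, max_faults=3)
scheduler.ensure_indexes()
# An integer ts is treated as an offset in seconds from now
scheduler.submit("example.job", key=42, data={"param": 1}, ts=60)
# Push the job to an explicit timestamp and mark it waiting again
scheduler.reschedule_job("example.job", 42, datetime.datetime.now(),
                         status=Scheduler.S_WAIT)
scheduler.remove_job("example.job", 42)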
class Collection(object):
    TRANSLATIONS = {}
    ALLOW_FUZZY = {}
    COLLECTIONS = {}
    COLLECTION_ORDER = []

    def __init__(self, name, local=False):
        self.logger = PrefixLoggerAdapter(logger, name)
        if name not in self.COLLECTIONS:
            self.logger.error("Invalid collection '%s'", name)
            raise ValueError("Invalid collection '%s'" % name)
        m, c = name.split(".", 1)
        self.module = m
        self.cname = name
        self.name = c
        self.local = local
        self.doc = self.COLLECTIONS[name]
        self.items = {}  # uuid -> CollectionItem
        self.changed = False
        self.ref_cache = {}
        self.partial = set()
        if hasattr(self.doc, "name"):
            # Use .name field when present
            self.get_name = attrgetter("name")
        else:
            # Or first unique field otherwise
            uname = None
            for spec in self.doc._meta["index_specs"]:
                if spec["unique"] and len(spec["fields"]) == 1:
                    uname = spec["fields"][0][0]
            if not uname:
                self.logger.error("Cannot find unique index")
                raise ValueError("No unique index")
            self.get_name = attrgetter(uname)
        self.translations = self.TRANSLATIONS.get(
            name, self.TRANSLATIONS[None])

    def __unicode__(self):
        return self.name

    def die(self, msg):
        raise ValueError(msg)

    def get_collection_path(self):
        if self.local:
            return os.path.join("local", "collections",
                                self.module, self.name + ".csv")
        else:
            return os.path.join(self.module, "collections",
                                self.name, "manifest.csv")

    def get_item_path(self, mi):
        return os.path.join(self.module, "collections", self.name, mi.path)

    def load(self):
        """
        Load collection from CSV file
        """
        path = self.get_collection_path()
        if not os.path.exists(path):
            return
        with open(path) as f:
            reader = csv.reader(f)
            reader.next()  # Skip header
            for name, uuid, path, hash in reader:
                uuid = UUID(uuid)
                mi = CollectionItem(name=name, uuid=uuid,
                                    path=path, hash=hash)
                self.items[uuid] = mi

    def save(self):
        self.logger.info("Updating manifest")
        rows = sorted(
            ([r.name, r.uuid, r.path, r.hash]
             for r in self.items.values()),
            key=lambda x: x[0])
        rows = [["name", "uuid", "path", "hash"]] + rows
        out = StringIO()
        writer = csv.writer(out)
        writer.writerows(rows)
        safe_rewrite(self.get_collection_path(), out.getvalue(), mode=0644)
        # Update collection cache
        self.logger.info("Updating CollectionCache")
        CollectionCache.merge("%s.%s" % (self.module, self.name),
                              set(self.items))

    def load_item(self, mi):
        p = self.get_item_path(mi)
        if not os.path.exists(p):
            self.die("File not found: %s" % p)
        with open(p) as f:
            fdata = f.read()
        try:
            data = json_decode(fdata)
        except ValueError, why:
            self.die("Failed to read JSON file '%s': %s" % (p, why))
        if not isinstance(data, dict):
            self.die("Invalid JSON file: %s" % p)
        if self.get_hash(fdata) != mi.hash:
            self.die("Checksum mismatch for file '%s'" % p)
        return data
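A usage sketch for the collection loader above (illustrative): the collection name is hypothetical, must already be registered in Collection.COLLECTIONS, and the manifest CSV and JSON payloads are assumed to exist on disk.

# Hypothetical collection name; Collection.COLLECTIONS must contain it.
c = Collection("fm.alarmclasses")
c.load()                        # read the manifest CSV into c.items
for uuid, item in c.items.items():
    data = c.load_item(item)    # decode the JSON payload, verifying its hash
c.save()                        # rewrite the manifest and refresh CollectionCache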
class KeyValueStore(object):
    TABLE = "kv"

    def __init__(self, path, indexes=None, fields=None):
        self.logger = PrefixLoggerAdapter(logger, path)
        self.path = path
        self.fields = ["uuid"] + list(fields or [])
        self.indexes = indexes or []
        self.connect = None

    def get_connect(self):
        if not self.connect:
            is_empty = not os.path.exists(self.path)
            self.logger.info("Connecting to database")
            self.connect = sqlite3.connect(self.path)
            self.logger.debug("SQLite version %s", sqlite3.version)
            if is_empty:
                self.logger.info("Formatting key-value store")
                c = self.connect.cursor()
                fields = ["%s TEXT" % f for f in self.fields]
                c.execute("CREATE TABLE %s(%s)" % (
                    self.TABLE, ",".join(fields)))
                for i in self.indexes:
                    self.logger.debug("Indexing %s", i)
                    c.execute("CREATE INDEX x_%s_%s ON %s(%s)" % (
                        self.TABLE, i, self.TABLE, i))
                self.connect.commit()
        return self.connect

    def commit(self):
        self.logger.debug("Commit")
        connect = self.get_connect()
        connect.commit()

    def cursor(self):
        connect = self.get_connect()
        return connect.cursor()

    def get(self, **kwargs):
        where = []
        args = []
        for k in kwargs:
            where += ["%s = ?" % k]
            args += [kwargs[k]]
        sql = "SELECT %s FROM %s WHERE %s" % (
            ", ".join(self.fields), self.TABLE, " AND ".join(where))
        self.logger.debug("%s %s", sql, args)
        c = self.cursor()
        c.execute(sql, tuple(args))
        r = c.fetchone()
        if not r:
            return None
        return dict(zip(self.fields, r))

    def find(self, **kwargs):
        where = []
        args = []
        for k in kwargs:
            where += ["%s = ?" % k]
            args += [kwargs[k]]
        sql = "SELECT %s FROM %s" % (", ".join(self.fields), self.TABLE)
        if where:
            sql += " WHERE %s" % " AND ".join(where)
        self.logger.debug("%s %s", sql, args)
        c = self.cursor()
        c.execute(sql, tuple(args))
        data = []
        for r in c.fetchall():
            data += [dict(zip(self.fields, r))]
        return data

    def put(self, uuid, **kwargs):
        self.logger.debug("PUT: uuid=%s, %s", uuid, kwargs)
        if self.get(uuid=uuid):
            sop = []
            args = []
            for k in kwargs:
                sop += ["%s = ?" % k]
                args += [kwargs[k]]
            args += [uuid]
            sql = "UPDATE %s SET %s WHERE uuid=?" % (
                self.TABLE, ", ".join(sop))
            self.logger.debug("%s %s", sql, args)
            c = self.cursor()
            c.execute(sql, tuple(args))
        else:
            sf = ["uuid"]
            args = [uuid]
            for k in kwargs:
                sf += [k]
                args += [kwargs[k]]
            c = self.cursor()
            c.execute(
                "INSERT INTO %s(%s) VALUES(%s)" % (
                    self.TABLE, ", ".join(sf),
                    ", ".join(["?"] * (len(kwargs) + 1))),
                tuple(args))
        self.commit()

    def delete(self, uuid):
        self.logger.debug("DELETE %s", uuid)
        sql = "DELETE FROM %s WHERE uuid=?" % self.TABLE
        self.logger.debug("%s %s", sql, (uuid, ))
        c = self.cursor()
        c.execute(sql, (uuid, ))
        self.commit()
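A usage sketch for the key-value store above (illustrative): the database path, field names and values are hypothetical; only methods defined in the snippet are called.

# Hypothetical path and fields; the store creates the SQLite schema on first use.
kv = KeyValueStore("/tmp/example-kv.db", indexes=["name"],
                   fields=["name", "address"])
kv.put("uuid-1", name="sw1", address="10.0.0.1")  # INSERT on first call
kv.put("uuid-1", address="10.0.0.2")              # UPDATE on later calls
print kv.get(uuid="uuid-1")                       # single row as a dict, or None
print kv.find(name="sw1")                         # list of matching rows
kv.delete("uuid-1")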
class Pool(object):
    def __init__(self, name="pool", metrics_prefix=None,
                 start_threads=1, max_threads=10,
                 min_spare=1, max_spare=1, backlog=0):
        if min_spare > max_spare:
            raise ValueError("min_spare (%d) must not be greater"
                             " than max_spare (%d)" % (min_spare, max_spare))
        if start_threads > max_threads:
            raise ValueError("start_threads (%d) must not be greater"
                             " than max_threads (%d)" % (start_threads,
                                                         max_threads))
        self.logger = PrefixLoggerAdapter(logger, name)
        self.name = name
        if not metrics_prefix:
            metrics_prefix = "noc"
        metrics_prefix += "pool.%s" % name
        self.metrics = MetricsHub(
            metrics_prefix,
            "threads.running", "threads.idle", "queue.len"
        )
        self.start_threads = start_threads
        self.max_threads = max_threads
        self.min_spare = min_spare
        self.max_spare = max_spare
        self.backlog = backlog if backlog else max_threads
        self.t_lock = Lock()
        self.threads = set()
        self.queue = Queue(backlog)
        self.stopping = False
        self.stopped = Event()
        self.n_idle = 0
        self.idle_lock = Lock()
        self.logger.info("Running thread pool '%s'", self.name)
        self.set_idle(None)

    def set_idle(self, status):
        with self.idle_lock:
            if status is not None:
                self.n_idle += 1 if status else -1
            n = len(self.threads)
            self.metrics.threads_idle = self.n_idle
            self.metrics.threads_running = n
            self.metrics.queue_len = self.queue.qsize()
            if (not status and self.n_idle < self.min_spare and
                    n < self.max_threads):
                # Run additional thread
                w = Worker(self, self.queue)
                self.threads.add(w)
                w.start()
            elif status and (self.n_idle > self.max_spare or
                             n > self.max_threads):
                # Stop one thread
                self.queue.put(None)

    def thread_done(self, t):
        with self.t_lock:
            if t in self.threads:
                self.threads.remove(t)
            if self.stopping and not len(self.threads):
                self.stopped.set()

    def get_status(self):
        s = []
        t = time.time()
        with self.t_lock:
            for w in self.threads:
                if w.is_idle:
                    s += [{
                        "id": w.ident,
                        "status": "IDLE"
                    }]
                else:
                    s += [{
                        "id": w.ident,
                        "status": "RUN",
                        "title": w.title,
                        "start": w.start_time,
                        "duration": t - w.start_time
                    }]
        return s

    def stop(self, timeout=3):
        self.stopping = True
        with self.t_lock:
            n = len(self.threads)
            if not n:
                return  # Stopped
            for i in range(n):
                self.queue.put(None)  # Send shutdown signals
        # Wait for clean stop
        self.stopped.wait(timeout)
        if self.stopped.is_set():
            return
        # Forcefully cancel
        with self.t_lock:
            for t in self.threads:
                if t.is_alive():
                    t.cancel()
        time.sleep(timeout)

    def run(self, title, target, args=(), kwargs={}):
        if self.stopping:
            return
        self.queue.put((title, target, args, kwargs))

    def configure(self, max_threads=None, min_spare=None,
                  max_spare=None, backlog=None):
        if max_threads is not None:
            self.max_threads = max_threads
        if min_spare is not None:
            self.min_spare = min_spare
        if max_spare is not None:
            self.max_spare = max_spare
        if backlog is not None:
            self.backlog = backlog
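A usage sketch for the thread pool above (illustrative): the task function and pool sizes are hypothetical; actual execution depends on the Worker class, which is not shown in this snippet.

# Hypothetical task; run() only enqueues (title, target, args, kwargs) tuples,
# which Worker threads (not shown here) pick up and execute.
def ping(address):
    pass  # placeholder body

pool = Pool(name="ping", start_threads=2, max_threads=10,
            min_spare=1, max_spare=4)
pool.run("ping 10.0.0.1", ping, args=("10.0.0.1",))
print pool.get_status()  # e.g. [{"id": ..., "status": "IDLE"}, ...]
pool.stop(timeout=3)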
class SyncHandler(object):
    type = None  # Set to handler type
    # DictParameter instance used to parse and clean config
    config = {}

    def __init__(self, daemon, name):
        self.daemon = daemon
        self.name = name
        self.logger = PrefixLoggerAdapter(daemon.logger, name)
        self.logger.info("Starting %s (%s)", name, self.type)
        self.cmd_queue = []

    def configure(self, **kwargs):
        pass

    def close(self):
        """
        Called when handler is closed
        """
        pass

    def on_create(self, uuid, data):
        """
        Object first seen
        """
        pass

    def on_delete(self, uuid):
        """
        Object removed
        """
        pass

    def on_change(self, uuid, data):
        """
        Object changed
        """
        pass

    def on_configuration_done(self):
        """
        End of configuration round
        """
        for c in self.cmd_queue:
            self.run_command(c)
        self.cmd_queue = []

    def get_command(self, cmd, **ctx):
        for v in ctx:
            cmd = cmd.replace("{%s}" % v, str(ctx[v]))
        return cmd

    def queue_command(self, cmd, once=False, **ctx):
        if not cmd:
            return
        cmd = self.get_command(cmd, **ctx)
        if not once or cmd not in self.cmd_queue:
            self.logger.debug("Queueing command: %s", cmd)
            self.cmd_queue += [cmd]

    def run_command(self, cmd, **ctx):
        """
        Run shell command with given context
        """
        if not cmd:
            return
        cmd = self.get_command(cmd, **ctx)
        self.logger.info("Running '%s'" % cmd)
        p = subprocess.Popen(cmd, shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        p.wait()
        output = p.stdout.read()
        if output:
            self.logger.debug("Output:\n%s", output)
        if p.returncode == 0:
            self.logger.debug("Success")
        else:
            self.logger.info("Failed (retcode %s)", p.returncode)
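A subclass sketch for the handler above (illustrative): the handler type, the daemon stub and the command template are hypothetical; only hooks defined in the snippet are used.

# Hypothetical subclass; `daemon` stands for any object exposing a .logger.
class ExampleHandler(SyncHandler):
    type = "example"

    def on_change(self, uuid, data):
        # Queue a reload once per configuration round
        self.queue_command("echo reload {uuid}", once=True, uuid=uuid)

handler = ExampleHandler(daemon, "example")
handler.on_change("uuid-1", {"name": "sw1"})
handler.on_configuration_done()  # runs queued commands via run_command()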
class Socket(object):
    """
    Abstract non-blocking socket wrapper
    """
    TTL = None  # maximum time to live in seconds
    READ_CHUNK = 65536  # @todo: configuration parameter
    CLOSE_ON_ERROR = True  # Call .close() from .on_error()

    def __init__(self, factory, socket=None):
        self.logger = PrefixLoggerAdapter(logger, self.get_label())
        self.factory = factory
        self.socket = socket
        self.start_time = time.time()
        self.last_read = self.start_time + 100  # @todo: Meaningful value
        self.name = None
        self.closing = False  # In closing state
        self.stale = False  # Closed as stale
        self.ttl = self.TTL
        self.set_timeout(self.TTL)
        self.factory.register_socket(self)
        if socket:
            self.set_status(r=True)

    def __repr__(self):
        return "<%s(0x%x, %s)>" % (
            self.__class__.__name__, id(self),
            ", ".join(self.get_flags()))

    def get_label(self):
        return self.__class__.__name__

    def get_flags(self):
        """
        Returns list of flags

        :return:
        """
        if not hasattr(self, "socket"):
            return ["init"]
        f = []
        if self.closing:
            f += ["closing"]
        if self.stale:
            f += ["stale"]
        return f

    def create_socket(self):
        """
        Performs actual socket creation and initialization
        and puts the socket into non-blocking mode.
        """
        if not self.socket_is_ready():
            # Socket was not created
            raise SocketNotImplemented()
        self.socket.setblocking(0)
        self.set_status(r=True)
        self.update_status()

    def set_timeout(self, ttl):
        """
        Change socket timeout

        :param ttl: Timeout in seconds
        :type ttl: int
        """
        if ttl and ttl != self.ttl:
            self.logger.debug("Set timeout to %s secs" % ttl)
            self.ttl = ttl

    def socket_is_ready(self):
        """
        Check socket is created and ready for operation

        :rtype: bool
        """
        return self.socket is not None

    def fileno(self):
        """
        Get socket system file id

        :return: file id or None
        :rtype: int or None
        """
        return self.socket.fileno() if self.socket else None

    def handle_read(self):
        """
        Read handler. Called every time the socket has data
        available to be read.
        """
        pass

    def handle_write(self):
        """
        Write handler. Called every time the socket has data
        available to be written.
        """
        pass

    def on_close(self):
        """
        Close handler. Called on socket close.
        """
        pass

    def on_error(self, exc):
        """
        Error handler. Called on any socket error.
        Default behavior is to emit an error message and close the socket.

        :param exc: SocketException
        """
        self.logger.error(exc.message)
        if self.CLOSE_ON_ERROR:
            self.close()

    def close(self):
        """
        Close socket and unregister from factory
        """
        if self.closing:
            return
        self.logger.debug("Closing socket")
        self.closing = True
        if self.socket:
            self.factory.unregister_socket(self)
            if self.socket:
                try:
                    self.socket.close()
                except socket.error, why:
                    if why[0] not in IGNORABLE_CLOSE_ERRORS:
                        error_report(logger=self.logger)
            self.socket = None
        self.on_close()
def __init__(self, path, indexes=None, fields=None):
    self.logger = PrefixLoggerAdapter(logger, path)
    self.path = path
    self.fields = ["uuid"] + list(fields or [])
    self.indexes = indexes or []
    self.connect = None
def __init__(self, daemon, task):
    self.daemon = daemon
    self.task = task
    self.missed_oids = {}  # oid -> expire time
    self.logger = PrefixLoggerAdapter(
        logging.getLogger(self.__module__), self.task.uuid)
def wipe(o):
    if o.profile_name.startswith("NOC."):
        return True
    log = PrefixLoggerAdapter(logger, str(o.id))
    # Delete active map tasks
    log.debug("Wiping MAP tasks")
    MapTask.objects.filter(managed_object=o).delete()
    # Wiping discovery tasks
    log.debug("Wiping discovery tasks")
    db = get_db()
    db.noc.schedules.inv.discovery.remove({"key": o.id})
    # Wiping FM events
    log.debug("Wiping events")
    NewEvent.objects.filter(managed_object=o.id).delete()
    FailedEvent.objects.filter(managed_object=o.id).delete()
    ActiveEvent.objects.filter(managed_object=o.id).delete()
    ArchivedEvent.objects.filter(managed_object=o.id).delete()
    # Wiping alarms
    log.debug("Wiping alarms")
    for ac in (ActiveAlarm, ArchivedAlarm):
        for a in ac.objects.filter(managed_object=o.id):
            # Relink root causes
            my_root = a.root
            for iac in (ActiveAlarm, ArchivedAlarm):
                for ia in iac.objects.filter(root=a.id):
                    ia.root = my_root
                    ia.save()
            # Delete alarm
            a.delete()
    # Wiping MAC DB
    log.debug("Wiping MAC DB")
    MACDB._get_collection().remove({"managed_object": o.id})
    # Wiping pending link check
    log.debug("Wiping pending link check")
    PendingLinkCheck._get_collection().remove({"local_object": o.id})
    PendingLinkCheck._get_collection().remove({"remote_object": o.id})
    # Wiping discovery id cache
    log.debug("Wiping discovery id")
    DiscoveryID._get_collection().remove({"object": o.id})
    # Wiping interfaces, subs and links
    # Wipe links
    log.debug("Wiping links")
    for i in Interface.objects.filter(managed_object=o.id):
        # @todo: Remove aggregated links correctly
        Link.objects.filter(interfaces=i.id).delete()
    #
    log.debug("Wiping subinterfaces")
    SubInterface.objects.filter(managed_object=o.id).delete()
    log.debug("Wiping interfaces")
    Interface.objects.filter(managed_object=o.id).delete()
    log.debug("Wiping forwarding instances")
    ForwardingInstance.objects.filter(managed_object=o.id).delete()
    # Unbind from IPAM
    log.debug("Unbind from IPAM")
    for a in Address.objects.filter(managed_object=o):
        a.managed_object = None
        a.save()
    # Wipe object status
    log.debug("Wiping object status")
    ObjectStatus.objects.filter(object=o.id).delete()
    # Wipe outages
    log.debug("Wiping outages")
    Outage.objects.filter(object=o.id).delete()
    # Wipe uptimes
    log.debug("Wiping uptimes")
    Uptime.objects.filter(object=o.id).delete()
    # Wipe reboots
    log.debug("Wiping reboots")
    Reboot.objects.filter(object=o.id).delete()
    # Delete Managed Object's capabilities
    log.debug("Wiping capabilities")
    ObjectCapabilities.objects.filter(object=o.id).delete()
    # Delete Managed Object's facts
    log.debug("Wiping facts")
    ObjectFact.objects.filter(object=o.id).delete()
    # Delete Managed Object's attributes
    log.debug("Wiping attributes")
    ManagedObjectAttribute.objects.filter(managed_object=o).delete()
    # Detach from validation rules
    log.debug("Detaching from validation rules")
    for vr in ValidationRule.objects.filter(objects_list__object=o.id):
        vr.objects_list = [x for x in vr.objects_list
                           if x.object.id != o.id]
        if not vr.objects_list and not vr.selectors_list:
            vr.is_active = False
        vr.save()
    # Finally delete object and config
    log.debug("Finally wiping object")
    o.delete()
    log.debug("Done")