def update_diag(loop_end=None, loop_start=None, syncrecord_start=None, sync_start=None, backend_status=None):
    """Record synchronizer-loop health on the observer's Diag row.

    Each keyword argument, when given, updates one key of the JSON dict
    stored in ``Diag.backend_register``; ``backend_status`` updates the
    status column directly. Best-effort: every failure is logged and
    swallowed, never raised to the caller.
    """
    try:
        observer_name = Config().observer_name
    except Exception:
        # No observer name configured; fall back to the default.
        observer_name = 'openstack'

    try:
        # Reuse the existing Diag row for this observer, or start a new one.
        diag = Diag.objects.filter(name=observer_name).first()
        if not diag:
            diag = Diag(name=observer_name)

        # backend_register holds a JSON dict of loop timing information.
        br = json.loads(diag.backend_register)

        if loop_end:
            br['last_run'] = loop_end
        if loop_end and loop_start:
            br['last_duration'] = loop_end - loop_start
        if syncrecord_start:
            br['last_syncrecord_start'] = syncrecord_start
        if sync_start:
            br['last_synchronizer_start'] = sync_start
        if backend_status:
            diag.backend_status = backend_status

        diag.backend_register = json.dumps(br)
        diag.save()
    except Exception:
        logger.log_exc("Exception in update_diag")
        traceback.print_exc()
def run_policy_once():
    """Run one pass of model policies over every policy-enabled model.

    Collects all live objects whose policy is pending (never policed, or
    updated since last policed) plus all soft-deleted objects in the same
    state, executes their model policies, then purges deleted Slices that
    nothing depends on any more.
    """
    from core.models import Instance, Slice, Controller, Network, User, SlicePrivilege, Site, SitePrivilege, Image, ControllerSlice, ControllerUser, ControllerSite

    models = [Controller, Site, SitePrivilege, Image, ControllerSlice, ControllerSite, ControllerUser, User, Slice, Network, Instance, SlicePrivilege]

    objects = []
    deleted_objects = []

    for m in models:
        # Live objects: policy pending and not opted out via no_policy.
        res = m.objects.filter((Q(policed__lt=F('updated')) | Q(policed=None)) & Q(no_policy=False))
        objects.extend(res)
        # Deleted objects have no no_policy opt-out.
        res = m.deleted_objects.filter(Q(policed__lt=F('updated')) | Q(policed=None))
        deleted_objects.extend(res)

    for o in objects:
        execute_model_policy(o, o.deleted)

    for o in deleted_objects:
        execute_model_policy(o, True)

    # Reap non-sync'd models here
    reaped = [Slice]

    for m in reaped:
        dobjs = m.deleted_objects.all()
        for d in dobjs:
            # Purge only once nothing depends on the deleted object.
            deps = walk_inv_deps(noop, d)
            if not deps:
                print('Purging object %r' % d)
                d.delete(purge=True)

    try:
        reset_queries()
    except Exception:
        # this shouldn't happen, but in case it does, catch it...
        logger.log_exc("exception in reset_queries")
def check_db_connection_okay(self):
    """Probe the DB connection and recover if it was closed under us.

    django implodes if the database connection is closed by
    docker-compose, so issue a throwaway query; on the known
    "connection already closed" failure, discard stale connections so
    django transparently reconnects on the next query.
    """
    try:
        # Any cheap query will do; we only care whether it raises.
        Diag.objects.filter(name="foo").first()
    except Exception:
        from django import db
        if "connection already closed" in traceback.format_exc():
            logger.error("XXX connection already closed")
            try:
                # Drop connections that are no longer usable; django will
                # open a fresh one on demand.
                db.close_old_connections()
            except Exception:
                logger.log_exc("XXX we failed to fix the failure")
        else:
            logger.log_exc("XXX some other error")
def run(self):
    """Dispatch pub/sub messages to the watchers registered per channel.

    For each incoming message, decode its payload, and for every watcher
    of that channel whose watched field changed (an empty/None ``into``
    watches all fields), re-fetch the object by pk and hand it to the
    watcher's step. Errors are logged per-message and never abort the loop.
    """
    for item in self.pubsub.listen():
        channel = item['channel']
        try:
            entry = self.watch_map[channel]
            data = json.loads(item['data'])
            pk = data['pk']
            changed_fields = data['changed_fields']

            for w in entry:
                # An empty/None `into` means "watch every field".
                if w.into in changed_fields or not w.into:
                    if hasattr(w.source, 'handle_watched_object'):
                        o = w.dest.objects.get(pk=pk)
                        step = w.source()
                        step.handle_watched_object(o)
        except Exception as e:
            logger.log_exc(
                "XOS watcher: exception %s while processing object: %s" % (type(e), e))
def run_policy_once():
    """Run one pass of model policies over every policy-enabled model.

    Collects all live objects whose policy is pending (never policed, or
    updated since last policed) plus all soft-deleted objects in the same
    state, executes their model policies, then purges deleted Slices that
    nothing depends on any more.
    """
    from core.models import Instance, Slice, Controller, Network, User, SlicePrivilege, Site, SitePrivilege, Image, ControllerSlice, ControllerUser, ControllerSite

    models = [
        Controller, Site, SitePrivilege, Image, ControllerSlice,
        ControllerSite, ControllerUser, User, Slice, Network, Instance,
        SlicePrivilege
    ]

    objects = []
    deleted_objects = []

    for m in models:
        # Live objects: policy pending and not opted out via no_policy.
        res = m.objects.filter((Q(policed__lt=F('updated')) | Q(policed=None)) & Q(no_policy=False))
        objects.extend(res)
        # Deleted objects have no no_policy opt-out.
        res = m.deleted_objects.filter(
            Q(policed__lt=F('updated')) | Q(policed=None))
        deleted_objects.extend(res)

    for o in objects:
        execute_model_policy(o, o.deleted)

    for o in deleted_objects:
        execute_model_policy(o, True)

    # Reap non-sync'd models here
    reaped = [Slice]

    for m in reaped:
        dobjs = m.deleted_objects.all()
        for d in dobjs:
            # Purge only once nothing depends on the deleted object.
            deps = walk_inv_deps(noop, d)
            if not deps:
                print('Purging object %r' % d)
                d.delete(purge=True)

    try:
        reset_queries()
    except Exception:
        # this shouldn't happen, but in case it does, catch it...
        logger.log_exc("exception in reset_queries")
def execute_model_policy(instance, deleted):
    """Run the model policy handler for one instance.

    Walks dependency relations to mark dirty neighbors (or, for deletion
    of container-like models, deactivates dependents), dispatches to the
    ``model_policy_<Model>`` handler if one exists, and stamps the
    instance as policed. Instances that cannot be saved are quarantined
    in ``bad_instances`` so they are not retried forever.
    """
    # Automatic dirtying
    if instance in bad_instances:
        return

    # These are the models whose children get deleted when they are deleted.
    delete_policy_models = ['Slice', 'Instance', 'Network']
    sender_name = instance.__class__.__name__
    policy_name = 'model_policy_%s' % sender_name

    if not deleted:
        walk_inv_deps(update_dep, instance)
        walk_deps(update_wp, instance)
    elif sender_name in delete_policy_models:
        walk_inv_deps(delete_if_inactive, instance)

    try:
        policy_handler = getattr(model_policies, policy_name, None)
        logger.error("POLICY HANDLER: %s %s" % (policy_name, policy_handler))
        if policy_handler is not None:
            if deleted:
                try:
                    policy_handler.handle_delete(instance)
                except AttributeError:
                    # Handler has no delete support; nothing to do.
                    pass
            else:
                policy_handler.handle(instance)
    except Exception:
        logger.log_exc("Model Policy Error:")

    try:
        instance.policed = datetime.now()
        instance.save(update_fields=['policed'])
    except Exception:
        logger.error('Object %r is defective' % instance)
        bad_instances.append(instance)
def execute_model_policy(instance, deleted):
    """Run the model policy handler for one instance.

    Walks dependency relations to mark dirty neighbors (or, for deletion
    of container-like models, deactivates dependents), dispatches to the
    ``model_policy_<Model>`` handler if one exists, and stamps the
    instance as policed. Instances that cannot be saved are quarantined
    in ``bad_instances`` so they are not retried forever.
    """
    # Automatic dirtying
    if instance in bad_instances:
        return

    # These are the models whose children get deleted when they are deleted.
    delete_policy_models = ['Slice', 'Instance', 'Network']
    sender_name = instance.__class__.__name__
    policy_name = 'model_policy_%s' % sender_name

    if not deleted:
        walk_inv_deps(update_dep, instance)
        walk_deps(update_wp, instance)
    elif sender_name in delete_policy_models:
        walk_inv_deps(delete_if_inactive, instance)

    try:
        policy_handler = getattr(model_policies, policy_name, None)
        logger.error("POLICY HANDLER: %s %s" % (policy_name, policy_handler))
        if policy_handler is not None:
            if deleted:
                try:
                    policy_handler.handle_delete(instance)
                except AttributeError:
                    # Handler has no delete support; nothing to do.
                    pass
            else:
                policy_handler.handle(instance)
    except Exception:
        logger.log_exc("Model Policy Error:")

    try:
        instance.policed = timezone.now()
        instance.save(update_fields=['policed'])
    except Exception:
        logger.error('Object %r is defective' % instance)
        bad_instances.append(instance)
def run_once(self):
    """Execute one full synchronizer pass: a sync phase then a deletion phase.

    Each phase runs every ordered step in its own thread; the threads
    coordinate through self.step_conditions / self.step_status so that a
    step waits for its providers. Any failure is logged and reported via
    the Diag record; nothing is raised to the caller.
    """
    try:
        model_accessor.check_db_connection_okay()

        loop_start = time.time()

        # error_map_path points at the file ErrorMapper uses to translate
        # backend errors.
        error_map_file = Config.get('error_map_path')
        self.error_mapper = ErrorMapper(error_map_file)

        # Two passes. One for sync, the other for deletion.
        for deletion in [False, True]:
            # Set of individual objects within steps that failed
            self.failed_step_objects = set()

            # Set up conditions and step status
            # This is needed for steps to run in parallel
            # while obeying dependencies.
            providers = set()
            dependency_graph = self.dependency_graph if not deletion else self.deletion_dependency_graph

            # Every step some other step depends on gets a condition
            # variable and starts out WORKING for dependents to wait on.
            for v in dependency_graph.values():
                if (v):
                    providers.update(v)

            self.step_conditions = {}
            self.step_status = {}

            for p in list(providers):
                self.step_conditions[p] = threading.Condition()
                self.step_status[p] = STEP_STATUS_WORKING

            self.failed_steps = []

            threads = []
            logger.debug('Deletion=%r...' % deletion)
            # Deletion runs the schedule in reverse order.
            schedule = self.ordered_steps if not deletion else reversed(
                self.ordered_steps)

            for S in schedule:
                thread = threading.Thread(target=self.sync, name='synchronizer', args=(S, deletion))

                logger.debug('Deletion=%r...' % deletion)
                threads.append(thread)

            # Start threads
            for t in threads:
                t.start()

            # another spot to clean up debug state
            try:
                model_accessor.reset_queries()
            except:
                # this shouldn't happen, but in case it does, catch it...
                logger.log_exc("exception in reset_queries")

            # Wait for all threads to finish before continuing with the run
            # loop
            for t in threads:
                t.join()

            self.save_run_times()

        loop_end = time.time()

        model_accessor.update_diag(loop_end=loop_end, loop_start=loop_start, backend_status="1 - Bottom Of Loop")
    except Exception as e:
        logger.error(
            'Core error. This seems like a misconfiguration or bug: %r. This error will not be relayed to the user!' % e)
        logger.log_exc("Exception in observer run loop")
        traceback.print_exc()
        model_accessor.update_diag(
            backend_status="2 - Exception in Event Loop")
def sync(self, S, deletion):
    """Run one sync step S in its own thread, honoring step dependencies.

    Waits (via per-step condition variables) until every dependency of S
    has finished; skips S if a dependency failed; otherwise runs the
    step, records failed objects, and finally publishes S's own status so
    dependents can proceed.
    """
    try:
        step = self.lookup_step_class(S)
        start_time = time.time()

        logger.debug("Starting to work on step %s, deletion=%s" % (step.__name__, str(deletion)))

        dependency_graph = self.dependency_graph if not deletion else self.deletion_dependency_graph
        # if not deletion else self.deletion_step_conditions
        step_conditions = self.step_conditions
        step_status = self.step_status
        # if not deletion else self.deletion_step_status

        # Wait for step dependencies to be met
        try:
            deps = dependency_graph[S]
            has_deps = True
        except KeyError:
            has_deps = False

        go = True
        failed_dep = None
        if (has_deps):
            for d in deps:
                if d == step.__name__:
                    # A step never waits on itself.
                    logger.debug("  step %s self-wait skipped" % step.__name__)
                    go = True
                    continue

                cond = step_conditions[d]
                cond.acquire()
                if (step_status[d] is STEP_STATUS_WORKING):
                    # Dependency still running: block until it notifies us.
                    logger.debug("  step %s wait on dep %s" % (step.__name__, d))
                    cond.wait()
                    logger.debug(" step %s wait on dep %s cond returned" % (step.__name__, d))
                elif step_status[d] == STEP_STATUS_OK:
                    go = True
                else:
                    logger.debug("  step %s has failed dep %s" % (step.__name__, d))
                    go = False
                    failed_dep = d
                cond.release()
                # One failed dependency is enough to skip this step.
                if (not go):
                    break
        else:
            go = True

        if (not go):
            # Skipped because of a failed dependency: mark the step failed
            # so transitive dependents are skipped too.
            logger.debug("Step %s skipped" % step.__name__)
            self.failed_steps.append(step)
            my_status = STEP_STATUS_KO
        else:
            sync_step = self.lookup_step(S)
            sync_step.__name__ = step.__name__
            sync_step.dependencies = []
            try:
                mlist = sync_step.provides
                try:
                    # Map the models this step provides onto the names of
                    # the model dependencies they imply.
                    for m in mlist:
                        lst = self.model_dependency_graph[m.__name__]
                        nlst = map(lambda a_b: a_b[1], lst)
                        sync_step.dependencies.extend(nlst)
                except Exception as e:
                    raise e
            except KeyError:
                pass
            sync_step.debug_mode = debug_mode

            should_run = False
            try:
                # Various checks that decide whether
                # this step runs or not
                self.check_class_dependency(
                    sync_step, self.failed_steps)  # dont run Slices if Sites failed
                # dont run sync_network_routes if time since last run < 1
                # hour
                self.check_schedule(sync_step, deletion)
                should_run = True
            except StepNotReady:
                logger.info('Step not ready: %s' % sync_step.__name__)
                self.failed_steps.append(sync_step)
                my_status = STEP_STATUS_KO
            except Exception as e:
                logger.error('%r' % e)
                logger.log_exc("sync step failed: %r. Deletion: %r" % (sync_step, deletion))
                self.failed_steps.append(sync_step)
                my_status = STEP_STATUS_KO

            if (should_run):
                try:
                    duration = time.time() - start_time

                    logger.debug('Executing step %s, deletion=%s' % (sync_step.__name__, deletion))

                    # Failed objects from earlier steps are handed in so the
                    # step can avoid re-processing them.
                    failed_objects = sync_step(failed=list(
                        self.failed_step_objects), deletion=deletion)

                    self.check_duration(sync_step, duration)

                    if failed_objects:
                        self.failed_step_objects.update(failed_objects)

                    logger.debug("Step %r succeeded, deletion=%s" % (sync_step.__name__, deletion))
                    my_status = STEP_STATUS_OK
                    self.update_run_time(sync_step, deletion)
                except Exception as e:
                    logger.error(
                        'Model step %r failed. This seems like a misconfiguration or bug: %r. This error will not be relayed to the user!' % (sync_step.__name__, e))
                    logger.log_exc("Exception in sync step")
                    self.failed_steps.append(S)
                    my_status = STEP_STATUS_KO
            else:
                # check_schedule decided this run is unnecessary; treat as OK.
                logger.info("Step %r succeeded due to non-run" % step)
                my_status = STEP_STATUS_OK

        # Publish this step's final status and wake every waiting dependent.
        try:
            my_cond = step_conditions[S]
            my_cond.acquire()
            step_status[S] = my_status
            my_cond.notify_all()
            my_cond.release()
        except KeyError as e:
            # No condition registered: nothing depends on this step.
            logger.debug('Step %r is a leaf' % step)
            pass
    finally:
        try:
            model_accessor.reset_queries()
        except:
            # this shouldn't happen, but in case it does, catch it...
            logger.log_exc("exception in reset_queries")

        model_accessor.connection_close()