def check_db_connection_okay(self):
    # django implodes if the database connection is closed by
    # docker-compose; probe with a throwaway query and recover if possible
    try:
        Diag.objects.filter(name="foo").first()
    except Exception:
        from django import db
        if "connection already closed" in traceback.format_exc():
            logger.error("XXX connection already closed")
            try:
                # if db.connection:
                #     db.connection.close()
                db.close_old_connections()
            except Exception:
                logger.log_exc("XXX we failed to fix the failure")
        else:
            logger.log_exc("XXX some other error")
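# A minimal, self-contained sketch of the recovery pattern above, assuming a
# configured Django project. The probe callable, function name, and logger are
# illustrative and not part of the synchronizer; only
# django.db.close_old_connections() is real Django API.
import logging
import traceback

from django import db

sketch_logger = logging.getLogger(__name__)


def ensure_db_connection(probe):
    """Run a cheap throwaway query; if the connection was dropped out from
    under us (e.g. the database container restarted), discard stale
    connections so the next query transparently reconnects."""
    try:
        probe()
    except Exception:
        if "connection already closed" in traceback.format_exc():
            sketch_logger.error("connection already closed; dropping stale connections")
            db.close_old_connections()
        else:
            sketch_logger.exception("some other database error")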
def execute_model_policy(instance, deleted):
    # Automatic dirtying
    if instance in bad_instances:
        return

    # These are the models whose children get deleted when they are deleted
    delete_policy_models = ['Slice', 'Instance', 'Network']
    sender_name = instance.__class__.__name__
    policy_name = 'model_policy_%s' % sender_name

    if not deleted:
        walk_inv_deps(update_dep, instance)
        walk_deps(update_wp, instance)
    elif sender_name in delete_policy_models:
        walk_inv_deps(delete_if_inactive, instance)

    try:
        # Dispatch by name: look for a handler called model_policy_<ModelName>
        policy_handler = getattr(model_policies, policy_name, None)
        logger.debug("POLICY HANDLER: %s %s" % (policy_name, policy_handler))
        if policy_handler is not None:
            if deleted:
                try:
                    policy_handler.handle_delete(instance)
                except AttributeError:
                    pass
            else:
                policy_handler.handle(instance)
    except Exception:
        logger.log_exc("Model Policy Error:")

    try:
        instance.policed = timezone.now()
        instance.save(update_fields=['policed'])
    except Exception:
        logger.error('Object %r is defective' % instance)
        bad_instances.append(instance)
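# A minimal, self-contained sketch of the name-based dispatch used above: the
# policies namespace is searched for an attribute called
# "model_policy_<ModelName>", and its handle()/handle_delete() is called when
# present. The Slice class and the example_policies namespace below are
# made-up stand-ins for illustration only.
from types import SimpleNamespace


class Slice(object):
    pass


class SlicePolicy(object):
    @staticmethod
    def handle(instance):
        print("reconciling %s" % instance.__class__.__name__)

    @staticmethod
    def handle_delete(instance):
        print("cleaning up after deleted %s" % instance.__class__.__name__)


example_policies = SimpleNamespace(model_policy_Slice=SlicePolicy)


def dispatch(instance, deleted):
    policy_name = 'model_policy_%s' % instance.__class__.__name__
    policy_handler = getattr(example_policies, policy_name, None)
    if policy_handler is None:
        return  # no policy registered for this model
    if deleted:
        policy_handler.handle_delete(instance)
    else:
        policy_handler.handle(instance)


dispatch(Slice(), deleted=False)
dispatch(Slice(), deleted=True)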
def run_once(self):
    try:
        model_accessor.check_db_connection_okay()

        loop_start = time.time()
        error_map_file = Config.get('error_map_path')
        self.error_mapper = ErrorMapper(error_map_file)

        # Two passes. One for sync, the other for deletion.
        for deletion in [False, True]:
            # Set of individual objects within steps that failed
            self.failed_step_objects = set()

            # Set up conditions and step status. This is needed for steps
            # to run in parallel while obeying dependencies.
            providers = set()
            dependency_graph = self.dependency_graph if not deletion else self.deletion_dependency_graph

            for v in dependency_graph.values():
                if v:
                    providers.update(v)

            self.step_conditions = {}
            self.step_status = {}

            for p in list(providers):
                self.step_conditions[p] = threading.Condition()
                self.step_status[p] = STEP_STATUS_WORKING

            self.failed_steps = []

            threads = []
            logger.debug('Deletion=%r...' % deletion)
            schedule = self.ordered_steps if not deletion else reversed(self.ordered_steps)

            for S in schedule:
                thread = threading.Thread(
                    target=self.sync, name='synchronizer', args=(S, deletion))
                threads.append(thread)

            # Start threads
            for t in threads:
                t.start()

            # Another spot to clean up debug state
            try:
                model_accessor.reset_queries()
            except Exception:
                # this shouldn't happen, but in case it does, catch it...
                logger.log_exc("exception in reset_queries")

            # Wait for all threads to finish before continuing with the run loop
            for t in threads:
                t.join()

        self.save_run_times()

        loop_end = time.time()
        model_accessor.update_diag(
            loop_end=loop_end,
            loop_start=loop_start,
            backend_status="1 - Bottom Of Loop")
    except Exception as e:
        logger.error(
            'Core error. This seems like a misconfiguration or bug: %r. '
            'This error will not be relayed to the user!' % e)
        logger.log_exc("Exception in observer run loop")
        traceback.print_exc()
        model_accessor.update_diag(backend_status="2 - Exception in Event Loop")
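# A minimal sketch of how run_once() derives its per-pass schedule from the
# dependency graph, using a made-up graph. A step is a "provider" if some
# other step depends on it; only providers need a Condition/status slot.
# Step names and graph contents below are illustrative only.
example_dependency_graph = {
    "SyncInstances": ["SyncSlices"],   # step -> steps it depends on
    "SyncSlices": ["SyncSites"],
    "SyncSites": [],
}

providers = set()
for deps in example_dependency_graph.values():
    if deps:
        providers.update(deps)
print("providers needing a Condition:", sorted(providers))

# Forward order for the sync pass, reverse order for the deletion pass.
example_ordered_steps = ["SyncSites", "SyncSlices", "SyncInstances"]
for deletion in [False, True]:
    schedule = example_ordered_steps if not deletion else list(reversed(example_ordered_steps))
    print("deletion=%r schedule=%r" % (deletion, schedule))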
def sync(self, S, deletion):
    try:
        step = self.lookup_step_class(S)
        start_time = time.time()

        logger.debug("Starting to work on step %s, deletion=%s" %
                     (step.__name__, str(deletion)))

        dependency_graph = self.dependency_graph if not deletion else self.deletion_dependency_graph
        step_conditions = self.step_conditions  # if not deletion else self.deletion_step_conditions
        step_status = self.step_status  # if not deletion else self.deletion_step_status

        # Wait for step dependencies to be met
        try:
            deps = dependency_graph[S]
            has_deps = True
        except KeyError:
            has_deps = False

        go = True
        failed_dep = None

        if has_deps:
            for d in deps:
                if d == step.__name__:
                    logger.debug("  step %s self-wait skipped" % step.__name__)
                    go = True
                    continue

                cond = step_conditions[d]
                cond.acquire()
                while step_status[d] == STEP_STATUS_WORKING:
                    logger.debug("  step %s wait on dep %s" % (step.__name__, d))
                    cond.wait()
                    logger.debug("  step %s wait on dep %s cond returned" % (step.__name__, d))
                if step_status[d] == STEP_STATUS_OK:
                    go = True
                else:
                    logger.debug("  step %s has failed dep %s" % (step.__name__, d))
                    go = False
                    failed_dep = d
                cond.release()
                if not go:
                    break
        else:
            go = True

        if not go:
            logger.debug("Step %s skipped on failed dependency %s" % (step.__name__, failed_dep))
            self.failed_steps.append(step)
            my_status = STEP_STATUS_KO
        else:
            sync_step = self.lookup_step(S)
            sync_step.__name__ = step.__name__
            sync_step.dependencies = []
            try:
                for m in sync_step.provides:
                    lst = self.model_dependency_graph[m.__name__]
                    sync_step.dependencies.extend([dep[1] for dep in lst])
            except KeyError:
                pass

            sync_step.debug_mode = debug_mode

            should_run = False
            try:
                # Various checks that decide whether this step runs or not
                # e.g. don't run Slices if Sites failed
                self.check_class_dependency(sync_step, self.failed_steps)
                # e.g. don't run sync_network_routes if time since last run < 1 hour
                self.check_schedule(sync_step, deletion)
                should_run = True
            except StepNotReady:
                logger.info('Step not ready: %s' % sync_step.__name__)
                self.failed_steps.append(sync_step)
                my_status = STEP_STATUS_KO
            except Exception as e:
                logger.error('%r' % e)
                logger.log_exc("sync step failed: %r. Deletion: %r" % (sync_step, deletion))
                self.failed_steps.append(sync_step)
                my_status = STEP_STATUS_KO

            if should_run:
                try:
                    logger.debug('Executing step %s, deletion=%s' % (sync_step.__name__, deletion))

                    failed_objects = sync_step(failed=list(self.failed_step_objects), deletion=deletion)

                    duration = time.time() - start_time
                    self.check_duration(sync_step, duration)

                    if failed_objects:
                        self.failed_step_objects.update(failed_objects)

                    logger.debug("Step %r succeeded, deletion=%s" % (sync_step.__name__, deletion))
                    my_status = STEP_STATUS_OK
                    self.update_run_time(sync_step, deletion)
                except Exception as e:
                    logger.error(
                        'Model step %r failed. This seems like a misconfiguration or bug: %r. '
                        'This error will not be relayed to the user!' % (sync_step.__name__, e))
                    logger.log_exc("Exception in sync step")
                    self.failed_steps.append(S)
                    my_status = STEP_STATUS_KO
            else:
                logger.info("Step %r succeeded due to non-run" % step)
                my_status = STEP_STATUS_OK

        # Publish this step's status so dependent steps can proceed
        try:
            my_cond = step_conditions[S]
            my_cond.acquire()
            step_status[S] = my_status
            my_cond.notify_all()
            my_cond.release()
        except KeyError:
            logger.debug('Step %r is a leaf' % step)
    finally:
        try:
            model_accessor.reset_queries()
        except Exception:
            # this shouldn't happen, but in case it does, catch it...
            logger.log_exc("exception in reset_queries")

        model_accessor.connection_close()
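# A minimal, self-contained sketch of the Condition-based gating used in
# sync(): a dependent step blocks on its provider's Condition until the
# provider publishes a terminal status and calls notify_all(). Step names,
# status constants, and the sleep below are illustrative only.
import threading
import time

EX_STATUS_WORKING, EX_STATUS_OK, EX_STATUS_KO = 1, 2, 3

example_conditions = {"SyncSites": threading.Condition()}
example_status = {"SyncSites": EX_STATUS_WORKING}


def provider():
    time.sleep(0.1)  # pretend to do the SyncSites work
    cond = example_conditions["SyncSites"]
    with cond:
        example_status["SyncSites"] = EX_STATUS_OK
        cond.notify_all()  # wake every step waiting on this dependency


def consumer():
    cond = example_conditions["SyncSites"]
    with cond:
        while example_status["SyncSites"] == EX_STATUS_WORKING:
            cond.wait()
    if example_status["SyncSites"] == EX_STATUS_OK:
        print("dependency met, running SyncSlices")
    else:
        print("dependency failed, skipping SyncSlices")


workers = [threading.Thread(target=provider), threading.Thread(target=consumer)]
for t in workers:
    t.start()
for t in workers:
    t.join()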
def error(msg):
    # Prefix the message with the caller's file name, line number and
    # function name, e.g. "somefile.py:42 some_function() message"
    caller = inspect.stack()[1]
    logger.error("%s:%s %s() %s" % (basename(str(caller[1])), caller[2], caller[3], msg))