def _start(options, config):
    """Bring the Fabric server up.

    Runs the start-up steps strictly in sequence: clean up leftovers of
    earlier runs, configure the statically loaded modules, discover the
    providers, load the services, prepare the state store, log the node
    start and finally launch the executor, the recovery, the failure
    detector registration and the service manager.

    :param options: Options handed over to the service manager when the
                    services are loaded.
    :param config: Configuration handed over to every configurable module.
    """
    # Earlier runs of Fabric might have left temporary defaults files
    # behind, so get rid of them first.
    _backup.cleanup_temp_defaults_files()

    # Configure the TTL.
    _setup_ttl(config)

    # These modules are not dynamically loaded, hence they are configured
    # explicitly.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Gather information on all providers.
    providers.find_providers()

    # Hand every service over to the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Verify the maximum number of threads.
    _utils.check_number_threads()

    # Set up the Fabric node and report that it has started.
    node = FabricNode()
    started_at = _utils.get_time()
    node_version = node.version
    log_context = {
        'subject': str(node.uuid),
        'category': MySQLHandler.NODE,
        'type': MySQLHandler.START,
        'reported': started_at,
    }
    _LOGGER.info(
        "Fabric node version (%s) started. ", node_version, extra=log_context
    )
    node.startup = started_at

    # Start the executor, failure detector and then service manager. The
    # recovery is executed sequentially after the executor has been started
    # and before the service manager is started.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
def _start(options, config):
    """Start the Fabric server, one initialization step after another.

    The order of the steps below is significant: clean-up and
    configuration come first, then providers and services are loaded,
    and only afterwards the executor, the recovery, the failure detector
    registration and the service manager are started.

    :param options: Options passed on to the service manager.
    :param config: Configuration passed on to the configurable modules.
    """
    # Drop temporary defaults files which former runs of Fabric might
    # have left behind.
    _backup.cleanup_temp_defaults_files()

    # Configure the TTL.
    _setup_ttl(config)

    # Modules that are not dynamically loaded are configured by hand.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Collect information on all providers.
    providers.find_providers()

    # Load all services into the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Create the Fabric node and log its start together with the time at
    # which it was reported.
    fabric_node = FabricNode()
    report_time = _utils.get_time()
    _LOGGER.info(
        "Fabric node version (%s) started. ",
        fabric_node.version,
        extra=dict(
            subject=str(fabric_node.uuid),
            category=MySQLHandler.NODE,
            type=MySQLHandler.START,
            reported=report_time
        )
    )
    fabric_node.startup = report_time

    # Executor first, then the recovery, then the failure detector and
    # finally the service manager: the recovery runs sequentially between
    # the start of the executor and the start of the service manager.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
def _start(options, config):
    """Start the Fabric server.

    Configures the statically loaded modules, loads the services,
    prepares the state store, logs that the node is starting and then
    launches the executor, the recovery, the failure detector
    registration and the service manager, in that order.

    :param options: Options passed on to the service manager.
    :param config: Configuration passed on to the configurable modules.
    """
    # Modules that are not dynamically loaded get configured explicitly.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Load all services into the service manager.
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Set up the Fabric node and log that it is starting.
    this_node = FabricNode()
    now = _utils.get_time()
    start_record = {
        'subject': str(this_node.uuid),
        'category': MySQLHandler.NODE,
        'type': MySQLHandler.START,
        'reported': now,
    }
    _LOGGER.info("Fabric node starting.", extra=start_record)
    this_node.startup = now

    # Start the executor, failure detector and then service manager. The
    # recovery is executed sequentially after starting the executor and
    # before starting the service manager.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
def test_recovery_chain_jobs(self):
    """Verify checkpoint and recovery when one job triggers other jobs.

    A first job runs to completion and registers two follow-up jobs
    within the same procedure. The failure is simulated after the
    transaction has been committed, and the recovery is then expected to
    leave no checkpoint of the procedure behind.
    """
    global COUNT_1, COUNT_2

    proc_uuid = _uuid.UUID("01da10ed-514e-43a4-8388-ab05c04d67e1")
    locks = set(["lock"])
    job_args = (10, 30)
    job_kwargs = {}

    # One row per job: (job uuid, sequence within the procedure, action).
    job_table = (
        (_uuid.UUID("e4e1ba17-ff1d-45e6-a83c-5655ea5bb646"), 0,
         check_do_action),
        (_uuid.UUID("aaa1ba17-ff1d-45e6-a83c-5655ea5bb646"), 1,
         check_do_action_registered_1),
        (_uuid.UUID("bbb1ba17-ff1d-45e6-a83c-5655ea5bb646"), 2,
         check_do_action_registered_2),
    )

    # BEGIN DO FINISH (FAILURE)
    COUNT_1 = COUNT_2 = 0

    # The checkpoints are created in table order: the main job first,
    # followed by the two jobs it registers.
    main_job, chained_1, chained_2 = [
        _checkpoint.Checkpoint(
            proc_uuid, locks, job_id, sequence,
            ".".join((action.__module__, action.__name__)),
            job_args, job_kwargs
        )
        for job_id, sequence, action in job_table
    ]

    main_job.register()
    main_job.begin()
    self.persister.begin()
    check_do_action(*job_args)
    main_job.finish()
    chained_1.register()
    chained_2.register()
    self.persister.commit()

    # State right after the commit and before any recovery.
    self.assertEqual(COUNT_1, 10)
    self.assertEqual(COUNT_2, 30)
    self.assertEqual(MyTransAction.count(), 1)
    self.assertEqual(len(_checkpoint.Checkpoint.registered()), 2)
    self.assertEqual(len(_checkpoint.Checkpoint.unfinished()), 0)
    self.assertEqual(len(_checkpoint.Checkpoint.fetch(proc_uuid)), 3)
    _checkpoint.Checkpoint.cleanup()
    self.assertEqual(len(_checkpoint.Checkpoint.fetch(proc_uuid)), 3)

    # Recover and wait for the procedure, if the executor knows it.
    _recovery.recovery()
    executor = _executor.Executor()
    recovered = executor.get_procedure(main_job.proc_uuid)
    if recovered is not None:
        recovered.wait()

    # State after the recovery.
    self.assertEqual(COUNT_1, 30)
    self.assertEqual(COUNT_2, 90)
    self.assertEqual(len(_checkpoint.Checkpoint.unfinished()), 0)
    self.assertEqual(len(_checkpoint.Checkpoint.registered()), 0)
    self.assertEqual(len(_checkpoint.Checkpoint.fetch(proc_uuid)), 0)
    executor.remove_procedure(proc_uuid)
def test_recovery_single_job(self):
    """Verify checkpoint and recovery for a single job.

    The same job is checkpointed four times and each time a failure is
    simulated at a different point: before the job begins, after it has
    begun, after it has run but with its transaction rolled back, and
    after its transaction has been committed. The recovery is triggered
    after every simulated failure.
    """
    global COUNT_1, COUNT_2

    proc_uuid = _uuid.UUID("9f994e3a-a732-43ba-8aab-f1051f553437")
    job_uuid = _uuid.UUID("64835080-2114-46de-8fbf-8caba8e8cd90")
    locks = set(["lock"])
    action = check_do_action
    action_fqn = ".".join((action.__module__, action.__name__))
    job_args = (10, 30)
    job_kwargs = {}
    store = _checkpoint.Checkpoint

    def new_checkpoint():
        # Every scenario checkpoints the very same job description.
        return store(
            proc_uuid, locks, job_uuid, 0, action_fqn, job_args, job_kwargs
        )

    def check_tables(registered, unfinished, fetched):
        # Checkpoint counts as seen before the recovery is triggered.
        self.assertEqual(len(store.registered()), registered)
        self.assertEqual(len(store.unfinished()), unfinished)
        self.assertEqual(len(store.fetch(proc_uuid)), fetched)

    def recover(job):
        # Trigger the recovery and wait for the procedure, provided that
        # the executor knows about it.
        _recovery.recovery()
        executor = _executor.Executor()
        procedure = executor.get_procedure(job.proc_uuid)
        if procedure is not None:
            procedure.wait()
        return executor

    def check_drained():
        # No checkpoint of the procedure is expected after the recovery.
        self.assertEqual(len(store.unfinished()), 0)
        self.assertEqual(len(store.registered()), 0)
        self.assertEqual(len(store.fetch(proc_uuid)), 0)

    # (FAILURE) BEGIN DO FINISH
    COUNT_1 = COUNT_2 = 0
    job = new_checkpoint()
    job.register()

    self.assertEqual(COUNT_1, 0)
    self.assertEqual(COUNT_2, 0)
    self.assertEqual(MyTransAction.count(), 0)
    check_tables(1, 0, 1)
    store.cleanup()
    self.assertEqual(len(store.fetch(proc_uuid)), 1)

    executor = recover(job)
    self.assertEqual(COUNT_1, 10)
    self.assertEqual(COUNT_2, 30)
    self.assertEqual(MyTransAction.count(), 1)
    check_drained()
    executor.remove_procedure(proc_uuid)

    # BEGIN (FAILURE) DO FINISH
    COUNT_1 = COUNT_2 = 0
    job = new_checkpoint()
    job.register()
    job.begin()
    self.persister.begin()
    # Nothing is executed within the transaction in this scenario.
    self.persister.rollback()

    self.assertEqual(COUNT_1, 0)
    self.assertEqual(COUNT_2, 0)
    self.assertEqual(MyTransAction.count(), 1)
    check_tables(1, 1, 1)
    store.cleanup()
    self.assertEqual(len(store.fetch(proc_uuid)), 1)

    executor = recover(job)
    self.assertEqual(COUNT_1, 10)
    self.assertEqual(COUNT_2, 30)
    self.assertEqual(MyTransAction.count(), 2)
    check_drained()
    executor.remove_procedure(proc_uuid)

    # BEGIN DO (FAILURE) FINISH
    COUNT_1 = COUNT_2 = 0
    job = new_checkpoint()
    job.register()
    job.begin()
    self.persister.begin()
    action(10, 30)
    job.finish()
    self.persister.rollback()

    self.assertEqual(COUNT_1, 10)
    self.assertEqual(COUNT_2, 30)
    self.assertEqual(MyTransAction.count(), 2)
    check_tables(1, 1, 1)
    store.cleanup()
    self.assertEqual(len(store.fetch(proc_uuid)), 1)

    executor = recover(job)
    self.assertEqual(COUNT_1, 10)
    self.assertEqual(COUNT_2, 30)
    self.assertEqual(MyTransAction.count(), 3)
    check_drained()
    executor.remove_procedure(proc_uuid)

    # BEGIN DO FINISH (FAILURE)
    COUNT_1 = COUNT_2 = 0
    job = new_checkpoint()
    job.register()
    job.begin()
    self.persister.begin()
    action(10, 30)
    job.finish()
    self.persister.commit()

    self.assertEqual(COUNT_1, 10)
    self.assertEqual(COUNT_2, 30)
    self.assertEqual(MyTransAction.count(), 4)
    # Unlike the previous scenarios, no clean-up is requested before
    # the recovery here.
    check_tables(0, 0, 1)

    executor = recover(job)
    self.assertEqual(COUNT_1, 10)
    self.assertEqual(COUNT_2, 30)
    self.assertEqual(MyTransAction.count(), 4)
    check_drained()
    executor.remove_procedure(proc_uuid)