def _report_failure(self, server):
        """Mark the server as faulty and report a failure.

        This runs in its own thread so that the built-in failure detector
        can continue monitoring the servers: if the failure report hangs,
        the detector keeps purging connections to faulty servers and thus
        eventually frees this thread.

        Note, though, that the failure report is not crash-safe, so it
        might fail without promoting a new server to master. In the
        future, we will circumvent this limitation.
        """
        try:
            _persistence.init_thread()

            server.status = MySQLServer.FAULTY
            self.__connection_manager.purge_connections(server)

            procedures = trigger(
                "REPORT_FAILURE", None, str(server.uuid),
                threading.current_thread().name, MySQLServer.FAULTY, False
            )
            executor = _executor.Executor()
            for procedure in procedures:
                executor.wait_for_procedure(procedure)

            _persistence.deinit_thread()
        finally:
            self.__thread_report_failure = False

    def _run(self):
        """Function that verifies servers' availabilities.
        """
        ignored_status = [MySQLServer.FAULTY]
        quarantine = {}
        interval = FailureDetector._DETECTION_INTERVAL
        detections = FailureDetector._DETECTIONS
        detection_timeout = FailureDetector._DETECTION_TIMEOUT

        _persistence.init_thread()

        while self.__check:
            try:
                unreachable = set()
                group = Group.fetch(self.__group_id)
                if group is not None:
                    for server in group.servers():
                        if server.status in ignored_status or \
                            MySQLServer.is_alive(server, detection_timeout):
                            if server.status == MySQLServer.FAULTY:
                                self.__connection_manager.purge_connections(
                                    server
                                )
                            continue

                        unreachable.add(server.uuid)

                        _LOGGER.warning(
                            "Server (%s) in group (%s) is unreachable.",
                            server.uuid, self.__group_id
                        )

                        unstable = False
                        failed_attempts = 0
                        if server.uuid not in quarantine:
                            quarantine[server.uuid] = failed_attempts = 1
                        else:
                            failed_attempts = quarantine[server.uuid] + 1
                            quarantine[server.uuid] = failed_attempts
                        if failed_attempts >= detections:
                            unstable = True

                        can_set_faulty = group.can_set_server_faulty(
                            server, get_time()
                        )
                        if unstable and can_set_faulty:
                            self._spawn_report_failure(server)
                            
                for uuid in list(quarantine.keys()):
                    if uuid not in unreachable:
                        del quarantine[uuid]

            except (_errors.ExecutorError, _errors.DatabaseError):
                pass
            except Exception as error:
                _LOGGER.exception(error)

            time.sleep(interval / detections)

        _persistence.deinit_thread()
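
A note on the pattern above: _spawn_report_failure() itself is not part of this listing. A minimal sketch of what the docstring describes, i.e. running the report in a daemon thread so the detection loop is never blocked, might look like the following (the helper name and the thread name are assumptions, not Fabric's actual code):

import threading

def spawn_report_failure(detector, server):
    """Illustrative sketch only: run the failure report off the detector loop.

    If the report hangs, the detector keeps monitoring and keeps purging
    connections to the faulty server, which eventually frees this thread.
    """
    worker = threading.Thread(
        target=detector._report_failure, args=(server,),
        name="ReportFailure(%s)" % (server.uuid,)
    )
    worker.daemon = True    # never keep the process alive on shutdown
    worker.start()
    return worker
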
Example #3
def check_credentials(group, command, config, protocol):
    """Check credentials using configuration

    :raises errors.CredentialError: if login failed, or if user has no
                                    permission
    """
    if group not in ('user', 'role'):
        return

    _configure_connections(config)
    _persistence.init_thread()

    if not protocol:
        protocol = FABRIC_DEFAULT_PROTOCOL

    section = 'protocol.' + protocol

    username = config.get(section, 'user')
    password = config.get(section, 'password')
    realm = config.get(section, 'realm', vars=FABRIC_PROTOCOL_DEFAULTS)

    user = User.fetch_user(username, protocol=protocol)
    password_hash = _hash_password(username, password, protocol, config, realm)

    if user is None or user.password_hash != password_hash:
        _LOGGER.info("Failed login for user %s/%s", username, protocol)
        raise _errors.CredentialError("Login failed")
    elif not user.has_permission('core', group, command):
        _LOGGER.info("Permission denied for user %s/%s", username, protocol)
        raise _errors.CredentialError("No permission")
Example #4
    def run(self):
        """Process registered requests.
        """
        _LOGGER.info("Started XML-RPC-Session.")
        try:
            _persistence.init_thread()
        except Exception as error:
            _LOGGER.warning("Error connecting to backing store: (%s).", error)

        SessionThread.local_thread.thread = self

        while True:
            request, client_address = self.__server.dequeue_request()
            _LOGGER.debug(
                "Processing request (%s) from (%s) through thread (%s).",
                request, client_address, self)
            # There is no need to catch exceptions here because the method
            # process_request_thread already does so. It is the main entry
            # point in the code which means that any uncaught exception
            # in the code will be reported as xmlrpclib.Fault.
            self.__server.process_request_thread(request, client_address)
            _LOGGER.debug(
                "Finishing request (%s) from (%s) through thread (%s).",
                request, client_address, self)
            if self.__is_shutdown:
                self.__server.shutdown_now()

        try:
            _persistence.deinit_thread()
        except Exception as error:
            _LOGGER.warning("Error connecting to backing store: (%s).", error)
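
The loop above relies on dequeue_request() blocking until the acceptor thread has queued a (request, client_address) pair. A minimal sketch of such a queue-backed hand-off, not Fabric's implementation, just the pattern the comments describe:

try:
    import queue                  # Python 3
except ImportError:
    import Queue as queue         # Python 2

class TinyRequestQueue(object):
    """Illustrative stand-in for the server side of dequeue_request()."""

    def __init__(self):
        self.__requests = queue.Queue()

    def enqueue_request(self, request, client_address):
        # Called by the acceptor thread for every incoming connection.
        self.__requests.put((request, client_address))

    def dequeue_request(self):
        # Blocks until a request is available, as the session loop expects.
        return self.__requests.get()
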
Example #6
def setup_xmlrpc(options, config):
    # Set up the persistence.
    from mysql.fabric import persistence

    # Set up the manager.
    from mysql.fabric.services.manage import (
        _start,
        _configure_connections,
    )

    _configure_connections(config)
    persistence.setup()
    persistence.init_thread()
    _start(options, config)

    # Set up the client.
    url = "http://%s" % (config.get("protocol.xmlrpc", "address"),)
    proxy = xmlrpclib.ServerProxy(url)

    while True:
        try:
            proxy.manage.ping()
            break
        except Exception:
            pass

    return proxy
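
The while-True loop above polls manage.ping() as fast as it can until the server answers. A hedged variant that backs off between attempts and eventually gives up, built on the same XML-RPC proxy, could look like this (function name and timeouts are illustrative):

import time
import xmlrpclib

def wait_for_fabric(url, timeout=30.0, pause=0.5):
    """Poll manage.ping() until it answers or the deadline passes."""
    proxy = xmlrpclib.ServerProxy(url)
    deadline = time.time() + timeout
    while True:
        try:
            proxy.manage.ping()
            return proxy
        except Exception:
            if time.time() >= deadline:
                raise
            time.sleep(pause)
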
Example #8
    def setup(self):
        """Set up the MySQLRPC request handler."""
        self._handshaked = False
        self._authenticated = False
        self._curr_pktnr = 1
        persistence.init_thread()
        self._store = persistence.current_persister()
Example #11
    def dispatch(self, *args):
        """Set up the Fabric storage system and execute the command.
        """
        # Configure connections.
        _configure_connections(self.config)
        _persistence.init_thread()
        self.persister = _persistence.current_persister()
        self.execute(*args)
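
For context, dispatch() wires the command up to the state store and then hands control to execute(). A hypothetical subclass illustrating that contract; the base-class name and the exec_stmt() call are assumptions rather than something taken from this listing, while self.persister is the attribute set by dispatch() above:

class PingStore(Command):               # base-class name is an assumption
    """Trivial command: verify that the backing store answers a query."""

    def execute(self):
        # dispatch() has already configured connections, called
        # init_thread(), and attached self.persister; exec_stmt() is
        # assumed to run a statement against the backing store.
        return self.persister.exec_stmt("SELECT 1")
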
Example #13
def _start(options, config):
    """Start Fabric server.
    """

    # Remove temporary defaults files that might have been left behind
    # by former runs of Fabric.
    _backup.cleanup_temp_defaults_files()

    # Configure TTL.
    _setup_ttl(config)

    # Configure modules that are not dynamically loaded.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Load information on all providers.
    providers.find_providers()

    # Load all services into the service manager
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Configure Fabric Node.
    fabric = FabricNode()
    reported = _utils.get_time()
    _LOGGER.info(
        "Fabric node version (%s) started. ",
        fabric.version,
        extra={
            'subject' : str(fabric.uuid),
            'category' : MySQLHandler.NODE,
            'type' : MySQLHandler.START,
            'reported' : reported
        }
    )
    fabric.startup = reported

    # Start the executor, failure detector and then service manager. In this
    # scenario, the recovery is sequentially executed after starting the
    # executor and before starting the service manager.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
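
Every snippet in this listing brackets state-store work between _persistence.init_thread() and _persistence.deinit_thread(). A small sketch, not Fabric code, of that discipline as a reusable decorator, with the finally clause guaranteeing the per-thread connection is released even on error:

import functools

def with_state_store(function):
    """Open the per-thread persistence connection around a callable."""
    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        _persistence.init_thread()
        try:
            return function(*args, **kwargs)
        finally:
            _persistence.deinit_thread()
    return wrapper
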
Example #15
def _start(options, config):
    """Start Fabric server.
    """
    # Configure modules that are not dynamically loaded.
    _server.configure(config)
    _error_log.configure(config)
    _failure_detector.configure(config)

    # Load all services into the service manager
    _services.ServiceManager().load_services(options, config)

    # Initialize the state store.
    _persistence.init_thread()

    # Check the maximum number of threads.
    _utils.check_number_threads()

    # Configure Fabric Node.
    fabric = FabricNode()
    reported = _utils.get_time()
    _LOGGER.info(
        "Fabric node starting.",
        extra={
            'subject' : str(fabric.uuid),
            'category' : MySQLHandler.NODE,
            'type' : MySQLHandler.START,
            'reported' : reported
        }
    )
    fabric.startup = reported

    # Start the executor, failure detector and then service manager. In this
    # scenario, the recovery is sequentially executed after starting the
    # executor and before starting the service manager.
    _events.Handler().start()
    _recovery.recovery()
    _failure_detector.FailureDetector.register_groups()
    _services.ServiceManager().start()
Example #16
    def _run(self):
        """Function that verifies servers' availabilities.
        """
        from mysql.fabric.server import (
            Group,
            MySQLServer,
            ConnectionManager,
        )

        ignored_status = [MySQLServer.FAULTY]
        quarantine = {}
        interval = FailureDetector._DETECTION_INTERVAL
        detections = FailureDetector._DETECTIONS
        detection_timeout = FailureDetector._DETECTION_TIMEOUT
        connection_manager = ConnectionManager()
        slave_deep_checks = FailureDetector._SLAVE_DEEP_CHECKS

        _persistence.init_thread()

        while self.__check:
            try:
                unreachable = set()
                group = Group.fetch(self.__group_id)
                if group is not None:
                    for server in group.servers():
                        if server.status in ignored_status:

                            ### Server is FAULTY
                            connection_manager.kill_connections(server)
                            continue
                        else:
                            ### Server is Not FAULTY
                            if MySQLServer.is_alive(server, detection_timeout):

                                ### Server is alive
                                ### check depends on `slave_deep_checks` parameter
                                if slave_deep_checks:

                                    ### When server is alive and status != FAULTY
                                    is_master = (group.master == server.uuid)
                                    if not is_master:
                                        ### Checking master is dead or alive.
                                        master_server = MySQLServer.fetch(group.master)
    
                                        if MySQLServer.is_alive(master_server, detection_timeout):
    
                                            ### Checking is replication valid or not if master is alive.
                                            server.connect()
                                            slave_issues, why_slave_issues = \
                                                _replication.check_slave_issues(server)
                                            if slave_issues:
        
                                                if (why_slave_issues['io_error'] and \
                                                    why_slave_issues['io_errno'] == 2003):
        
                                                    ### Nothing to do during reconnecting, just logging
                                                    _LOGGER.info(why_slave_issues)
        
                                                else:
                                                        
                                                    ### If slave threads are not running, set status to SPARE
                                                    server.status = MySQLServer.SPARE
        
                                            ### Done slave_issues.
                                            server.disconnect()
    
                                        ### Endif MySQLServer.is_alive(master_server, detection_timeout)
                                    ### Endif not is_master
                                ### Endif slave_deep_checks
                                continue
                            ### Else MySQLServer.is_alive(server, detection_timeout)
                            else:

                                unreachable.add(server.uuid)

                                _LOGGER.warning(
                                    "Server (%s) in group (%s) is unreachable.",
                                    server.uuid, self.__group_id
                                )
        
                                unstable = False
                                failed_attempts = 0
                                if server.uuid not in quarantine:
                                    quarantine[server.uuid] = failed_attempts = 1
                                else:
                                    failed_attempts = quarantine[server.uuid] + 1
                                    quarantine[server.uuid] = failed_attempts
                                if failed_attempts >= detections:
                                    unstable = True
        
                                can_set_faulty = group.can_set_server_faulty(
                                    server, get_time()
                                )
                                if unstable and can_set_faulty:
                                    # We have to make this transactional and make the
                                    # failover (i.e. report failure) robust to failures.
                                    # Otherwise, a master might be set to faulty and
                                    # a new one never promoted.
                                    server.status = MySQLServer.FAULTY
                                    connection_manager.kill_connections(server)
                                    
                                    procedures = trigger("REPORT_FAILURE", None,
                                        str(server.uuid),
                                        threading.current_thread().name,
                                        MySQLServer.FAULTY, False
                                    )
                                    executor = _executor.Executor()
                                    for procedure in procedures:
                                        executor.wait_for_procedure(procedure)

                            ### Endif MySQLServer.is_alive(server, detection_timeout)
                        ### Endif server.status in ignored_status
                    ### End for server in group.servers()
                ### Endif group is not None
                for uuid in list(quarantine.keys()):
                    if uuid not in unreachable:
                        del quarantine[uuid]

            except (_errors.ExecutorError, _errors.DatabaseError):
                pass
            except Exception as error:
                _LOGGER.exception(error)

            time.sleep(interval)

        _persistence.deinit_thread()
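
The _run() variants shown here keep the same quarantine bookkeeping: a server must be unreachable for _DETECTIONS consecutive rounds before it is treated as unstable, and it leaves quarantine as soon as it answers again. A standalone sketch of that rule, with function and argument names chosen for illustration only:

def update_quarantine(quarantine, unreachable_uuids, detections):
    """Return the set of server uuids that crossed the detection threshold."""
    for uuid in unreachable_uuids:
        quarantine[uuid] = quarantine.get(uuid, 0) + 1
    # A server that answered again leaves quarantine immediately.
    for uuid in list(quarantine.keys()):
        if uuid not in unreachable_uuids:
            del quarantine[uuid]
    return set(
        uuid for uuid, misses in quarantine.items() if misses >= detections
    )
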
Example #17
    def _run(self):
        """Function that verifies servers' availabilities.
        """
        from mysql.fabric.server import (
            Group,
            MySQLServer,
            ConnectionManager,
        )

        ignored_status = [MySQLServer.FAULTY]
        quarantine = {}
        interval = FailureDetector._DETECTION_INTERVAL
        detections = FailureDetector._DETECTIONS
        detection_timeout = FailureDetector._DETECTION_TIMEOUT
        connection_manager = ConnectionManager()

        _persistence.init_thread()

        while self.__check:
            try:
                unreachable = set()
                group = Group.fetch(self.__group_id)
                if group is not None:
                    for server in group.servers():
                        if server.status in ignored_status or \
                            MySQLServer.is_alive(server, detection_timeout):
                            if server.status == MySQLServer.FAULTY:
                                connection_manager.kill_connections(server)
                            continue

                        unreachable.add(server.uuid)

                        _LOGGER.warning(
                            "Server (%s) in group (%s) is unreachable.",
                            server.uuid, self.__group_id
                        )

                        unstable = False
                        failed_attempts = 0
                        if server.uuid not in quarantine:
                            quarantine[server.uuid] = failed_attempts = 1
                        else:
                            failed_attempts = quarantine[server.uuid] + 1
                            quarantine[server.uuid] = failed_attempts
                        if failed_attempts >= detections:
                            unstable = True

                        can_set_faulty = group.can_set_server_faulty(
                            server, get_time()
                        )
                        if unstable and can_set_faulty:
                            # We have to make this transactional and make the
                            # failover (i.e. report failure) robust to failures.
                            # Otherwise, a master might be set to faulty and
                            # a new one never promoted.
                            server.status = MySQLServer.FAULTY
                            connection_manager.kill_connections(server)
                            
                            procedures = trigger("REPORT_FAILURE", None,
                                str(server.uuid),
                                threading.current_thread().name,
                                MySQLServer.FAULTY, False
                            )
                            executor = _executor.Executor()
                            for procedure in procedures:
                                executor.wait_for_procedure(procedure)

                for uuid in list(quarantine.keys()):
                    if uuid not in unreachable:
                        del quarantine[uuid]

            except (_errors.ExecutorError, _errors.DatabaseError):
                pass
            except Exception as error:
                _LOGGER.exception(error)

            time.sleep(interval / detections)

        _persistence.deinit_thread()