def ping(cfg):
    """Checks the health of ProxySQL."""
    p_cfg = None
    cfg_opts = {item[0]: item[1] for item in cfg.items('proxysql')}
    try:
        p_cfg = ProxySQLConfig(cfg_opts)
        p_cfg.validate()

        log.debug('Performing health check on ProxySQL instance at %s:%s'
                  % (p_cfg.host, p_cfg.admin_port))

        proxysql_man = ProxySQLManager(host=p_cfg.host,
                                       port=p_cfg.admin_port,
                                       user=p_cfg.admin_username,
                                       password=p_cfg.admin_password)
        proxysql_man.ping()
    except ProxySQLAdminConnectionError:
        log.error('ProxySQL ping failed. Unable to connect at %s:%s '
                  'using username %s and password %s'
                  % (p_cfg.host, p_cfg.admin_port, p_cfg.admin_username,
                     "*" * len(p_cfg.admin_password)))
        exit(1)
    except (ModelValidationError, ModelConversionError) as e:
        log.error('ProxySQL configuration options error: %s' % e)
        exit(1)

    log.info('ProxySQL ping on %s:%s successful.'
             % (cfg_opts['host'], cfg_opts['admin_port']))

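# A minimal, illustrative sketch of how the '[proxysql]' options consumed by
# ping() could be loaded. The option names (host, admin_port, admin_username,
# admin_password) follow the attributes referenced above; the config file
# path and the helper name are assumptions, not part of the tool.
#
#     [proxysql]
#     host=127.0.0.1
#     admin_port=6032
#     admin_username=admin
#     admin_password=admin
def _example_load_proxysql_options(config_path='proxysql-tool.cfg'):
    """Build the same options dict that ping() passes to ProxySQLConfig."""
    try:
        from configparser import ConfigParser       # Python 3
    except ImportError:
        from ConfigParser import ConfigParser       # Python 2
    cfg = ConfigParser()
    cfg.read(config_path)
    return {name: value for name, value in cfg.items('proxysql')}
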
def main(ctx, cfg, debug, config, version):
    """Entry point for the command-line interface."""
    if ctx.invoked_subcommand is None:
        if version:
            print(__version__)
            exit(0)
        else:
            print(ctx.get_help())
            exit(-1)

    setup_logging(log, debug=debug)

    if os.path.exists(config):
        cfg.read(config)
    else:
        log.error("Config file %s doesn't exist", config)
        exit(1)

def register(cfg):
    """Registers Galera cluster nodes with ProxySQL."""
    proxy_cfg = galera_cfg = None

    proxy_options = {item[0]: item[1] for item in cfg.items('proxysql')}
    try:
        proxy_cfg = ProxySQLConfig(proxy_options)
        proxy_cfg.validate()
    except ModelValidationError as e:
        log.error('ProxySQL configuration options error: %s' % e)
        exit(1)

    galera_options = {item[0]: item[1] for item in cfg.items('galera')}
    try:
        galera_cfg = GaleraConfig(galera_options)
        galera_cfg.validate()
    except (ModelValidationError, ModelConversionError) as e:
        log.error('Galera configuration options error: %s' % e)
        exit(1)

    if not proxysql_tools.galera.register_cluster_with_proxysql(
            proxy_cfg, galera_cfg):
        log.error('Registration of Galera cluster nodes failed.')
        exit(1)

    log.info('Registration of Galera cluster nodes successful.')

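# Illustrative sketch of the '[galera]' section that register() feeds into
# GaleraConfig. The option names are inferred from the attributes used in
# this module (cluster_host, cluster_username, cluster_password,
# writer_hostgroup_id, reader_hostgroup_id); the sample values, and any
# further options a GaleraConfig model may define, are assumptions.
#
#     [galera]
#     cluster_host=192.168.90.2:3306,192.168.90.3:3306,192.168.90.4:3306
#     cluster_username=galera_monitor
#     cluster_password=secret
#     writer_hostgroup_id=10
#     reader_hostgroup_id=11
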
def refresh_and_validate_node_state(node):
    """Validates the state of the node to ensure that the node's status
    is PRIMARY and that all of the node's properties can be fetched.

    :param node: The object that stores information on the Galera node.
    :type node: GaleraNode
    """
    try:
        node.refresh_state()

        if node.cluster_status != CLUSTER_STATUS_PRIMARY:
            err_msg = ('Node %s:%s is in non-primary state %s'
                       % (node.host, node.port, node.cluster_status))
            log.error(err_msg)
            raise GaleraNodeNonPrimary(err_msg)
    except (ModelValidationError, OperationalError) as e:
        # The node state cannot be refreshed because some of the node's
        # properties could not be fetched. We fail the discovery in this
        # case as it is an unexpected error.
        log.error('Node %s:%s state could not be fetched'
                  % (node.host, node.port))
        raise GaleraNodeUnknownState(e.messages)

def fetch_galera_manager(galera_cfg):
    """Finds a host, from the list of hosts passed in the config, that is
    part of the primary component of the cluster.

    :param galera_cfg: The Galera config object.
    :type galera_cfg: GaleraConfig
    :return: A Galera manager that can be used to interact with the primary
        component of the Galera cluster.
    :rtype: GaleraManager
    :raises: ValueError, GaleraNodeNonPrimary, GaleraNodeUnknownState
    """
    exception = None
    err_msg = ''

    for host_port in galera_cfg.cluster_host.split(','):
        try:
            host, port = [v.strip() for v in host_port.split(':')]

            # We check that the node that is being used to register the
            # cluster with ProxySQL is actually a healthy node and part of
            # the primary component.
            galera_man = GaleraManager(host, port,
                                       galera_cfg.cluster_username,
                                       galera_cfg.cluster_password)
            galera_man.discover_cluster_nodes()

            return galera_man
        except ValueError as e:
            exception = e
            err_msg = ('Cluster host config option %s is in an invalid '
                       'format. The correct format is host:port,host:port.'
                       % host_port)
            log.error(err_msg)
        except GaleraNodeNonPrimary as e:
            exception = e
            err_msg = ('Cluster node %s:%s used for registration is '
                       'non-primary, skipping.' % (host, port))
            log.error(err_msg)
        except GaleraNodeUnknownState as e:
            exception = e
            err_msg = ('Cluster node %s:%s used for registration is in an '
                       'unknown state, skipping.' % (host, port))
            log.error(err_msg)

    if exception is not None:
        log.error(err_msg)
        raise exception

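# A small standalone illustration of the 'cluster_host' format that
# fetch_galera_manager() expects: a comma-separated list of host:port pairs.
# The sample value and the helper name are assumptions for demonstration only.
def _example_parse_cluster_host(cluster_host='10.0.0.1:3306, 10.0.0.2:3306'):
    """Yield (host, port) pairs the same way fetch_galera_manager() does."""
    for host_port in cluster_host.split(','):
        host, port = [v.strip() for v in host_port.split(':')]
        yield host, port

# Example: list(_example_parse_cluster_host()) returns
# [('10.0.0.1', '3306'), ('10.0.0.2', '3306')]
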
def notify_master(cfg):
    """Moves the network interface to the local instance and brings it up.

    Steps:
        - Detach the network interface if it is attached anywhere.
        - Attach the network interface to the local instance.
        - Configure the IP address on this instance.

    :param cfg: config object
    """
    try:
        os.environ['AWS_ACCESS_KEY_ID'] = cfg.get('aws', 'aws_access_key_id')
        os.environ['AWS_SECRET_ACCESS_KEY'] = cfg.get('aws',
                                                      'aws_secret_access_key')
        os.environ['AWS_DEFAULT_REGION'] = cfg.get('aws',
                                                   'aws_default_region')
    except NoOptionError:
        log.error('aws_access_key_id, aws_secret_access_key and '
                  'aws_default_region must be defined in the '
                  'aws section of the config file.')
        exit(-1)

    instance_id = get_my_instance_id()
    try:
        ip = cfg.get('proxysql', 'virtual_ip')
        netmask = cfg.get('proxysql', 'virtual_netmask')
        network_interface = get_network_interface(ip)

        if network_interface_attached(network_interface):
            detach_network_interface(network_interface)

        local_interface = "eth%d" % DEVICE_INDEX
        ensure_local_interface_is_gone(local_interface)
        ensure_network_interface_is_detached(network_interface)
        attach_network_inetrface(network_interface, instance_id)
        configure_local_interface(local_interface, ip, netmask)
    except NoOptionError as err:
        log.error('virtual_ip and virtual_netmask must be defined in the '
                  'proxysql section of the config file.')
        log.error(err)

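# Illustrative sketch of the config options notify_master() reads. The option
# names come directly from the cfg.get() calls above; the sample values are
# placeholders, not recommendations.
#
#     [aws]
#     aws_access_key_id=<access key id>
#     aws_secret_access_key=<secret access key>
#     aws_default_region=us-east-1
#
#     [proxysql]
#     virtual_ip=10.0.1.100
#     virtual_netmask=255.255.255.0
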
def register_cluster_with_proxysql(proxy_cfg, galera_cfg):
    """Register a Galera cluster within ProxySQL. The nodes in the cluster
    will be distributed between the writer hostgroup and the reader
    hostgroup.

    :param proxy_cfg: The ProxySQL config object.
    :type proxy_cfg: ProxySQLConfig
    :param galera_cfg: The Galera config object.
    :type galera_cfg: GaleraConfig
    :return: Returns True on success, False otherwise.
    :rtype: bool
    """
    hostgroup_writer = galera_cfg.writer_hostgroup_id
    hostgroup_reader = galera_cfg.reader_hostgroup_id

    # We also check that the initial node that is being used to register the
    # cluster with ProxySQL is actually a healthy node and part of the
    # primary component.
    try:
        galera_man = fetch_galera_manager(galera_cfg)
        if galera_man is None:
            return False
    except (ValueError, GaleraNodeNonPrimary, GaleraNodeUnknownState):
        return False

    # First we try to find nodes in synced state.
    galera_nodes_synced = [n for n in galera_man.nodes
                           if n.local_state == LOCAL_STATE_SYNCED]
    galera_nodes_desynced = [n for n in galera_man.nodes
                             if n.local_state == LOCAL_STATE_DONOR_DESYNCED]

    nodes_blacklist = fetch_nodes_blacklisted_for_writers(galera_cfg,
                                                          galera_man.nodes)

    # If we found no nodes in synced or donor/desynced state then we
    # cannot continue.
    if not galera_nodes_synced and not galera_nodes_desynced:
        log.error('No node found in SYNCED or DESYNCED state.')
        return False

    proxysql_man = ProxySQLManager(proxy_cfg.host,
                                   proxy_cfg.admin_port,
                                   proxy_cfg.admin_username,
                                   proxy_cfg.admin_password,
                                   reload_runtime=False)

    try:
        # We also validate that we can connect to ProxySQL.
        proxysql_man.ping()

        # Set up the monitoring user used by ProxySQL to monitor the
        # backends.
        setup_proxysql_monitoring_user(proxysql_man,
                                       proxy_cfg.monitor_username,
                                       proxy_cfg.monitor_password)

        # Let's remove all the nodes defined in the hostgroups that are not
        # part of this cluster or are not in the desired state.
        if galera_nodes_synced:
            desired_state = LOCAL_STATE_SYNCED
            nodes_list = galera_nodes_synced
        else:
            desired_state = LOCAL_STATE_DONOR_DESYNCED
            nodes_list = galera_nodes_desynced

        # Handle writer backends.
        writer_backends = deregister_unhealthy_backends(proxysql_man,
                                                        galera_man.nodes,
                                                        hostgroup_writer,
                                                        [desired_state],
                                                        nodes_blacklist)

        # If there is more than one node in the writer hostgroup then we
        # remove all but one.
        if len(writer_backends) > 1:
            log.info('There are %d writers. Removing all but one.',
                     len(writer_backends))
            for backend in writer_backends[1:]:
                proxysql_man.deregister_backend(hostgroup_writer,
                                                backend.hostname,
                                                backend.port)
        elif len(writer_backends) == 0:
            # If there are no backends registered in the writer hostgroup
            # then we register one healthy Galera node. We ignore any node
            # defined in the blacklist.
            node = None
            for n in nodes_list:
                if n in nodes_blacklist:
                    continue
                node = n
                break

            # If we are unable to find a healthy node after discounting the
            # blacklisted nodes, then we ignore the blacklist.
            if node is None:
                node = nodes_list[0]

            proxysql_man.register_backend(hostgroup_writer,
                                          node.host, node.port)

        # Fetch the final list of writer backends.
        writer_backend = [b for b in
                          proxysql_man.fetch_backends(hostgroup_writer)
                          if b.status == BACKEND_STATUS_ONLINE][0]

        # Now deregister all the unhealthy backends in the reader hostgroup.
        reader_backends = deregister_unhealthy_backends(proxysql_man,
                                                        galera_man.nodes,
                                                        hostgroup_reader,
                                                        [desired_state])

        # Now we register all of the healthy Galera nodes in the
        # reader hostgroup.
        for node in nodes_list:
            if (len(reader_backends) > 0 and
                    node.host == writer_backend.hostname and
                    node.port == writer_backend.port):
                continue

            proxysql_man.register_backend(hostgroup_reader,
                                          node.host, node.port)

        # Now filter the healthy backends that are common between the writer
        # hostgroup and the reader hostgroup.
        reader_backends = [b for b in
                           proxysql_man.fetch_backends(hostgroup_reader)
                           if b.status == BACKEND_STATUS_ONLINE]

        # If we have more than one backend registered in the reader hostgroup
        # then we remove the ones that are also present in the writer
        # hostgroup.
        if len(reader_backends) > 1:
            for b in reader_backends:
                if (b.hostname == writer_backend.hostname and
                        b.port == writer_backend.port):
                    log.info('More than one backend is registered in the '
                             'reader hostgroup; removing the ones that are '
                             'also present in the writer hostgroup.')
                    proxysql_man.deregister_backend(hostgroup_reader,
                                                    b.hostname, b.port)

        # TODO: Add user sync functionality that syncs MySQL users with
        # ProxySQL.
    except ProxySQLAdminConnectionError:
        log.error('ProxySQL connection failed.')
        return False
    finally:
        # Reload the ProxySQL runtime so that it picks up all the changes
        # that have been made so far.
        proxysql_man.reload_runtime()

    return True

def discover_cluster_nodes(self):
    """Given the initial node, find all the other nodes in the same cluster.
    It sets up the internal nodes list, which is later used to perform
    operations on the nodes or the cluster.

    :return: Returns True on success, False otherwise.
    :rtype: bool
    """
    initial_node = GaleraNode({
        'host': self.host,
        'port': self.port,
        'username': self.user,
        'password': self.password
    })

    # Check that the initial node's status is 'PRIMARY'.
    self.refresh_and_validate_node_state(initial_node)
    self._nodes = [initial_node]

    with initial_node.get_connection() as conn:
        with conn.cursor() as cursor:
            cursor.execute("SHOW GLOBAL STATUS LIKE "
                           "'wsrep_incoming_addresses'")
            res = {r['Variable_name'].lower(): r['Value'].lower()
                   for r in cursor.fetchall()}

            if not res.get('wsrep_incoming_addresses'):
                err_msg = ('Node %s:%s unknown status variable '
                           '"wsrep_incoming_addresses"'
                           % (initial_node.host, initial_node.port))
                log.error(err_msg)
                raise GaleraNodeUnknownState(err_msg)

            log.info('Node %s:%s wsrep_incoming_addresses: %s'
                     % (initial_node.host, initial_node.port,
                        res['wsrep_incoming_addresses']))

            for host_port in res['wsrep_incoming_addresses'].split(','):
                host, port = host_port.split(':')

                # We ignore the initial node from wsrep_incoming_addresses.
                if initial_node.host == host:
                    continue

                node = GaleraNode({
                    'host': host,
                    'port': int(port),
                    'username': initial_node.username,
                    'password': initial_node.password
                })

                # Check that this node's status is also 'PRIMARY'.
                self.refresh_and_validate_node_state(node)

                if GaleraManager.nodes_in_same_cluster(initial_node, node):
                    self._nodes.append(node)

    return True

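# A minimal sketch of the value discover_cluster_nodes() parses out of the
# 'wsrep_incoming_addresses' status variable: a comma-separated list of
# host:port pairs reported by Galera. The sample value and the helper name
# are assumptions for illustration only.
def _example_split_incoming_addresses(
        addresses='10.0.0.1:3306,10.0.0.2:3306,10.0.0.3:3306'):
    """Split the status variable into (host, port) pairs, as done above."""
    nodes = []
    for host_port in addresses.split(','):
        host, port = host_port.split(':')
        nodes.append((host, int(port)))
    return nodes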