def pinghost(host):
    """Ping *host* and report whether it answered.

    The Windows ``ping.exe`` binary is tried first. If it cannot be launched
    (the handler's log message treats this as "not running Windows"), the
    function falls back to a single-packet ``ping`` whose count flag is chosen
    from ``system_name()``.

    Args:
        host: Host name or address to ping.

    Returns:
        True when the host is considered online, False otherwise.
    """
    try:
        process = subprocess.Popen(["ping.exe", host], stdout=subprocess.PIPE)
        raw_output = process.communicate()[0]
    except OSError as error:
        log_warn('Current Machine is not running Windows-based OS: ' + str(error))
        # Ping command count option as function of OS
        param = '-n' if system_name().lower() == 'windows' else '-c'
        # Building the command. Ex: "ping -c 1 phlamtecdb-a"
        command = ['ping', param, '1', host]
        # Pinging
        if system_call(command) == 0:
            log_info(host + ' is online')
            return True
        log_error('!!!!!' + host + ' is OFFLINE!!!!!')
        return False

    # Decode instead of str(bytes) so the search runs on real text rather
    # than on the "b'...'" repr of the raw output.
    output = raw_output.decode(errors='replace')
    # A non-zero exit status (timeouts, unresolved host) means no reply was
    # received; this matches the fallback branch, which also judges by exit
    # status. The text check is kept because a run can still print
    # "unreachable" replies.
    if process.returncode != 0 or 'unreachable' in output:
        log_error('!!!!!' + host + ' is unreachable!!!!!')
        return False
    log_info(host + ' is online')
    return True
def healthcheck(host, port, instance_name, user, password):
    """Check that a MySQL instance accepts connections.

    Opens a connection with a 10 second timeout, asks the connector whether
    the connection is usable, and always closes it again.

    Args:
        host: Server host name.
        port: TCP port of the instance.
        instance_name: Label used only in log messages.
        user: Login user.
        password: Login password.

    Returns:
        0 when the instance is up, -1 when it is down or unreachable.
    """
    down_prefix = ('!!!!!' + host + ' ' + instance_name
                   + ' INSTANCE, IS DOWN!!!!!:\t')
    try:
        mysql_connection = mysql.connector.connect(host=host,
                                                   port=port,
                                                   user=user,
                                                   password=password,
                                                   connection_timeout=10)
        try:
            # The result must be inspected: is_connected() reports a dead
            # connection by returning False, not by raising.
            alive = mysql_connection.is_connected()
        finally:
            # Release the connection whatever the outcome.
            mysql_connection.close()
    except Exception as error:
        log_error(down_prefix + str(error))
        return -1

    if not alive:
        log_error(down_prefix + 'connection is not alive')
        return -1

    log_info(host + ' ' + instance_name + ' INSTANCE, IS UP')
    return 0
def confirm_normality(self):
    """Queue a recovery message if an alert lock file exists, then clear locks.

    A 'good' status message is appended to ``self.messages`` only when the
    danger or warning lock file is present. The locks are cleared and the OK
    line is logged on every call.
    """
    alert_was_active = (os.path.isfile(self.DANGER_LOCK)
                        or os.path.isfile(self.WARNING_LOCK))
    if alert_was_active:
        recovery_notice = {
            'status': 'good',
            'short_message': 'Everything is back to normal',
            'long_message': 'Nothing to complain about.',
            'time_string': datetime.datetime.now().isoformat(),
        }
        self.messages.append(recovery_notice)

    self.clear_locks()
    log_info('Everything is OK!')
def sendnotification(config, subject):
    """E-mail *subject* (used as both subject and body) to the distro list.

    Sending is best-effort: any ``Exception`` is logged and swallowed so a
    mail failure does not stop the caller.

    Args:
        config: Mapping with ``distro_list`` (iterable of addresses) and
            ``smtp_info`` (mapping with ``from``, ``host`` and ``port``).
        subject: Text used for the subject line and the message body.

    Returns:
        None.
    """
    try:
        distro_list = config['distro_list']
        smtp_info = config['smtp_info']

        msg = EmailMessage()
        msg.set_content(subject)
        msg['Subject'] = subject
        msg['From'] = smtp_info['from']
        msg['To'] = ", ".join(distro_list)

        # The context manager issues QUIT and closes the socket even when
        # send_message() fails, so the SMTP session cannot leak.
        with smtplib.SMTP(smtp_info['host'], smtp_info['port']) as smtp:
            smtp.send_message(msg)
        log_info('Notification Sent')
    except Exception as error:
        # No `finally: return` here: it would also suppress KeyboardInterrupt
        # and SystemExit.
        log_error('Error sending notification!: ' + str(error))
def sendnotification_replication_errors_nfixed(config, body):
    """E-mail the distro list that replication errors could not be resolved.

    Sending is best-effort: any ``Exception`` is logged and swallowed so a
    mail failure does not stop the caller.

    Args:
        config: Mapping with ``distro_list`` (iterable of addresses) and
            ``smtp_info`` (mapping with ``from``, ``host`` and ``port``).
        body: Text appended to the message body after the fixed preamble.

    Returns:
        None.
    """
    try:
        distro_list = config['distro_list']
        smtp_info = config['smtp_info']

        msg = EmailMessage()
        msg.set_content('Error in replication due to statement:\n\n' + body)
        # NOTE(review): the subject names the SMTP host from the config, not
        # the database host being monitored -- confirm this is intended.
        msg['Subject'] = ('Replication Error(s) on host: ' + smtp_info['host']
                          + ': Unable to Resolve')
        msg['From'] = smtp_info['from']
        msg['To'] = ", ".join(distro_list)

        # The context manager issues QUIT and closes the socket even when
        # send_message() fails, so the SMTP session cannot leak.
        with smtplib.SMTP(smtp_info['host'], smtp_info['port']) as smtp:
            smtp.send_message(msg)
        log_info('Notification Sent')
    except Exception as error:
        # No `finally: return` here: it would also suppress KeyboardInterrupt
        # and SystemExit.
        log_error('Error sending notification!: ' + str(error))
def check(self):
    """Read the replica status once and dispatch to the matching handler.

    Depending on the first row of ``SHOW SLAVE STATUS`` this calls
    ``raise_replication_error``, ``track_lag`` or ``confirm_normality``. Any
    exception is handed to ``raise_exception``. Afterwards, queued messages
    (if any) are sent via ``trigger_notifications``.
    """
    cnx = None
    try:
        cnx = mysql.connector.connect(user=self.user,
                                      password=self.password,
                                      host=self.host,
                                      port=self.port)
        cursor = cnx.cursor()
        cursor.execute('SHOW SLAVE STATUS;')
        replication_status_row = cursor.fetchall()[0]

        # Positional columns of the status row; presumably Last_Errno,
        # Last_Error, Seconds_Behind_Master and Slave_SQL_Running_State --
        # TODO confirm against the server version in use.
        last_error_no = replication_status_row[18]
        last_error = replication_status_row[19]
        seconds_behind_master = replication_status_row[32]
        slave_sql_running_state = replication_status_row[44]

        log_info('Last Error No: ' + str(last_error_no))
        # Log the error text here; the number is already on the line above.
        log_info('Last Error: ' + str(last_error))
        log_info('Seconds behind master: ' + str(seconds_behind_master))
        log_info('slave_sql_running_state: ' + str(slave_sql_running_state))

        if last_error_no != 0:
            self.raise_replication_error(last_error, slave_sql_running_state)
        elif seconds_behind_master >= self.lag_interval:
            # NOTE(review): if the server reports NULL here this comparison
            # raises TypeError, which lands in the handler below.
            self.track_lag(slave_sql_running_state, seconds_behind_master)
        else:
            self.confirm_normality()
    except Exception as error:
        self.raise_exception(error)
    finally:
        # Always release the connection opened above.
        if cnx is not None:
            cnx.close()

    if self.messages:
        self.trigger_notifications()
def clear_replication_errors(self):
    """Skip failing replication events until the replica reports no error.

    Logs in with the replication credentials from ``self.config`` and then
    repeatedly reads ``SHOW SLAVE STATUS``. While the error code is non-zero
    it records the error, stops the slave, skips one event and restarts it.
    Once the code is zero a "fixed" notification carrying the error report
    is sent. On any failure an "unable to resolve" notification is sent and
    the method returns without retrying.

    Returns:
        None.
    """
    stop_slave_query = 'STOP SLAVE;'
    # No trailing ')' here: it made the statement a server-side syntax error.
    skip_counter_query = 'set global sql_slave_skip_counter = 1;'
    start_slave_query = 'START SLAVE;'
    slave_status_query = 'SHOW SLAVE STATUS;'
    total_errors = 0
    errors = dict()

    try:
        # Log in as the replication user
        log_info('Replication: Loggin in as Replication User')
        cnx = mysql.connector.connect(
            user=self.config['replication']['user'],
            password=self.config['replication']['password'],
            host=self.host,
            port=self.port)
        log_info('Replication: Successfully Logged in as Replication User')
    except Exception as error:
        log_error(
            "Replication: Error logging in as Replication User on host: "
            + self.host + ': ' + str(error))
        sendnotification_replication_errors_nfixed(
            self.config, 'Potential Errors present in Replication.'
                         ' Unable to resolve issue(s).')
        # Without a connection there is nothing to repair; continuing would
        # use an undefined `cnx`.
        return

    try:
        cursor = cnx.cursor()
        while True:
            log_info('Replication: Performing \"Status\" on slave')
            # Get the status of the replication slave
            cursor.execute(slave_status_query)
            replication_status_row = cursor.fetchall()[0]
            # Columns 18/19 are presumably Last_Errno / Last_Error --
            # TODO confirm against the server version in use.
            replication_error_code = str(replication_status_row[18])
            if int(replication_error_code) == 0:
                break

            # There is an error: record it, then skip the offending event.
            replication_error_str = str(replication_status_row[19])
            log_error('Replication: Error detected: ' + replication_error_str)
            errors[replication_error_code] = replication_error_str
            total_errors = total_errors + 1
            cursor.execute(stop_slave_query)
            cursor.execute(skip_counter_query)
            cursor.execute(start_slave_query)

        log_info("Replication: No more errors detected")
        sendnotification_replication_errors_fixed(
            self.config,
            build_error_report(errors, total_errors, True),
            replication_error_code)
    except Exception as error:
        # The loop is abandoned on failure; retrying here would spin forever
        # and send a notification on every pass.
        self.raise_exception(error)
        log_error(
            "Replication: Error preventing all replication issues to be resolve. "
            "Some or none of these issues were resolved:\t" + str(error))
        sendnotification_replication_errors_nfixed(
            self.config, 'Potential Errors present in Replication.'
                         ' Unable to resolve issue(s).')
    finally:
        # Always release the connection opened above.
        cnx.close()
import os from MySQL.health import healthcheck, pinghost from MySQL.replication import ReplicationChecker from Notifier.notify import sendnotification from Utilities.utility import log_info, log_debug if __name__ == '__main__': directory = os.path.realpath( os.path.join(os.getcwd(), os.path.dirname(__file__))) config = yaml.load((open(os.path.join(directory, 'config.yml'), 'r').read())) logging.basicConfig(filename=os.path.join(directory, 'replication.log'), level=logging.DEBUG) log_info('Monitor Started') # Get all the instance names and their port numbers log_debug('Gathering instance names from config') instances = config['instances'] ports = dict() for instance in instances: ports[instance] = config['mysql'][instance] # !Check the Instance Status' for port in ports: log_info('Performing health check on instance: ' + str(port).upper()) status = healthcheck(config['mysql']['host'], ports.get(port), port, config['mysql']['user'], config['mysql']['password']) if status == -1: