def set_mysql_config():
    """
    Read config.yml and populate the MYSQL_CONFIG / INSTANCE_CONFIG globals.

    Every YAML document found in ``config.yml`` is applied in order, so the
    values of a later document overwrite those of an earlier one. Any failure
    (missing file, missing key, parse error) is logged and swallowed; values
    assigned before the failure are kept.

    :return: None
    """
    try:
        # The context manager guarantees the file handle is closed, even when
        # parsing or a key lookup raises.
        with open("config.yml", "r") as stream:
            # NOTE(review): yaml.load_all without a safe loader can build
            # arbitrary Python objects. Consider yaml.safe_load_all if
            # config.yml can ever come from an untrusted source.
            # load_all is lazy, so iteration must happen while the file is
            # still open.
            for doc in yaml.load_all(stream):
                mysql_section = doc['mysql']
                MYSQL_CONFIG["Host"] = mysql_section["host"]
                MYSQL_CONFIG["Port"] = mysql_section["port"]
                MYSQL_CONFIG["User"] = mysql_section["user"]
                MYSQL_CONFIG["Password"] = mysql_section["password"]
                MYSQL_CONFIG["Db"] = mysql_section["db"]

                instance_section = doc['instance']
                INSTANCE_CONFIG["InstanceId"] = instance_section["instance_id"]
                INSTANCE_CONFIG["VolumeId"] = instance_section["volume_id"]
                INSTANCE_CONFIG["InstanceName"] = instance_section["g_instance_name"]
                INSTANCE_CONFIG["LockFilename"] = instance_section["lock_filename"]
                INSTANCE_CONFIG["TagRole"] = instance_section["tag_role"]
                INSTANCE_CONFIG["TagSchema"] = instance_section["tag_schema"]
                INSTANCE_CONFIG["Environment"] = instance_section["tag_environment"]
                INSTANCE_CONFIG["Retention"] = instance_section["retention_seconds"]
        logging.debug(kayvee.formatLog(
            "Set_mysql_config", "debug", "creating Set_mysql_config",
            {'context': "set_mysql_config Successfull",
             'time': str(datetime.now())}))
    except Exception as e:
        logging.error(kayvee.formatLog(
            "Set_mysql_config", "error", "creating Set_mysql_config",
            {'context': "set_mysql_config param not found: " + str(e),
             'time': str(datetime.now())}))
def get_server_lag(dbconnection, max_lag_seconds=10800):
    """
    Report whether the replica is lagging too far behind its master.

    Runs ``show slave status`` through mysql_query and inspects the
    ``Seconds_Behind_Master`` column of every returned row.

    :param dbconnection: database connection handed to mysql_query
    :param max_lag_seconds: lag (in seconds) below which the server counts as
        caught up; defaults to 10800
    :return: bool -- False only when at least one row reports a known lag
        below ``max_lag_seconds``; True in every other case (query returned
        None, no rows, lag unknown, or an error occurred)
    """
    logging.info(kayvee.formatLog(
        "get_server_lag", "info", "Starting ...",
        {'context': "checking Server for Lag", 'time': str(datetime.now())}))
    lag_status = True
    try:
        result = mysql_query(dbconnection, 'show slave status')
        if result is None:
            print(str(datetime.now()) + " Server is not a slave")
        else:
            for row in result.fetchall():
                seconds_behind = row['Seconds_Behind_Master']
                # A NULL value means the lag is unknown (replication is not
                # running). On Python 2 ``None < 10800`` is True, which would
                # wrongly report "no lag", so treat unknown as lagging.
                if seconds_behind is not None and seconds_behind < max_lag_seconds:
                    lag_status = False
            logging.info(kayvee.formatLog(
                "get_server_lag", "info", "Server lag",
                {'context': "Lag: " + str(lag_status),
                 'time': str(datetime.now())}))
    except Exception as e:
        logging.error(kayvee.formatLog(
            "get_server_lag", "error", "Error",
            dict(context="Error: " + str(e), time=str(datetime.now()))))
    # A plain return (instead of ``finally: return``) lets KeyboardInterrupt
    # and SystemExit propagate instead of being silently discarded.
    return lag_status
def _stop_server(dbconnection):
    """
    Stop the mysql service if check_server_is_running reports it as running.

    :param dbconnection: database connection handed to check_server_is_running
    :return: bool -- True when the service was stopped (or did not need to
        be), False when the stop command failed or raised
    """
    logging.debug(kayvee.formatLog(
        "Stop_server", "debug", "Starting ....",
        {'context': "checking if server is running",
         'time': str(datetime.now())}))
    try:
        if check_server_is_running(dbconnection):
            logging.debug(kayvee.formatLog(
                "Stop_server", "debug", "Service mysql stopping ...",
                {'context': "stopping ...", 'time': str(datetime.now())}))
            # os.system does not raise when the command fails; it returns the
            # exit status, which must be checked explicitly.
            exit_status = os.system("service mysql stop")
            if exit_status != 0:
                logging.error(kayvee.formatLog(
                    "Stop_server", "error", "Error!",
                    {'context': "error:service mysql stop exited with status "
                                + str(exit_status),
                     'time': str(datetime.now())}))
                return False
            # Give the service a moment to shut down before callers proceed.
            time.sleep(3)
        else:
            logging.debug(kayvee.formatLog(
                "Stop_server", "debug", "Service mysql stopped",
                {'context': "Stopped!", 'time': str(datetime.now())}))
        return True
    except (OSError, ValueError) as e:
        logging.error(kayvee.formatLog(
            "Stop_server", "error", "Error!",
            {'context': "error:" + str(e), 'time': str(datetime.now())}))
        return False
def delete_single_snapshots_gt_d(snapshot, return_bool=True):
    """
    Delete a snapshot when it is older than the configured retention.

    The snapshot age is computed from ``snapshot.start_time`` (UTC) and
    compared against ``INSTANCE_CONFIG["Retention"]`` (seconds).

    :param snapshot: snapshot object exposing ``start_time`` (timestamp
        string) and ``id``
    :param return_bool: value returned to the caller unchanged
    :return: ``return_bool``
    """
    logging.debug(kayvee.formatLog(
        "delete_snapshots_gt_d", "debug", "Starting...",
        {'context': "...", 'time': str(datetime.now())}))
    # %f accepts any fractional-second value; the previous literal ".000Z"
    # raised ValueError for timestamps with non-zero milliseconds.
    timestamp = datetime.strptime(snapshot.start_time,
                                  '%Y-%m-%dT%H:%M:%S.%fZ')
    last_snap_time = int((datetime.utcnow() - timestamp).total_seconds())
    logging.info(kayvee.formatLog(
        "delete_snapshots_gt_d", "info",
        "last_snap_time: " + str(last_snap_time),
        {'context': "Get Snapshot timestamp:" + str(last_snap_time),
         'time': str(datetime.now())}))
    if last_snap_time > int(INSTANCE_CONFIG["Retention"]):
        logging.info(kayvee.formatLog(
            "delete_snapshots_gt_d", "info", "Candidates to be deleted",
            {'context': str(snapshot.id), 'time': str(datetime.now())}))
        ec.delete_snapshot(SnapshotId=snapshot.id)
        # Pause between deletions, as the original implementation did.
        time.sleep(5)
    else:
        logging.info(kayvee.formatLog(
            "delete_snapshots_gt_d", "info", "Not candidates to be deleted",
            {'context': "not candidates", 'time': str(datetime.now())}))
    return return_bool
def _ensure_snapshot(connection, volume, interval, name):
    """ Ensure that a given volume has an appropriate snapshot

    A snapshot is created when the volume has none, or when the newest one
    is older than the period implied by ``interval``.

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume to check
    :type interval: str
    :param interval: one of VALID_INTERVALS
    :type name: str
    :param name: name passed on to _create_snapshot
    :returns: None
    """
    if interval not in VALID_INTERVALS:
        logging.warning(
            kayvee.formatLog("ebs-snapshots", "warning",
                             "invalid snapshotting interval", {
                                 "volume": volume.id,
                                 "interval": interval
                             }))
        return

    snapshots = connection.get_all_snapshots(filters={'volume-id': volume.id})

    # Create a snapshot if we don't have any
    if not snapshots:
        _create_snapshot(connection, volume, name)
        return

    min_delta = 3600 * 24 * 365 * 10  # 10 years :)
    for snapshot in snapshots:
        # %f accepts any fractional-second value; the previous literal
        # ".000Z" raised ValueError for non-zero milliseconds.
        timestamp = datetime.datetime.strptime(snapshot.start_time,
                                               '%Y-%m-%dT%H:%M:%S.%fZ')
        delta_seconds = int(
            (datetime.datetime.utcnow() - timestamp).total_seconds())
        if delta_seconds < min_delta:
            min_delta = delta_seconds

    logging.info(
        kayvee.formatLog(
            "ebs-snapshots", "info",
            'The newest snapshot for {} is {} seconds old'.format(
                volume.id, min_delta)))

    # Maximum allowed age of the newest snapshot, per interval name.
    max_age_by_interval = {
        'hourly': 3600,
        'daily': 3600 * 24,
        'weekly': 3600 * 24 * 7,
        'monthly': 3600 * 24 * 30,
        'yearly': 3600 * 24 * 365,
    }
    # An interval accepted by VALID_INTERVALS but unknown here never
    # triggers a snapshot, exactly as the former if/elif chain behaved.
    max_age = max_age_by_interval.get(interval)
    if max_age is not None and min_delta > max_age:
        _create_snapshot(connection, volume, name)
    else:
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info", "no snapshot needed",
                             {"volume": volume.id}))
def _start_server():
    """
    Start the mysql service via ``service mysql start``.

    Failures are logged, never raised.

    :return: None
    """
    logging.debug(kayvee.formatLog(
        "_start_server", "debug", "Starting mysql server",
        {'context': "starting service ...", 'time': str(datetime.now())}))
    try:
        exit_status = os.system("service mysql start")
    except (OSError, ValueError) as e:
        logging.error(kayvee.formatLog(
            "_start_server", "error", "Starting mysql server",
            {'context': "error has occurred" + str(e),
             'time': str(datetime.now())}))
        return
    # os.system reports a failing command through its return value, not an
    # exception, so a non-zero status has to be logged explicitly.
    if exit_status != 0:
        logging.error(kayvee.formatLog(
            "_start_server", "error", "Starting mysql server",
            {'context': "error has occurred: exit status " + str(exit_status),
             'time': str(datetime.now())}))
def run(connection,
        backup_client,
        volume_id,
        interval='daily',
        max_snapshots=0,
        name=''):
    """ Snapshot one volume and prune its old snapshots and backups

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object for primary EBS region
    :type backup_client: boto3.EC2.Client
    :param backup_client: EC2 client for backup region
    :type volume_id: str
    :param volume_id: identifier for boto.ec2.volume.Volume
    :type interval: str
    :param interval: snapshotting interval handed to _ensure_snapshot
    :type max_snapshots: int
    :param max_snapshots: number of snapshots to keep (0 means infinite)
    :type name: str
    :param name: name handed to _ensure_snapshot
    :returns: None
    """
    try:
        volumes = connection.get_all_volumes([volume_id])
    except EC2ResponseError as error:
        # The _kvmeta routing block describes a notification route for the
        # failure in addition to the log line itself.
        notification_route = {
            "type": "notifications",
            "channel": "#oncall-infra",
            "icon": ":camera_with_flash:",
            "user": "******",
            "message": "ERROR: " + str(error.message),
        }
        failure_payload = {
            "msg": error.message,
            "_kvmeta": {
                "team": "eng-infra",
                "kv_version": "2.0.2",
                "kv_language": "python",
                "routes": [notification_route]
            }
        }
        logging.error(
            kayvee.formatLog("ebs-snapshots", "error",
                             "failed to connect to AWS", failure_payload))
        return

    run_payload = {"volume": volume_id, "count": len(volumes)}
    logging.info(kayvee.formatLog("ebs-snapshots", "info", "run", run_payload))

    for found_volume in volumes:
        _ensure_snapshot(connection, backup_client, found_volume, interval,
                         name)
        _remove_old_snapshots(connection, found_volume, max_snapshots)
        _remove_old_snapshot_backups(backup_client, found_volume.id,
                                     max_snapshots)
def remove_lock_file():
    """
    Delete the lock file named by ``INSTANCE_CONFIG["LockFilename"]``.

    :return: bool -- True when the file was removed, False when removal
        failed (for example because the file does not exist)
    """
    logging.info(kayvee.formatLog(
        "remove_lock_file", "info", "Removing lock file",
        {'context': "Starting", 'time': str(datetime.now())}))
    try:
        os.remove(INSTANCE_CONFIG["LockFilename"])
        return True
    # os.remove raises OSError. On Python 2 that is not a subclass of
    # IOError, so catching IOError alone let a missing file crash the caller.
    except (IOError, OSError):
        logging.error(kayvee.formatLog(
            "remove_lock_file", "error", "Removing lock file",
            {'context': "Can not remove", 'time': str(datetime.now())}))
        return False
def _ensure_snapshot(connection, volume, interval, name):
    """ Ensure that a given volume has an appropriate snapshot

    A snapshot is created when the volume has none, or when the newest one
    is older than the period implied by ``interval``.

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume to check
    :type interval: str
    :param interval: one of VALID_INTERVALS
    :type name: str
    :param name: name passed on to _create_snapshot
    :returns: None
    """
    if interval not in VALID_INTERVALS:
        logging.warning(kayvee.formatLog("ebs-snapshots", "warning", "invalid snapshotting interval", {
            "volume": volume.id,
            "interval": interval
        }))
        return

    snapshots = connection.get_all_snapshots(filters={'volume-id': volume.id})

    # Create a snapshot if we don't have any
    if not snapshots:
        _create_snapshot(connection, volume, name)
        return

    min_delta = 3600 * 24 * 365 * 10  # 10 years :)
    for snapshot in snapshots:
        # %f parses any fractional-second value; the former literal ".000Z"
        # raised ValueError whenever the milliseconds were not zero.
        timestamp = datetime.datetime.strptime(
            snapshot.start_time,
            '%Y-%m-%dT%H:%M:%S.%fZ')
        delta_seconds = int(
            (datetime.datetime.utcnow() - timestamp).total_seconds())

        if delta_seconds < min_delta:
            min_delta = delta_seconds

    logging.info(kayvee.formatLog("ebs-snapshots", "info", 'The newest snapshot for {} is {} seconds old'.format(volume.id, min_delta)))

    # Oldest acceptable age (seconds) of the newest snapshot per interval.
    interval_seconds = {
        'hourly': 3600,
        'daily': 3600*24,
        'weekly': 3600*24*7,
        'monthly': 3600*24*30,
        'yearly': 3600*24*365,
    }
    # Intervals missing from the table never trigger a snapshot, matching
    # the fall-through of the former if/elif chain.
    threshold = interval_seconds.get(interval)
    if threshold is not None and min_delta > threshold:
        _create_snapshot(connection, volume, name)
    else:
        logging.info(kayvee.formatLog("ebs-snapshots", "info", "no snapshot needed", {"volume": volume.id}))
def _remove_old_snapshots(connection, volume, max_snapshots):
    """ Remove old snapshots, keeping the newest ``max_snapshots``

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume to check
    :type max_snapshots: int
    :param max_snapshots: number of snapshots to keep; 0 keeps everything.
        Anything that is not a non-negative int is rejected with a warning.
    :returns: None
    """
    retention = max_snapshots
    # The former test ``not type(retention) is int and retention >= 0``
    # parsed as ``(not is_int) and (retention >= 0)``, so negative ints (and
    # None on Python 2) slipped through; a negative value then deleted the
    # OLDEST snapshots via ``snapshots[:-retention]``.
    retention_is_valid = (isinstance(retention, int)
                          and not isinstance(retention, bool)
                          and retention >= 0)
    if not retention_is_valid:
        logging.warning(
            kayvee.formatLog("ebs-snapshots", "warning",
                             "invalid max_snapshots value", {
                                 "volume": volume.id,
                                 "max_snapshots": retention
                             }))
        return

    snapshots = connection.get_all_snapshots(filters={'volume-id': volume.id})

    # Sort the list based on the start time
    snapshots.sort(key=lambda x: x.start_time)

    # Drop the newest ``retention`` entries from the deletion list. With
    # retention 0 nothing is deleted (0 means "keep everything").
    snapshots = snapshots[:-retention] if retention > 0 else []
    if not snapshots:
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info",
                             "no old snapshots to remove"))
        return

    for snapshot in snapshots:
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info", "deleting snapshot",
                             {"snapshot": snapshot.id}))
        try:
            snapshot.delete()
        except EC2ResponseError as error:
            # Best effort: keep going with the remaining snapshots.
            logging.warning(
                kayvee.formatLog("ebs-snapshots", "warning",
                                 "could not remove snapshot", {
                                     "snapshot": snapshot.id,
                                     "msg": error.message
                                 }))

    logging.info(
        kayvee.formatLog("ebs-snapshots", "info", "done deleting snapshots"))
def mysql_query(conn, query):
    """
    Execute ``query`` on ``conn`` with a dictionary cursor.

    :param conn: database connection providing ``cursor()``
    :param query: SQL text to execute
    :return: the executed cursor, or None when an OSError/ValueError was
        raised; other exception types propagate to the caller
    """
    start_entry = kayvee.formatLog(
        "mysql_query", "info", "Starting...",
        {'context': "Wrapper for query:" + str(query),
         'time': str(datetime.now())})
    logging.info(start_entry)
    try:
        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(query)
    except (OSError, ValueError) as err:
        failure_entry = kayvee.formatLog(
            "mysql_query", "error", "Error on mysql_query",
            {'context': " Error:" + str(err), 'time': str(datetime.now())})
        logging.error(failure_entry)
        return None
    return cursor
def create_snapshots(backup_conf):
    """ Run the snapshot manager once for every configured volume

    :param backup_conf: object whose ``get()`` returns a dict mapping a
        volume to its parameter dict (``interval``, ``max_snapshots``,
        ``name``)
    :returns: None
    """
    ec2_connection = ec2.connect_to_region(
        aws_region,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)

    volume_settings = backup_conf.get()
    for volume, params in volume_settings.iteritems():
        announcement = "about to take ebs snapshot {} - {}".format(volume, params)
        logging.info(kayvee.formatLog("ebs-snapshots", "info", announcement))

        # Missing settings fall back to: daily, keep everything, no name.
        snapshot_manager.run(
            ec2_connection,
            volume,
            params.get('interval', 'daily'),
            params.get('max_snapshots', 0),
            params.get('name', ''))
def get(self):
    """ Return the current config dict, refreshing it first when possible

    A freshly loaded config replaces ``self.config`` only after it passed
    ``_validate_config``; on any failure a warning is logged and the
    previously stored config is returned.
    """
    try:
        candidate = self.refresh()
        self._validate_config(candidate)
    except Exception as e:
        details = {"path": self.path, "error": str(e)}
        logging.warning(kayvee.formatLog("ebs-snapshots", "warning", "unable to load backup config", details))
    else:
        self.config = candidate

    return self.config
def _create_snapshot(connection, volume, name=''):
    """ Snapshot a volume and tag the result

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection used to tag the snapshot
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume to snapshot
    :type name: str
    :param name: value of the Name tag; defaults to ``<volume id>-snapshot``
    :returns: boto.ec2.snapshot.Snapshot -- The new snapshot
    """
    logging.info(kayvee.formatLog("ebs-snapshots", "info", "creating new snapshot", {"volume": volume.id}))

    new_snapshot = volume.create_snapshot(
        description="automatic snapshot by ebs-snapshots")

    # Fall back to a name derived from the volume id.
    name = name or '{}-snapshot'.format(volume.id)
    tags = {'Name': name, 'creator': 'ebs-snapshots'}
    connection.create_tags([new_snapshot.id], tags)

    summary = {
        "name": name,
        "volume": volume.id,
        "snapshot": new_snapshot.id
    }
    logging.info(kayvee.formatLog("ebs-snapshots", "info", "created snapshot successfully", summary))
    return new_snapshot
def run():
    """
    Execute one snapshot cycle.

    Creates a snapshot, prunes snapshots through
    delete_single_snapshots_gt_d, and restarts mysql plus removes the lock
    file when the snapshots are completed, the lock file exists and no
    database connection could be obtained.
    """
    conn = get_mysql_conn()
    volume_ids = INSTANCE_CONFIG["VolumeId"]

    create_snapshot(conn)
    volume_list_per_func(volume_ids, delete_single_snapshots_gt_d)

    # The nested checks keep the original short-circuit order: each one runs
    # only when the previous one succeeded.
    if volume_list_per_func(INSTANCE_CONFIG["VolumeId"],
                            check_last_single_snapshots_completed):
        if get_lock_file():
            if conn is None:
                logging.info(kayvee.formatLog(
                    "volume_list_per_func + delete_single_snapshots_gt_d",
                    "info", "second step reboot mysql",
                    {'context': "rebooting mysql",
                     'time': str(datetime.now())}))
                _start_server()
                remove_lock_file()

    logging.info(kayvee.formatLog(
        "Process", "info",
        ">>>>>>>>>>>>>>>>>>>>> End Process <<<<<<<<<<<<<<<<<<<<<<<",
        {'context': "Exit Process", 'time': str(datetime.now())}))
def test_formatLog(self):
    """Check kv.formatLog against every case listed under tests['formatLog']."""
    for case in tests['formatLog']:
        # A parenthesised single argument prints the same text on Python 2.
        print("TEST: {}".format(case['title']))
        given = case['input']
        actual = kv.formatLog(
            given.get('source'),
            given.get('level'),
            given.get('title'),
            given.get('data'),
        )
        self.assertEqualJson(actual, case['output'])
def get_lock_file():
    """
    Tell whether the lock file named by INSTANCE_CONFIG["LockFilename"] exists.

    :return: bool -- True (and an info log entry) when the file exists,
        False otherwise
    """
    if not os.path.isfile(INSTANCE_CONFIG["LockFilename"]):
        return False

    logging.info(kayvee.formatLog(
        "get_lock_file", "info", "Getting Lock File",
        {'context': " exists, process already running",
         'time': str(datetime.now())}))
    return True
def get_mysql_conn():
    """
    Load the configuration and open a MySQLdb connection from MYSQL_CONFIG.

    :return: the MySQLdb connection, or None when connecting (or reading a
        setting) failed; the failure is logged
    """
    logging.debug(kayvee.formatLog(
        "get_mysql_conn", "debug", "Starting ....",
        {'context': "Create mysql conn", 'time': str(datetime.now())}))
    set_mysql_config()
    try:
        connect_kwargs = {
            'host': MYSQL_CONFIG['Host'],
            'port': MYSQL_CONFIG["Port"],
            'passwd': MYSQL_CONFIG["Password"],
            'db': MYSQL_CONFIG["Db"],
            'user': MYSQL_CONFIG["User"],
        }
        connection = MySQLdb.connect(**connect_kwargs)
    except Exception as e:
        logging.error(kayvee.formatLog(
            "get_mysql_conn", "error", "Error!",
            {'context': "Error get_mysql_conn: " + str(e),
             'time': str(datetime.now())}))
        return None
    return connection
def set_lock_file():
    """
    Create and lock the lock file unless it already exists.

    :return: bool -- False when the lock file already exists or cannot be
        locked, True when it was created and locked
    """
    logging.debug(kayvee.formatLog(
        "set_lock_file", "debug", "Setting Lock File",
        {'context': "Starting", 'time': str(datetime.now())}))

    if get_lock_file():
        return False

    # Opening in 'w' mode creates the file. The handle is deliberately not
    # closed here, exactly as before.
    # NOTE(review): the lockf lock presumably ends once this handle is
    # released; confirm the file's existence is the intended guard.
    handle = open(INSTANCE_CONFIG["LockFilename"], 'w')
    try:
        fcntl.lockf(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        logging.info(kayvee.formatLog(
            "set_lock_file", "info", "Setting Lock File",
            {'context': "Locking process", 'time': str(datetime.now())}))
    except IOError:
        logging.info(kayvee.formatLog(
            "set_lock_file", "info", "Setting Lock File",
            {'context': " Cannot lock: " + INSTANCE_CONFIG["LockFilename"],
             'time': str(datetime.now())}))
        return False
    return True
def check_last_single_snapshots_age(snapshot, return_bool=True):
    """
    Tell whether a snapshot is at least one day (86400 seconds) old.

    :param snapshot: snapshot object exposing ``start_time`` (UTC timestamp
        string) and ``id``
    :param return_bool: value returned when the snapshot is old enough
    :return: False when the snapshot is younger than 86400 seconds,
        otherwise ``return_bool``
    """
    logging.info(kayvee.formatLog(
        "check_last_single_snapshots_completed", "info", "Starting...",
        {'context': "...", 'time': str(datetime.now())}))
    # %f accepts any fractional-second value; the previous literal ".000Z"
    # raised ValueError for timestamps with non-zero milliseconds.
    timestamp = datetime.strptime(
        snapshot.start_time,
        '%Y-%m-%dT%H:%M:%S.%fZ')
    logging.info(kayvee.formatLog(
        "check_last_single_snapshots_completed", "info", "Check snapshot aged",
        {'context': "snapshot id:" + str(snapshot.id)
                    + " timestamp:" + str(timestamp),
         'time': str(datetime.now())}))
    age_seconds = int((datetime.utcnow() - timestamp).total_seconds())
    if age_seconds < 86400:
        return_bool = False
    return return_bool
def _remove_old_snapshots(connection, volume, max_snapshots):
    """ Remove old snapshots, keeping the newest ``max_snapshots``

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume to check
    :type max_snapshots: int
    :param max_snapshots: number of snapshots to keep; 0 keeps everything.
        Anything that is not a non-negative int is rejected with a warning.
    :returns: None
    """
    retention = max_snapshots
    # ``not type(retention) is int and retention >= 0`` bound as
    # ``(not is_int) and (retention >= 0)``: negative ints passed validation
    # and ``snapshots[:-retention]`` then selected the OLDEST snapshots for
    # deletion. Validate the whole condition instead.
    if not (isinstance(retention, int)
            and not isinstance(retention, bool)
            and retention >= 0):
        logging.warning(kayvee.formatLog("ebs-snapshots", "warning", "invalid max_snapshots value", {
            "volume": volume.id,
            "max_snapshots": retention
        }))
        return

    snapshots = connection.get_all_snapshots(filters={'volume-id': volume.id})

    # Sort the list based on the start time
    snapshots.sort(key=lambda x: x.start_time)

    # Remove snapshots we want to keep; retention 0 keeps every snapshot.
    if retention > 0:
        snapshots = snapshots[:-retention]
    else:
        snapshots = []

    if not snapshots:
        logging.info(kayvee.formatLog("ebs-snapshots", "info", "no old snapshots to remove"))
        return

    for snapshot in snapshots:
        logging.info(kayvee.formatLog("ebs-snapshots", "info", "deleting snapshot", {"snapshot": snapshot.id}))
        try:
            snapshot.delete()
        except EC2ResponseError as error:
            # Best effort: a failed deletion must not stop the others.
            logging.warning(kayvee.formatLog("ebs-snapshots", "warning", "could not remove snapshot", {
                "snapshot": snapshot.id,
                "msg": error.message
            }))

    logging.info(kayvee.formatLog("ebs-snapshots", "info", "done deleting snapshots"))
def check_last_single_snapshots_completed(snapshot, return_bool=True):
    """
    Tell whether a snapshot has reached the 'completed' status.

    :param snapshot: snapshot object exposing ``status``
    :param return_bool: value returned when the snapshot is completed
    :return: False when the status differs from 'completed' or reading it
        failed, otherwise ``return_bool``
    """
    logging.debug(kayvee.formatLog(
        "check_last_single_snapshots_completed", "debug", "Starting...",
        {'context': "...", 'time': str(datetime.now())}))
    try:
        return_bool = False if snapshot.status != 'completed' else return_bool
    except Exception as e:
        failure = {'context': "Error: " + str(e), 'time': str(datetime.now())}
        logging.error(kayvee.formatLog(
            "check_last_single_snapshots_completed", "error", "Error",
            failure))
        return_bool = False
    finally:
        # Returning from ``finally`` is kept on purpose: it is what the
        # original did, and it discards any exception still in flight.
        return return_bool
def _create_snapshot(connection, volume, name=''):
    """ Take a snapshot of ``volume`` and tag it

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection used for tagging
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume to snapshot
    :type name: str
    :param name: Name tag value; when empty, ``<volume id>-snapshot`` is used
    :returns: boto.ec2.snapshot.Snapshot -- The new snapshot
    """
    start_entry = kayvee.formatLog("ebs-snapshots", "info",
                                   "creating new snapshot",
                                   {"volume": volume.id})
    logging.info(start_entry)

    created = volume.create_snapshot(
        description="automatic snapshot by ebs-snapshots")

    if not name:
        name = '{}-snapshot'.format(volume.id)

    snapshot_tags = dict(Name=name, creator='ebs-snapshots')
    connection.create_tags([created.id], snapshot_tags)

    done_entry = kayvee.formatLog("ebs-snapshots", "info",
                                  "created snapshot successfully", {
                                      "name": name,
                                      "volume": volume.id,
                                      "snapshot": created.id
                                  })
    logging.info(done_entry)
    return created
def create_snapshots(backup_conf):
    """ Invoke snapshot_manager.run for each volume in the backup config

    :param backup_conf: object whose ``get()`` returns a dict mapping a
        volume to its parameter dict
    :returns: None
    """
    credentials = dict(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)
    ec2_connection = ec2.connect_to_region(aws_region, **credentials)

    for volume, params in backup_conf.get().iteritems():
        message = "about to take ebs snapshot {} - {}".format(volume, params)
        logging.info(kayvee.formatLog("ebs-snapshots", "info", message))

        # Defaults: daily interval, unlimited snapshots, empty name.
        interval = params.get('interval', 'daily')
        max_snapshots = params.get('max_snapshots', 0)
        name = params.get('name', '')

        snapshot_manager.run(ec2_connection, volume, interval, max_snapshots,
                             name)
def volume_list_per_func(pvolumeid, name_func, region='us-east-1'):
    """
    Apply ``name_func`` to every snapshot of every listed volume.

    Processing stops at the first snapshot for which ``name_func`` returns a
    falsy value, and that value is returned.

    :param pvolumeid: iterable of volume ids
    :param name_func: callable taking one snapshot and returning a truthy
        value to continue or a falsy value to stop
    :param region: EC2 region to connect to; defaults to 'us-east-1'
    :return: the last value returned by ``name_func`` (True when no snapshot
        was visited), or False when an error occurred
    """
    logging.info(kayvee.formatLog(
        "volume_list_per_func", "info", "Starting...",
        {'context': "...", 'time': str(datetime.now())}))
    return_value = True
    try:
        ec2_conn = connect_to_region(region)
        for volume_id in pvolumeid:
            snapshots = ec2_conn.get_all_snapshots(
                filters={'volume-id': [volume_id]})
            for snapshot in snapshots:
                return_value = name_func(snapshot)
                if not return_value:
                    # Previously ``break`` only left the inner loop, so the
                    # snapshots of a later volume could overwrite this
                    # failure with True. Stop the whole scan instead.
                    return return_value
    except Exception as e:
        logging.error(kayvee.formatLog(
            "volume_list_per_func", "error", "Error",
            dict(context="Error: " + str(e), time=str(datetime.now()))))
        return_value = False
    # A plain return (instead of ``finally: return``) no longer swallows
    # KeyboardInterrupt / SystemExit.
    return return_value
def get(self):
    """ Return the config dict, reloading and validating it first

    When reloading or validation fails, a warning is logged and the config
    stored earlier on the instance is returned unchanged.
    """
    try:
        reloaded = self.refresh()
        self._validate_config(reloaded)
    except Exception as e:
        failure = {
            "path": self.path,
            "error": str(e)
        }
        logging.warning(
            kayvee.formatLog("ebs-snapshots", "warning",
                             "unable to load backup config", failure))
        return self.config

    self.config = reloaded
    return self.config
def create_snapshots(backup_conf):
    """ Evaluate snapshots for every configured volume

    Opens a connection to the primary region and a boto3 client for the
    backup region, then hands each volume to snapshot_manager.run.

    :param backup_conf: object whose ``get()`` returns a dict mapping a
        volume to its parameter dict
    :returns: None
    """
    ec2_connection = ec2.connect_to_region(aws_region)
    ec2_backup_client = boto3.client("ec2", region_name=aws_backup_region)

    configured_volumes = backup_conf.get()
    for volume, params in configured_volumes.iteritems():
        message = "about to evaluate ebs snapshots for {} - {}".format(
            volume, params)
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info", message, data={}))

        # Defaults: daily interval, unlimited snapshots, empty name.
        snapshot_manager.run(ec2_connection, ec2_backup_client, volume,
                             params.get('interval', 'daily'),
                             params.get('max_snapshots', 0),
                             params.get('name', ''))
def check_server_is_running(dbconnection):
    """
    Decide whether the server should be treated as a running replica.

    When no lock file exists and a connection is available, the server is
    asked for its ``@@read_only`` flag; any value other than 1 yields False.
    If the process fails, False is returned.

    NOTE(review): the source was collapsed onto a single line; the nesting
    below is reconstructed from the if/else pairing -- confirm against the
    original file.

    :param dbconnection: database connection, or None
    :return: bool -- stays True when the lock file exists, when
        ``dbconnection`` is None, or when read_only is 1; False when
        read_only differs from 1 or an exception occurred
    """
    server_running = True
    logging.info(kayvee.formatLog("check_server_is_running", "info", "Starting ...", {'context': " ...:", 'time': str(datetime.now())}))
    # Default used when the query returns no rows; 0 counts as "not read-only".
    result = 0
    try:
        if not get_lock_file():
            logging.debug(kayvee.formatLog("check_server_is_running", "debug", "Lock file exists ...", {'context': "Lock file NOT exists", 'time': str(datetime.now())}))
            if dbconnection is not None:
                logging.debug(kayvee.formatLog("check_server_is_running", "debug", "Check dbconnection", {'context': "Check_server_is_running.dbconnection: " + str(dbconnection), 'time': str(datetime.now())}))
                cursor = mysql_query(dbconnection, "select @@read_only read_only")
                result_set = cursor.fetchall()
                # Keeps the value of the last row returned.
                for row in result_set:
                    result = row["read_only"]
                logging.debug(kayvee.formatLog("check_server_is_running", "debug", "Check Server is master", {'context': "Check_server_is_running_is_read_only: " + str(result), 'time': str(datetime.now())}))
                # read_only != 1 is logged as "Server is Master" and reported
                # as not running.
                if result != 1:
                    server_running = False
                    logging.info(kayvee.formatLog("check_server_is_running", "info", "Server is Master", {'context': "Server is a Master: " + str(server_running), 'time': str(datetime.now())}))
                else:
                    logging.info(kayvee.formatLog("check_server_is_running", "info", "Server is Slave", {'context': "Server is a Slave: ", 'time': str(datetime.now())}))
        return server_running
    except Exception as e:
        logging.error(kayvee.formatLog("Check_server_is_running", "error", "Error on function", {'context': "Error: " + str(e), 'time': str(datetime.now())}))
        server_running = False
    finally:
        # Returning from ``finally`` overrides the return above and discards
        # any exception still propagating.
        return server_running
def run(connection, volume_id, interval='daily', max_snapshots=0, name=''):
    """ Snapshot a volume when due and prune its old snapshots

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object
    :type volume_id: str
    :param volume_id: identifier for boto.ec2.volume.Volume
    :type interval: str
    :param interval: snapshotting interval handed to _ensure_snapshot
    :type max_snapshots: int
    :param max_snapshots: number of snapshots to keep (0 means infinite)
    :type name: str
    :param name: name handed to _ensure_snapshot
    :returns: None
    """
    try:
        matching_volumes = connection.get_all_volumes([volume_id])
    except EC2ResponseError as error:
        failure = kayvee.formatLog("ebs-snapshots", "error", "failed to connect to AWS", {"msg": error.message})
        logging.error(failure)
        return

    for matched in matching_volumes:
        _ensure_snapshot(connection, matched, interval, name)
        _remove_old_snapshots(connection, matched, max_snapshots)
def run(connection, volume_id, interval='daily', max_snapshots=0, name=''):
    """ Make sure a volume is snapshotted, then drop its surplus snapshots

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object
    :type volume_id: str
    :param volume_id: identifier for boto.ec2.volume.Volume
    :type interval: str
    :param interval: snapshotting interval handed to _ensure_snapshot
    :type max_snapshots: int
    :param max_snapshots: number of snapshots to keep (0 means infinite)
    :type name: str
    :param name: name handed to _ensure_snapshot
    :returns: None
    """
    try:
        volumes = connection.get_all_volumes([volume_id])
    except EC2ResponseError as error:
        error_payload = {"msg": error.message}
        logging.error(
            kayvee.formatLog("ebs-snapshots", "error",
                             "failed to connect to AWS", error_payload))
        return

    # Ensure first, then prune, one volume at a time.
    for current in volumes:
        _ensure_snapshot(connection, current, interval, name)
        _remove_old_snapshots(connection, current, max_snapshots)
# Command-line entry point: starts the ebs-snapshots timer loop.
from ebs_snapshots import ebs_snapshots_daemon
import sys
import kayvee
import logging

# Runs at import time: configure root logging at INFO level.
logging.basicConfig(level=logging.INFO)

if __name__ == "__main__":
    try:
        ebs_snapshots_daemon.snapshot_timer()
    except Exception as e:
        # Last-resort handler: log the failure and exit with status 1.
        logging.error(
            kayvee.formatLog("ebs-snapshots", "error", "unknown exception",
                             {"error": str(e)}))
        sys.exit(1)
def _ensure_snapshot(connection, backup_client, volume, interval, name):
    """ Create a snapshot when one is due and copy the newest completed one

    :type connection: boto.ec2.connection.EC2Connection
    :param connection: EC2 connection object
    :type backup_client: boto3.EC2.Client
    :param backup_client: EC2 client for backup region
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume to check
    :type interval: str
    :param interval: one of VALID_INTERVALS
    :type name: str
    :param name: a name to tag the snapshot(s) with
    :returns: None
    """
    if interval not in VALID_INTERVALS:
        logging.warning(
            kayvee.formatLog("ebs-snapshots", "warning",
                             "invalid snapshotting interval", {
                                 "volume": volume.id,
                                 "interval": interval
                             }))
        return

    existing = connection.get_all_snapshots(filters={'volume-id': volume.id})

    # Nothing there yet: take the first snapshot and stop.
    if not existing:
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info",
                             "no snapshots found - creating snapshot",
                             {"volume": volume.id}))
        _create_snapshot(connection, volume, name)
        return

    # Track the newest snapshot overall and the newest completed one.
    # Ten years serves as the "older than anything" starting age.
    latest_snapshot_id = None
    min_delta = 3600 * 24 * 365 * 10
    latest_complete_snapshot_id = None
    min_complete_snapshot_delta = 3600 * 24 * 365 * 10

    for candidate in existing:
        started_at = datetime.datetime.strptime(candidate.start_time,
                                                '%Y-%m-%dT%H:%M:%S.%fZ')
        age = int((datetime.datetime.utcnow() - started_at).total_seconds())

        if age < min_delta:
            latest_snapshot_id = candidate.id
            min_delta = age

        if candidate.status == "completed" and age < min_complete_snapshot_delta:
            latest_complete_snapshot_id = candidate.id
            min_complete_snapshot_delta = age

    newest_message = (
        'The newest snapshot for {} is {} seconds old (snapshot {})'.format(
            volume.id, min_delta, latest_snapshot_id))
    logging.info(
        kayvee.formatLog("ebs-snapshots", "info", newest_message,
                         data={"volume": volume.id}))

    newest_complete_message = (
        'The newest completed snapshot for {} is {} seconds old (snapshot {})'
        .format(volume.id, min_complete_snapshot_delta,
                latest_complete_snapshot_id))
    logging.info(
        kayvee.formatLog("ebs-snapshots", "info", newest_complete_message,
                         data={"volume": volume.id}))

    # Maximum age of the newest snapshot before a new one is due.
    seconds_per_interval = {
        u'hourly': 3600,
        u'daily': 3600 * 24,
        u'weekly': 3600 * 24 * 7,
        u'monthly': 3600 * 24 * 30,
        u'yearly': 3600 * 24 * 365,
    }

    if min_delta <= seconds_per_interval[interval]:
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info", "no snapshot needed", {
                "volume": volume.id,
                "lastest_snapshot_id": latest_snapshot_id
            }))
        return

    _create_snapshot(connection, volume, name)

    # Only a completed snapshot is copied to the backup region; the id used
    # is the one determined before the snapshot above was created.
    if latest_complete_snapshot_id is None:
        logging.info(
            kayvee.formatLog(
                "ebs-snapshots", "info",
                "waiting to create backup snapshot until snapshot is complete",
                {"volume": volume.id}))
    else:
        _copy_snapshot(backup_client, volume, latest_complete_snapshot_id,
                       name)
def _copy_snapshot(backup_client, volume, snapshot_id, name):
    """ Copy a snapshot into the backup client's region and tag the copy

    :type backup_client: boto3.EC2.Client
    :param backup_client: EC2 client for backup region
    :type volume: boto.ec2.volume.Volume
    :param volume: Volume that snapshot is of
    :type snapshot_id: str
    :param snapshot_id: identifier for boto.ec2.snapshot.Snapshot (the snapshot to copy)
    :type name: str
    :param name: value of the Name tag on the copy
    :returns: str -- the id of the copy, or None when the copy request failed
    """
    logging.info(
        kayvee.formatLog("ebs-snapshots", "info", "copying snapshot", {
            "volume": volume.id,
            "source_snapshot": snapshot_id
        }))

    source_region = _availability_zone_to_region_name(volume.zone)

    try:
        response = backup_client.copy_snapshot(
            SourceRegion=source_region,
            SourceSnapshotId=snapshot_id,
            Encrypted=True,
            Description='copy of {}'.format(snapshot_id))
    except ClientError as error:
        # Hitting the resource limit is logged at info level; every other
        # client error is logged as an error. Either way there is no copy.
        if error.response["Error"]["Code"] != "ResourceLimitExceeded":
            logging.error(
                kayvee.formatLog(
                    "ebs-snapshots", "error", "snapshot copy error", {
                        "name": name,
                        "volume": volume.id,
                        "source_snapshot": snapshot_id,
                        "error": error.response["Error"]["Code"]
                    }))
        else:
            logging.info(
                kayvee.formatLog(
                    "ebs-snapshots", "info", "copying snapshot", {
                        "volume": volume.id,
                        "source_snapshot": snapshot_id,
                        "name": name
                    }))
        return None

    copy_tags = [
        {"Key": "Name", "Value": name},
        {"Key": "creator", "Value": "ebs-snapshots"},
        {"Key": "source_snapshot", "Value": snapshot_id},
        {"Key": "volume-id", "Value": volume.id},
    ]
    try:
        backup_client.create_tags(Resources=[response["SnapshotId"]],
                                  Tags=copy_tags)
    except ClientError as error:
        # The copy exists even though tagging failed, so its id is returned.
        logging.error(
            kayvee.formatLog(
                "ebs-snapshots", "error",
                "unable to tag snapshot copy (error)", {
                    "name": name,
                    "volume": volume.id,
                    "source_snapshot": snapshot_id,
                    "error": error.response["Error"]["Code"]
                }))
        return response["SnapshotId"]

    logging.info(
        kayvee.formatLog(
            "ebs-snapshots", "info", "copied snapshot successfully", {
                "name": name,
                "volume": volume.id,
                "source_snapshot": snapshot_id,
                "snapshot_copy": response["SnapshotId"]
            }))
    return response["SnapshotId"]
def main():
    """
    Run the snapshot cycle forever, pausing 3600 seconds between cycles.

    Never returns normally.
    """
    pause_seconds = 3600
    while True:
        banner = kayvee.formatLog(
            "Main", "info",
            ">>>>>>>>>>>>>>>>>>>>> Starting <<<<<<<<<<<<<<<<<<<<<<<",
            {'context': "...", 'time': str(datetime.now())})
        logging.info(banner)
        run()
        time.sleep(pause_seconds)
# NOTE(review): this fragment uses ``sys`` and ``ebs_snapshots_daemon``
# without importing them here; presumably they are imported above the
# visible part of the file -- confirm.
import kayvee
import logging

# Runs at import time: configure root logging at INFO level.
logging.basicConfig(level=logging.INFO)

if __name__ == "__main__":
    try:
        ebs_snapshots_daemon.snapshot_timer()
    except Exception as e:
        # Last-resort handler: log the failure together with _kvmeta routing
        # data describing a notification route, then exit with status 1.
        logging.error(
            kayvee.formatLog(
                "ebs-snapshots", "error", "unknown exception", {
                    "error": str(e),
                    "_kvmeta": {
                        "team": "eng-infra",
                        "kv_version": "2.0.2",
                        "kv_language": "python",
                        "routes": [{
                            "type": "notifications",
                            "channel": "#oncall-infra",
                            "icon": ":camera_with_flash:",
                            "user": "******",
                            "message": "ERROR: " + str(e),
                        }]
                    }
                }))
        sys.exit(1)
def create_snapshot(dbconnection):
    """
    Stop mysql and snapshot the configured volumes when all checks pass.

    The checks run in order: check_server_is_running, no replication lag,
    previous snapshots completed, previous snapshots old enough. When they
    pass, the lock file is set, mysql is stopped, and every matching volume
    of the configured instance is snapshotted and tagged.

    NOTE(review): the source was collapsed onto single lines; the nesting
    below is reconstructed from the if/else pairing -- confirm against the
    original file.

    :param dbconnection: database connection used for the server checks
    :return: bool -- True when at least one snapshot was created, False
        otherwise; None when an OSError/ValueError was caught
    """
    return_value = False
    try:
        logging.info(kayvee.formatLog("create_snapshot", "info", "create_snapshot", {'context': "Starting", 'time': str(datetime.now())}))
        if check_server_is_running(dbconnection):
            if get_server_lag(dbconnection):
                logging.info(kayvee.formatLog("create_snapshot", "info", "get_server_lag=True", {'context': "Server has lag, Exit", 'time': str(datetime.now())}))
            else:
                logging.info(kayvee.formatLog("create_snapshot", "info", "get_server_lag=False", {'context': " Server has Not lag, continue", 'time': str(datetime.now())}))
                if not volume_list_per_func(INSTANCE_CONFIG["VolumeId"], check_last_single_snapshots_completed):
                    logging.info(kayvee.formatLog("volume_list_per_func + check_last_single_snapshots_completed ", "info", "create_snapshot", {'context': "Check_last_snapshots_completed=False, Exit", 'time': str(datetime.now())}))
                else:
                    logging.info(kayvee.formatLog("create_snapshot", "info", "create_snapshot", {'context': "check_last_single_snapshots_completed=True, Continue", 'time': str(datetime.now())}))
                    if volume_list_per_func(INSTANCE_CONFIG["VolumeId"], check_last_single_snapshots_age):
                        logging.info(kayvee.formatLog("volume_list_per_func + check_last_single_snapshots_age", "info", "create_snapshot", {'context': "Check_last_snapshots_age=True", 'time': str(datetime.now())}))
                        instances = ec2reource.instances.filter(InstanceIds=[INSTANCE_CONFIG["InstanceId"]])
                        # The return values of both calls are ignored here.
                        set_lock_file()
                        _stop_server(dbconnection)
                        for instance in instances:
                            logging.info(kayvee.formatLog("create_snapshot", "info", "create_snapshot", {'context': "Volume_id:" + str(INSTANCE_CONFIG["VolumeId"]), 'time': str(datetime.now())}))
                            volume1 = instance.volumes.filter(VolumeIds=INSTANCE_CONFIG["VolumeId"], )
                            for v in volume1:
                                snapshot = ec.create_snapshot(VolumeId=v.id, Description="Lambda backup for ebs " + v.id)
                                # Wait before tagging the new snapshot.
                                time.sleep(10)
                                ec.create_tags(
                                    Resources=[
                                        snapshot['SnapshotId'],
                                    ],
                                    Tags=[
                                        {'Key': 'Name', 'Value': INSTANCE_CONFIG["InstanceName"]},
                                        {'Key': 'role', 'Value': INSTANCE_CONFIG["TagRole"]},
                                        {'Key': 'schema', 'Value': INSTANCE_CONFIG["TagSchema"]},
                                        {'Key': 'Environment', 'Value': INSTANCE_CONFIG["Environment"]},
                                    ]
                                )
                                result = snapshot["SnapshotId"]
                                logging.info(kayvee.formatLog("create_snapshot", "info", "creating snapshots", {'context': str(result), 'time': str(datetime.now())}))
                                return_value = True
                    else:
                        logging.info(kayvee.formatLog("create_snapshot", "info", "creating snapshots", {'context': "check_last_single_snapshots_age=False", 'time': str(datetime.now())}))
        return return_value
    except (OSError, ValueError) as err:
        # Only OSError/ValueError are handled; the function then falls off
        # the end and returns None.
        logging.error(kayvee.formatLog("create_snapshot", "error", "creating snapshots", {'context': "Check_last_snapshots_age=False" + str(err), 'time': str(datetime.now())}))
# Command-line entry point: starts the ebs-snapshots timer loop.
from ebs_snapshots import ebs_snapshots_daemon
import sys
import kayvee
import logging

# Runs at import time: configure root logging at INFO level.
logging.basicConfig(level=logging.INFO)

if __name__ == "__main__":
    try:
        ebs_snapshots_daemon.snapshot_timer()
    except Exception as e:
        # Last-resort handler: log the failure and exit with status 1.
        logging.error(kayvee.formatLog("ebs-snapshots", "error", "unknown exception", {"error": str(e)}))
        sys.exit(1)
def _remove_old_snapshot_backups(client, volume_id, max_snapshots):
    """ Remove old snapshot backups, keeping the newest ``max_snapshots``

    Backups are found through their ``volume-id`` tag.

    :type client: boto3.EC2.Client
    :param client: EC2 client object
    :type volume_id: str
    :param volume_id: ID of volume to check
    :type max_snapshots: int
    :param max_snapshots: number of backups to keep; 0 keeps everything.
        Anything that is not a non-negative int is rejected with a warning.
    :returns: None
    """
    logging.info(
        kayvee.formatLog("ebs-snapshots", "info",
                         "removing old backup snapshots",
                         data={"volume": volume_id}))

    retention = max_snapshots
    # ``not type(retention) is int and retention >= 0`` bound as
    # ``(not is_int) and (retention >= 0)``, so negative ints passed the
    # check and ``snapshots[:-retention]`` then selected the OLDEST backups
    # for deletion. Validate the whole condition instead.
    retention_is_valid = (isinstance(retention, int)
                          and not isinstance(retention, bool)
                          and retention >= 0)
    if not retention_is_valid:
        logging.warning(
            kayvee.formatLog("ebs-snapshots", "warning",
                             "invalid max_snapshots value", {
                                 "volume": volume_id,
                                 "max_snapshots": retention
                             }))
        return

    response = client.describe_snapshots(Filters=[{
        "Name": "tag:volume-id",
        "Values": [volume_id]
    }])
    snapshots = response["Snapshots"]

    # Sort the list based on the start time
    snapshots.sort(key=lambda x: x["StartTime"])

    # Drop the newest ``retention`` entries from the deletion list. With
    # retention 0 nothing is deleted (0 means "keep everything").
    snapshots = snapshots[:-retention] if retention > 0 else []
    if not snapshots:
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info",
                             "no old backup snapshots to remove",
                             data={"volume": volume_id}))
        return

    ec2 = boto3.resource('ec2', region_name=aws_backup_region)
    for snapshotInfo in snapshots:
        snapshot = ec2.Snapshot(snapshotInfo["SnapshotId"])
        logging.info(
            kayvee.formatLog("ebs-snapshots", "info",
                             "deleting backup snapshot",
                             {"snapshot": snapshot.id}))
        try:
            snapshot.delete()
        except Exception as e:
            # @TODO: how to get exceptions for boto3 resource?
            # Best effort: keep deleting the remaining backups.
            logging.error(
                kayvee.formatLog("ebs-snapshots", "error",
                                 "could not remove backup snapshot (error)", {
                                     "snapshot": snapshot.id,
                                     "error": str(e)
                                 }))

    logging.info(
        kayvee.formatLog("ebs-snapshots", "info",
                         "done deleting snapshot backups",
                         data={"volume": volume_id}))