def main(self):
    """ Serves as the entry point for the CLI """
    logger.info('Starting snapshot tagging')

    if self.args.aws_creds_profile:
        os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

    ebs_snapshotter = EbsSnapshotter(self.args.region, verbose=True)

    if not ebs_snapshotter.is_region_valid(self.args.region):
        logger.info("Invalid region")
        sys.exit(1)
    else:
        logger.info("Region: %s:", self.args.region)
        ebs_util = EbsUtil(self.args.region, verbose=True)

    # filter out the already tagged volumes
    skip_volume_ids = []

    if not self.args.retag_volumes:
        # They don't want us to retag volumes that are already tagged, so
        # add the already tagged volumes to the list of volume IDs to skip.
        skip_volume_ids += ebs_snapshotter.get_already_tagged_volume_ids()

    logger.info('Skipping this many volume ids: %s', len(skip_volume_ids))

    vol_ids = ebs_util.get_classified_volume_ids(skip_volume_ids)

    for id_name, id_list in vol_ids._asdict().iteritems():
        logger.info('name: %s amount: %s', id_name, len(id_list))

    ## Actually create the snapshot tags now
    if self.args.master_root_volumes and vol_ids.master_root:
        self.set_master_root_volume_tags(vol_ids.master_root,
                                         ebs_snapshotter, ebs_util)

    if self.args.node_root_volumes and vol_ids.node_root:
        self.set_node_root_volume_tags(vol_ids.node_root,
                                       ebs_snapshotter, ebs_util)

    if self.args.docker_storage_volumes and vol_ids.docker_storage:
        self.set_docker_storage_volume_tags(vol_ids.docker_storage,
                                            ebs_snapshotter, ebs_util)

    if self.args.manually_provisioned_pv_volumes and vol_ids.manually_provisioned_pv:
        self.set_manually_provisioned_pv_volume_tags(
            vol_ids.manually_provisioned_pv, ebs_snapshotter, ebs_util)

    if self.args.autoprovisioned_pv_volumes and vol_ids.autoprovisioned_pv:
        self.set_autoprovisioned_pv_volume_tags(
            vol_ids.autoprovisioned_pv, ebs_snapshotter, ebs_util)

    if self.args.unidentified_volumes and vol_ids.unidentified:
        self.set_unidentified_volume_tags(vol_ids.unidentified,
                                          ebs_snapshotter)
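# The loop over vol_ids._asdict() above implies that
# EbsUtil.get_classified_volume_ids() returns a namedtuple. A minimal sketch of
# the assumed shape -- the field names are taken from the vol_ids.<field>
# accesses in main(); the type name and example values are hypothetical.
from collections import namedtuple

ClassifiedVolumeIds = namedtuple('ClassifiedVolumeIds',
                                 ['master_root', 'node_root', 'docker_storage',
                                  'manually_provisioned_pv', 'autoprovisioned_pv',
                                  'unidentified'])

# Example: each field holds the list of EBS volume IDs for that class.
vol_ids = ClassifiedVolumeIds(master_root=['vol-0aaa'], node_root=['vol-0bbb'],
                              docker_storage=[], manually_provisioned_pv=[],
                              autoprovisioned_pv=['vol-0ccc'], unidentified=[])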
def main(self):
    """ Serves as the entry point for the CLI """

    if self.args.aws_creds_profile:
        os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

    for region in EbsUtil.get_supported_regions():
        print
        print "Region: %s:" % region

        ebs_util = EbsUtil(region.name, verbose=True)
        ebs_snapshotter = EbsSnapshotter(region.name, verbose=True)

        # filter out the already tagged volumes
        skip_volume_ids = []

        if not self.args.retag_volumes:
            # They don't want us to retag volumes that are already tagged, so
            # add the already tagged volumes to the list of volume IDs to skip.
            skip_volume_ids += ebs_snapshotter.get_already_tagged_volume_ids()

        vol_ids = ebs_util.get_classified_volume_ids(skip_volume_ids)

        ## Actually create the snapshot tags now
        if self.args.master_root_volumes and vol_ids.master_root:
            self.set_master_root_volume_tags(vol_ids.master_root,
                                             ebs_snapshotter, ebs_util)

        if self.args.node_root_volumes and vol_ids.node_root:
            self.set_node_root_volume_tags(vol_ids.node_root,
                                           ebs_snapshotter, ebs_util)

        if self.args.docker_storage_volumes and vol_ids.docker_storage:
            self.set_docker_storage_volume_tags(vol_ids.docker_storage,
                                                ebs_snapshotter, ebs_util)

        if self.args.manually_provisioned_pv_volumes and vol_ids.manually_provisioned_pv:
            self.set_manually_provisioned_pv_volume_tags(
                vol_ids.manually_provisioned_pv, ebs_snapshotter, ebs_util)

        if self.args.autoprovisioned_pv_volumes and vol_ids.autoprovisioned_pv:
            self.set_autoprovisioned_pv_volume_tags(
                vol_ids.autoprovisioned_pv, ebs_snapshotter, ebs_util)

        if self.args.unidentified_volumes and vol_ids.unidentified:
            self.set_unidentified_volume_tags(vol_ids.unidentified,
                                              ebs_snapshotter)
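# The per-region loop above assumes EbsUtil.get_supported_regions() yields
# objects exposing a .name attribute. A minimal sketch using boto 2's region
# listing; the whitelist contents are an assumption, not the real helper.
import boto.ec2

SUPPORTED_REGION_NAMES = ['us-east-1', 'us-west-2']  # assumed whitelist

def get_supported_regions():
    """ Hypothetical sketch: boto's RegionInfo objects carry the .name
        attribute that the loop above formats into 'Region: %s:'. """
    return [region for region in boto.ec2.regions()
            if region.name in SUPPORTED_REGION_NAMES]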
def main():
    """ report pv usage """
    logger.info('################################################################################')
    logger.info(' Starting Volume Snapshot Tag Checks - %s',
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
    logger.info('################################################################################')
    logger.debug("main()")

    args = parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    if args.aws_creds_profile:
        os.environ['AWS_PROFILE'] = args.aws_creds_profile

    ebs_snapshotter = EbsSnapshotter(args.region, verbose=True)

    if not ebs_snapshotter.is_region_valid(args.region):
        logger.info("Invalid region")
        sys.exit(1)
    else:
        logger.info("Region: %s:", args.region)
        ebs_util = EbsUtil(args.region, verbose=True)

    volumes = get_pv_volume_ids()

    status = 0
    for volume in volumes:
        logger.info('Checking pv: %s, volume ID: %s', volume, volumes[volume])
        has_tag = validate_volume_tag(ebs_snapshotter, volumes[volume],
                                      DAILY_SCHEDULE)
        if not has_tag:
            logger.warn('pv: %s has no "snapshot:daily" tags', volume)
            status = status + 1

    send_metrics(status)
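# validate_volume_tag() and get_pv_volume_ids() are defined elsewhere. A hedged
# sketch of what the tag check might look like with boto 2; the function body,
# the 'snapshot' tag key, and the DAILY_SCHEDULE value are assumptions inferred
# from the 'snapshot:daily' log message above.
import boto.ec2

DAILY_SCHEDULE = 'daily'  # assumed value of the schedule constant

def validate_volume_tag_sketch(conn, volume_id, schedule):
    """ Hypothetical: True if the volume carries a snapshot:<schedule> tag.
        Takes a boto 2 EC2Connection rather than an EbsSnapshotter so the
        sketch stays self-contained. """
    volumes = conn.get_all_volumes(volume_ids=[volume_id])
    return bool(volumes) and volumes[0].tags.get('snapshot') == schedule

# Usage (hypothetical):
#   conn = boto.ec2.connect_to_region('us-east-1')
#   validate_volume_tag_sketch(conn, 'vol-0abc123', DAILY_SCHEDULE)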
class EBSStuckVolumesCheck(object):
    """
    This class houses a check that looks for EBS volumes that are stuck in a
    transition state (attaching, detaching, busy, etc).
    """

    def __init__(self):
        """ initialize EBSStuckVolumesCheck class """
        self.args = None
        self.vol_state_data = None

        self.parse_args()

        # Make sure we're using the profile they've requested.
        if self.args.aws_creds_profile:
            os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

        self.eu = EbsUtil(self.args.region, verbose=self.args.verbose)
        self.mts = MetricSender(verbose=self.args.verbose)

    def parse_args(self):
        ''' Parse arguments passed to the script '''
        parser = argparse.ArgumentParser(description='OpenShift Cluster Metrics Checker')
        parser.add_argument('-v', '--verbose', action='store_true', default=None,
                            help='Verbose output')
        parser.add_argument('--region', required=True,
                            help='AWS EC2 Region to check')
        parser.add_argument('--stuck-after', default=120, type=int,
                            help='Amount of time in seconds after which the volume is ' + \
                                 'determined to be "stuck".')
        parser.add_argument('--aws-creds-profile', required=False,
                            help='The AWS credentials profile to use.')

        self.args = parser.parse_args()

    @staticmethod
    def read_raw_volume_state_data():
        """ Reads in the raw string of volume state data from disk """
        if not os.path.isfile(STATE_DATA_FILE):
            return ""  # Act like the file is blank

        with open(STATE_DATA_FILE, 'r') as stream:
            return stream.read()

    def load_volume_state_data(self):
        """ Loads the volume state data from disk """
        if os.path.isfile(STATE_DATA_FILE):
            with open(STATE_DATA_FILE, 'r') as stream:
                # safe_load avoids constructing arbitrary python objects
                self.vol_state_data = yaml.safe_load(stream)
        else:
            self.vol_state_data = {}

    def save_volume_state_data(self):
        """ Saves the volume state data to disk """
        with open(STATE_DATA_FILE, 'w') as outfile:
            yaml.dump(self.vol_state_data, outfile, default_flow_style=False,
                      allow_unicode=True)

    def add_new_transitioning_volumes(self, trans_vols):
        """ Adds volumes that we haven't seen before that are in a
            transitioning state. """
        for vol in trans_vols:
            vol_uri = self.eu.generate_volume_uri(vol)

            if vol_uri not in self.vol_state_data.keys():
                # This is the first time we've seen this volume, add it.
                self.vol_state_data[vol_uri] = {}
                self.vol_state_data[vol_uri][STUCK_AFTER_KEY] = datetime.now() + \
                    timedelta(seconds=self.args.stuck_after)
                self.vol_state_data[vol_uri][VOLUME_ID_KEY] = str(vol.id)
                self.vol_state_data[vol_uri][STATE_KEY] = TRANSITION_STATE
                self.vol_state_data[vol_uri][ATTACH_STATUS_KEY] = str(vol.attach_data.status)

    def set_stuck_volumes(self):
        """ Sets volumes to state 'stuck' if they've passed their transition
            state deadline. """
        for item in self.vol_state_data.itervalues():
            # We don't want to set unstuck volumes back to stuck.
            if item[STATE_KEY] != UNSTUCK_STATE:
                if datetime.now() > item[STUCK_AFTER_KEY]:
                    item[STATE_KEY] = STUCK_STATE

    def set_unstuck_volumes(self, trans_vols):
        """ Change volumes that were in state 'stuck' that are no longer in
            transition, to state 'unstuck'. """
        trans_vol_ids = [str(vol.id) for vol in trans_vols]

        for vol_uri, cache_data in self.vol_state_data.iteritems():
            if cache_data[STATE_KEY] == STUCK_STATE and \
               cache_data[VOLUME_ID_KEY] not in trans_vol_ids:
                # This volume was stuck, but isn't any longer
                self.vol_state_data[vol_uri][STATE_KEY] = UNSTUCK_STATE

    def report_stuck_volumes(self):
        """ sends data to monitoring that these volumes are stuck.
""" for vol_uri, cache_data in self.vol_state_data.iteritems(): if cache_data[STATE_KEY] == STUCK_STATE: self.mts.add_dynamic_metric(EBS_VOLUME_URI_DISC_KEY, EBS_VOLUME_URI_DISC_MACRO, [vol_uri]) item_name = '%s[%s]' % (EBS_VOLUME_ATTACH_STATE_KEY, vol_uri) self.mts.add_metric({item_name: MONITORING_STUCK_VALUE}) # Actually send them self.mts.send_metrics() def report_unstuck_volumes(self): """ sends data to monitoring that these volumes have become unstuck. """ for vol_uri, cache_data in self.vol_state_data.iteritems(): if cache_data[STATE_KEY] == UNSTUCK_STATE: self.mts.add_dynamic_metric(EBS_VOLUME_URI_DISC_KEY, EBS_VOLUME_URI_DISC_MACRO, [vol_uri]) item_name = '%s[%s]' % (EBS_VOLUME_ATTACH_STATE_KEY, vol_uri) self.mts.add_metric({item_name: MONITORING_UNSTUCK_VALUE}) # Actually send them self.mts.send_metrics() def remove_unstuck_volumes_from_state_data(self): """ Removes state 'unstuck' volumes from the state data (no longer need to track them) """ for vol_uri in self.vol_state_data.keys(): cache_data = self.vol_state_data[vol_uri] if cache_data[STATE_KEY] == UNSTUCK_STATE: # This volume was stuck, but isn't any longer del self.vol_state_data[vol_uri] def remove_no_longer_transitioning_volumes(self, trans_vols): """ Remove volumes that were transitioning, but are no longer in the trans_vols list """ trans_vol_ids = [str(vol.id) for vol in trans_vols] for vol_uri in self.vol_state_data.keys(): cache_data = self.vol_state_data[vol_uri] if cache_data[STATE_KEY] == TRANSITION_STATE and \ cache_data[VOLUME_ID_KEY] not in trans_vol_ids: # This volume was transitioning, but isn't any longer del self.vol_state_data[vol_uri] def run(self): """ Run the main logic of this check """ # Load the state machine data self.load_volume_state_data() # Get the volumes that are currently in a transitioning state trans_vols = self.eu.get_trans_attach_status_vols() # Based on that list, weed out the volumes that used to be transitioning, # that are no longer in the transitioning volumes list. This means that # it was a normal volume transition, probably from attaching to attached # or detaching to detached (aka None). self.remove_no_longer_transitioning_volumes(trans_vols) # Check on the volumes that were in the stuck state that are no longer # in the transitioning volumes list. This means that they went from stuck # to unstuck. We need to track these so that we can report that they've become # unstuck to monitoring. self.set_unstuck_volumes(trans_vols) # Add any volumes that are transitioning that we haven't seen before to our data self.add_new_transitioning_volumes(trans_vols) # Change volumes that are still transitioning and have hit their deadline to # finish that transition to a state of "stuck" self.set_stuck_volumes() # Report to monitoring the stuck volumes self.report_stuck_volumes() # Report to monitoring the volumes that were stuck, but are now unstuck (no # longer transitioning) self.report_unstuck_volumes() # Since the unstuck volumes have been reported, they can safeuly be removed from # our tracking now. self.remove_unstuck_volumes_from_state_data() # Make sure we save state for the next run. self.save_volume_state_data() self.eu.verbose_print("\nTracking Volumes") self.eu.verbose_print("----------------\n") # Cat out the state file raw_state_file = self.read_raw_volume_state_data() self.eu.verbose_print(raw_state_file)
def main(self):
    """ Serves as the entry point for the CLI """

    if self.args.aws_creds_profile:
        os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

    for region in EbsUtil.get_supported_regions():
        print
        print "Region: %s:" % region

        eu = EbsUtil(region.name, verbose=True)
        es = EbsSnapshotter(region.name, verbose=True)

        # filter out the already tagged volumes
        skip_volume_ids = []

        if not self.args.retag_volumes:
            # They don't want us to retag volumes that are already tagged, so
            # add the already tagged volumes to the list of volume IDs to skip.
            skip_volume_ids += es.get_already_tagged_volume_ids()

        vol_ids = eu.get_classified_volume_ids(skip_volume_ids)

        ## Actually create the snapshot tags now
        if self.args.master_root_volumes and vol_ids.master_root:
            print
            print " Setting master root volume tags:"
            es.create_volume_snapshot_tag(vol_ids.master_root,
                                          self.args.master_root_volumes,
                                          prefix=" ",
                                          dry_run=self.args.dry_run)

        if self.args.node_root_volumes and vol_ids.node_root:
            print
            print " Setting node root volume tags:"
            es.create_volume_snapshot_tag(vol_ids.node_root,
                                          self.args.node_root_volumes,
                                          prefix=" ",
                                          dry_run=self.args.dry_run)

        if self.args.docker_storage_volumes and vol_ids.docker_storage:
            print
            print " Setting docker storage volume tags:"
            es.create_volume_snapshot_tag(vol_ids.docker_storage,
                                          self.args.docker_storage_volumes,
                                          prefix=" ",
                                          dry_run=self.args.dry_run)

        if self.args.autoprovisioned_pv_volumes and vol_ids.autoprovisioned_pv:
            print
            print " Setting autoprovisioned pv volume tags:"
            es.create_volume_snapshot_tag(vol_ids.autoprovisioned_pv,
                                          self.args.autoprovisioned_pv_volumes,
                                          prefix=" ",
                                          dry_run=self.args.dry_run)

        if self.args.manually_provisioned_pv_volumes and vol_ids.manually_provisioned_pv:
            print
            print " Setting manually provisioned pv volume tags:"
            es.create_volume_snapshot_tag(vol_ids.manually_provisioned_pv,
                                          self.args.manually_provisioned_pv_volumes,
                                          prefix=" ",
                                          dry_run=self.args.dry_run)

        if self.args.unidentified_volumes and vol_ids.unidentified:
            print
            print " Setting unidentified volume tags:"
            es.create_volume_snapshot_tag(vol_ids.unidentified,
                                          self.args.unidentified_volumes,
                                          prefix=" ",
                                          dry_run=self.args.dry_run)
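# EbsSnapshotter.create_volume_snapshot_tag() is called above but not shown. A
# minimal sketch of what such a helper might do with boto 2; the body is an
# assumption based on the call sites (volume IDs, a schedule value, an output
# prefix, and a dry_run flag), not the library's actual implementation.
import boto.ec2

def create_volume_snapshot_tag_sketch(conn, volume_ids, schedule,
                                      prefix="", dry_run=False):
    """ Hypothetical: tag each volume with snapshot=<schedule>. """
    for vol_id in volume_ids:
        print "%s%s: snapshot=%s" % (prefix, vol_id, schedule)

    if not dry_run:
        # boto 2's create_tags takes a list of resource IDs and a tag dict
        conn.create_tags(volume_ids, {'snapshot': schedule})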