Exemple #1
0
    def main(self):
        """ Serves as the entry point for the CLI.

        Exports the requested AWS credentials profile (if any) through the
        AWS_PROFILE environment variable, validates ``self.args.region`` and
        then, for every volume class that was both requested on the command
        line and has volumes classified under it, calls the matching
        ``self.set_*_volume_tags`` method.

        Exits the process with status 1 when the region is not valid.
        """

        logger.info('Starting snapshot tagging')

        if self.args.aws_creds_profile:
            os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

        region = self.args.region

        # One snapshotter is enough: it is used both to validate the region
        # and to do the tagging below.
        ebs_snapshotter = EbsSnapshotter(region, verbose=True)
        if not ebs_snapshotter.is_region_valid(region):
            logger.info("Invalid region")
            sys.exit(1)

        logger.info("Region: %s:", region)
        ebs_util = EbsUtil(region, verbose=True)

        # filter out the already tagged volumes
        skip_volume_ids = []

        if not self.args.retag_volumes:
            # They don't want us to retag volumes that are already tagged, so
            # add the already tagged volumes to the list of volume IDs to skip.
            skip_volume_ids += ebs_snapshotter.get_already_tagged_volume_ids()
            logger.info('Skipping this many volume ids: %s',
                        len(skip_volume_ids))

        vol_ids = ebs_util.get_classified_volume_ids(skip_volume_ids)

        # .items() (rather than the Python-2-only .iteritems()) keeps this
        # working on both Python 2 and Python 3.
        for id_name, id_list in vol_ids._asdict().items():
            logger.info('name: %s amount: %s', id_name, len(id_list))

        ## Actually create the snapshot tags now
        if self.args.master_root_volumes and vol_ids.master_root:
            self.set_master_root_volume_tags(vol_ids.master_root,
                                             ebs_snapshotter, ebs_util)

        if self.args.node_root_volumes and vol_ids.node_root:
            self.set_node_root_volume_tags(vol_ids.node_root,
                                           ebs_snapshotter, ebs_util)

        if self.args.docker_storage_volumes and vol_ids.docker_storage:
            self.set_docker_storage_volume_tags(vol_ids.docker_storage,
                                                ebs_snapshotter, ebs_util)

        if self.args.manually_provisioned_pv_volumes and vol_ids.manually_provisioned_pv:
            self.set_manually_provisioned_pv_volume_tags(
                vol_ids.manually_provisioned_pv, ebs_snapshotter, ebs_util)

        if self.args.autoprovisioned_pv_volumes and vol_ids.autoprovisioned_pv:
            self.set_autoprovisioned_pv_volume_tags(
                vol_ids.autoprovisioned_pv, ebs_snapshotter, ebs_util)

        if self.args.unidentified_volumes and vol_ids.unidentified:
            # Unlike the other setters, this one is not handed ebs_util.
            self.set_unidentified_volume_tags(vol_ids.unidentified,
                                              ebs_snapshotter)
Exemple #2
0
    def __init__(self):
        """ Set up the check: parse the CLI arguments, then build the helpers.

        Calls self.parse_args() (expected to populate self.args), exports the
        requested AWS credentials profile via AWS_PROFILE when one was given,
        and creates the EbsUtil and MetricSender instances used by the check.
        """
        self.vol_state_data = None
        self.args = None

        self.parse_args()
        cli_args = self.args

        # Honor the AWS credentials profile the caller asked for, if any.
        profile = cli_args.aws_creds_profile
        if profile:
            os.environ['AWS_PROFILE'] = profile

        self.eu = EbsUtil(cli_args.region, verbose=cli_args.verbose)
        self.mts = MetricSender(verbose=cli_args.verbose)
Exemple #3
0
    def main(self):
        """ Serves as the entry point for the CLI.

        Exports the requested AWS credentials profile (if any) through the
        AWS_PROFILE environment variable, then walks every region returned by
        EbsUtil.get_supported_regions(). In each region, every volume class
        that was both requested on the command line and has volumes
        classified under it is handed to the matching
        ``self.set_*_volume_tags`` method.
        """
        if self.args.aws_creds_profile:
            os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

        for region in EbsUtil.get_supported_regions():
            # Single-argument print(...) produces the same output under
            # Python 2 (print statement with a parenthesised expression) and
            # Python 3 (print function); the bare print statements used
            # previously do not parse on Python 3.
            print("")
            print("Region: %s:" % region)
            ebs_util = EbsUtil(region.name, verbose=True)
            ebs_snapshotter = EbsSnapshotter(region.name, verbose=True)

            # filter out the already tagged volumes
            skip_volume_ids = []

            if not self.args.retag_volumes:
                # They don't want us to retag volumes that are already tagged, so
                # add the already tagged volumes to the list of volume IDs to skip.
                skip_volume_ids += ebs_snapshotter.get_already_tagged_volume_ids()

            vol_ids = ebs_util.get_classified_volume_ids(skip_volume_ids)

            ## Actually create the snapshot tags now
            if self.args.master_root_volumes and vol_ids.master_root:
                self.set_master_root_volume_tags(vol_ids.master_root,
                                                 ebs_snapshotter, ebs_util)

            if self.args.node_root_volumes and vol_ids.node_root:
                self.set_node_root_volume_tags(vol_ids.node_root,
                                               ebs_snapshotter, ebs_util)

            if self.args.docker_storage_volumes and vol_ids.docker_storage:
                self.set_docker_storage_volume_tags(vol_ids.docker_storage,
                                                    ebs_snapshotter, ebs_util)

            if self.args.manually_provisioned_pv_volumes and vol_ids.manually_provisioned_pv:
                self.set_manually_provisioned_pv_volume_tags(
                    vol_ids.manually_provisioned_pv, ebs_snapshotter, ebs_util)

            if self.args.autoprovisioned_pv_volumes and vol_ids.autoprovisioned_pv:
                self.set_autoprovisioned_pv_volume_tags(
                    vol_ids.autoprovisioned_pv, ebs_snapshotter, ebs_util)

            if self.args.unidentified_volumes and vol_ids.unidentified:
                # Unlike the other setters, this one is not handed ebs_util.
                self.set_unidentified_volume_tags(vol_ids.unidentified,
                                                  ebs_snapshotter)
Exemple #4
0
def main():
    """ Check that every PV volume carries the daily snapshot tag.

    Logs a start banner, parses the command line, optionally raises the log
    level to DEBUG and exports the requested AWS credentials profile through
    the AWS_PROFILE environment variable. After validating the region, each
    volume returned by get_pv_volume_ids() is checked with
    validate_volume_tag() against DAILY_SCHEDULE; the number of volumes
    without the tag is passed to send_metrics().

    Exits the process with status 1 when the region is not valid.
    """

    banner = '################################################################################'
    logger.info(banner)
    logger.info('  Starting Volume Snapshot Tag Checks - %s',
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
    logger.info(banner)
    logger.debug("main()")

    args = parse_args()
    if args.verbose:
        logger.setLevel(logging.DEBUG)

    if args.aws_creds_profile:
        os.environ['AWS_PROFILE'] = args.aws_creds_profile

    # A single snapshotter serves both the region validation and the tag
    # checks below; the EbsUtil instance that used to be created here was
    # never used, so it is no longer constructed.
    ebs_snapshotter = EbsSnapshotter(args.region, verbose=True)
    if not ebs_snapshotter.is_region_valid(args.region):
        logger.info("Invalid region")
        sys.exit(1)

    logger.info("Region: %s:", args.region)

    volumes = get_pv_volume_ids()

    # Number of PVs found without the daily snapshot tag.
    status = 0
    for volume in volumes:
        volume_id = volumes[volume]
        logger.info('Checking pv: %s, volume ID: %s', volume, volume_id)
        has_tag = validate_volume_tag(ebs_snapshotter, volume_id,
                                      DAILY_SCHEDULE)
        if not has_tag:
            # logger.warn is a deprecated alias of logger.warning.
            logger.warning('pv :%s has no "snapshot:daily" tags', volume)
            status += 1

    send_metrics(status)
Exemple #5
0
class EBSStuckVolumesCheck(object):
    """
       This class houses a check that looks for EBS volumes that are stuck in a
       transition state (attaching, detaching, busy, etc).

       Tracking data is kept between runs in STATE_DATA_FILE (YAML). It is a
       mapping of volume URI -> per-volume dict holding the volume id, its
       tracking state, its attach status and the time after which it counts
       as stuck.
    """
    def __init__(self):
        """ initialize EBSStuckVolumesCheck class """
        self.args = None
        self.vol_state_data = None

        self.parse_args()

        # Make sure we're using the profile they've requested.
        if self.args.aws_creds_profile:
            os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

        self.eu = EbsUtil(self.args.region, verbose=self.args.verbose)
        self.mts = MetricSender(verbose=self.args.verbose)

    def parse_args(self):
        ''' Parse arguments passed to the script and store them in self.args '''
        parser = argparse.ArgumentParser(
            description='OpenShift Cluster Metrics Checker')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose output')
        parser.add_argument('--region',
                            required=True,
                            help='AWS EC2 Region to check')
        parser.add_argument('--stuck-after', default=120, type=int,
                            help='Amount of time in seconds after which the volume is '
                                 'determined to be "stuck".')
        parser.add_argument('--aws-creds-profile',
                            required=False,
                            help='The AWS credentials profile to use.')

        self.args = parser.parse_args()

    @staticmethod
    def read_raw_volume_state_data():
        """ Reads in the raw string the volume state data from disk """
        if not os.path.isfile(STATE_DATA_FILE):
            return ""  # Act like the file is blank

        with open(STATE_DATA_FILE, 'r') as stream:
            return stream.read()

    def load_volume_state_data(self):
        """ Loads the volume state data from disk into self.vol_state_data.

        A missing or empty state file results in an empty dict.
        """
        if os.path.isfile(STATE_DATA_FILE):
            with open(STATE_DATA_FILE, 'r') as stream:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary Python objects. The state file is
                # written by this script, but consider yaml.safe_load once it
                # is confirmed that existing state files contain no
                # Python-specific tags.
                #
                # An empty file parses to None; fall back to an empty dict so
                # the rest of the check can treat the data as a mapping.
                self.vol_state_data = yaml.load(stream) or {}
        else:
            self.vol_state_data = {}

    def save_volume_state_data(self):
        """ Saves the volume state data to disk """
        with open(STATE_DATA_FILE, 'w') as outfile:
            yaml.dump(self.vol_state_data,
                      outfile,
                      default_flow_style=False,
                      allow_unicode=True)

    def add_new_transitioning_volumes(self, trans_vols):
        """ Adds volumes that we haven't seen before that are in a transitioning state.

        The attach status is refreshed for every volume in trans_vols,
        whether it is new or already tracked.
        """
        for vol in trans_vols:
            vol_uri = self.eu.generate_volume_uri(vol)

            if vol_uri not in self.vol_state_data:
                # This is the first time we've seen this volume, add it.
                self.vol_state_data[vol_uri] = {
                    STUCK_AFTER_KEY: datetime.now() +
                                     timedelta(seconds=self.args.stuck_after),
                    VOLUME_ID_KEY: str(vol.id),
                    STATE_KEY: TRANSITION_STATE,
                }

            self.vol_state_data[vol_uri][ATTACH_STATUS_KEY] = str(
                vol.attach_data.status)

    def set_stuck_volumes(self):
        """ Sets volumes to state 'stuck' if they've passed their transition state deadline. """
        # Take the timestamp once so every volume is judged against the same instant.
        now = datetime.now()

        for item in self.vol_state_data.values():
            # We don't want to set unstuck volumes back to stuck.
            if item[STATE_KEY] != UNSTUCK_STATE and now > item[STUCK_AFTER_KEY]:
                item[STATE_KEY] = STUCK_STATE

    def set_unstuck_volumes(self, trans_vols):
        """
            Change volumes that were in state 'stuck' that are no longer in transition,
            to state 'unstuck'.
        """

        trans_vol_ids = {str(vol.id) for vol in trans_vols}

        for cache_data in self.vol_state_data.values():
            if cache_data[STATE_KEY] == STUCK_STATE and \
               cache_data[VOLUME_ID_KEY] not in trans_vol_ids:
                # This volume was stuck, but isn't any longer
                cache_data[STATE_KEY] = UNSTUCK_STATE

    def _report_volumes_in_state(self, state, monitoring_value):
        """ Queues monitoring_value for every tracked volume in `state`, then sends the metrics. """
        for vol_uri, cache_data in self.vol_state_data.items():
            if cache_data[STATE_KEY] == state:
                self.mts.add_dynamic_metric(EBS_VOLUME_URI_DISC_KEY,
                                            EBS_VOLUME_URI_DISC_MACRO,
                                            [vol_uri])

                item_name = '%s[%s]' % (EBS_VOLUME_ATTACH_STATE_KEY, vol_uri)
                self.mts.add_metric({item_name: monitoring_value})

        # Actually send them
        self.mts.send_metrics()

    def report_stuck_volumes(self):
        """ sends data to monitoring that these volumes are stuck. """
        self._report_volumes_in_state(STUCK_STATE, MONITORING_STUCK_VALUE)

    def report_unstuck_volumes(self):
        """ sends data to monitoring that these volumes have become unstuck. """
        self._report_volumes_in_state(UNSTUCK_STATE, MONITORING_UNSTUCK_VALUE)

    def remove_unstuck_volumes_from_state_data(self):
        """ Removes state 'unstuck' volumes from the state data (no longer need to track them) """
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, which is not allowed while iterating the dict itself.
        for vol_uri in list(self.vol_state_data):
            if self.vol_state_data[vol_uri][STATE_KEY] == UNSTUCK_STATE:
                # This volume was stuck, but isn't any longer
                del self.vol_state_data[vol_uri]

    def remove_no_longer_transitioning_volumes(self, trans_vols):
        """ Remove volumes that were transitioning, but are no longer in the trans_vols list """

        trans_vol_ids = {str(vol.id) for vol in trans_vols}

        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, which is not allowed while iterating the dict itself.
        for vol_uri in list(self.vol_state_data):
            cache_data = self.vol_state_data[vol_uri]
            if cache_data[STATE_KEY] == TRANSITION_STATE and \
               cache_data[VOLUME_ID_KEY] not in trans_vol_ids:
                # This volume was transitioning, but isn't any longer
                del self.vol_state_data[vol_uri]

    def run(self):
        """ Run the main logic of this check """

        # Load the state machine data
        self.load_volume_state_data()

        # Get the volumes that are currently in a transitioning state
        trans_vols = self.eu.get_trans_attach_status_vols()

        # Based on that list, weed out the volumes that used to be transitioning,
        # that are no longer in the transitioning volumes list. This means that
        # it was a normal volume transition, probably from attaching to attached
        # or detaching to detached (aka None).
        self.remove_no_longer_transitioning_volumes(trans_vols)

        # Check on the volumes that were in the stuck state that are no longer
        # in the transitioning volumes list. This means that they went from stuck
        # to unstuck. We need to track these so that we can report that they've become
        # unstuck to monitoring.
        self.set_unstuck_volumes(trans_vols)

        # Add any volumes that are transitioning that we haven't seen before to our data
        self.add_new_transitioning_volumes(trans_vols)

        # Change volumes that are still transitioning and have hit their deadline to
        # finish that transition to a state of "stuck"
        self.set_stuck_volumes()

        # Report to monitoring the stuck volumes
        self.report_stuck_volumes()

        # Report to monitoring the volumes that were stuck, but are now unstuck (no
        # longer transitioning)
        self.report_unstuck_volumes()

        # Since the unstuck volumes have been reported, they can safely be removed from
        # our tracking now.
        self.remove_unstuck_volumes_from_state_data()

        # Make sure we save state for the next run.
        self.save_volume_state_data()

        self.eu.verbose_print("\nTracking Volumes")
        self.eu.verbose_print("----------------\n")

        # Cat out the state file
        raw_state_file = self.read_raw_volume_state_data()
        self.eu.verbose_print(raw_state_file)
Exemple #6
0
    def main(self):
        """ Serves as the entry point for the CLI.

        Exports the requested AWS credentials profile (if any) through the
        AWS_PROFILE environment variable, then walks every region returned by
        EbsUtil.get_supported_regions(). In each region, every volume class
        that was both requested on the command line and has volumes
        classified under it is passed to
        EbsSnapshotter.create_volume_snapshot_tag(), honoring
        self.args.dry_run.
        """
        if self.args.aws_creds_profile:
            os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

        # (label used in the output, attribute on the classified volume ids,
        #  attribute on self.args), in the order the classes are processed.
        volume_classes = (
            ("master root", "master_root", "master_root_volumes"),
            ("node root", "node_root", "node_root_volumes"),
            ("docker storage", "docker_storage", "docker_storage_volumes"),
            ("autoprovisioned pv", "autoprovisioned_pv",
             "autoprovisioned_pv_volumes"),
            ("manually provisioned pv", "manually_provisioned_pv",
             "manually_provisioned_pv_volumes"),
            ("unidentified", "unidentified", "unidentified_volumes"),
        )

        for region in EbsUtil.get_supported_regions():
            # Single-argument print(...) produces the same output under
            # Python 2 and Python 3; the bare print statements used
            # previously do not parse on Python 3.
            print("")
            print("Region: %s:" % region)
            eu = EbsUtil(region.name, verbose=True)
            es = EbsSnapshotter(region.name, verbose=True)

            # filter out the already tagged volumes
            skip_volume_ids = []

            if not self.args.retag_volumes:
                # They don't want us to retag volumes that are already tagged, so
                # add the already tagged volumes to the list of volume IDs to skip.
                skip_volume_ids += es.get_already_tagged_volume_ids()

            vol_ids = eu.get_classified_volume_ids(skip_volume_ids)

            ## Actually create the snapshot tags now
            for label, ids_attr, arg_attr in volume_classes:
                # Value given on the command line for this volume class; it
                # is forwarded unchanged as the second argument below.
                tag_setting = getattr(self.args, arg_attr)
                if not tag_setting:
                    continue

                class_volume_ids = getattr(vol_ids, ids_attr)
                if not class_volume_ids:
                    continue

                print("")
                print("  Setting %s volume tags:" % label)
                es.create_volume_snapshot_tag(class_volume_ids,
                                              tag_setting,
                                              prefix="    ",
                                              dry_run=self.args.dry_run)