Beispiel #1
0
 def __init__(self, config, ec2, instance_db, lock):
     """Store the collaborators and build an SNSChannel from ``config``."""
     self.config, self.ec2 = config, ec2
     self.instance_db, self.lock = instance_db, lock
     self.sns = SNSChannel(config)
Beispiel #2
0
class SnapshotRunner(object):

    # Maps each backup period name to the minimum time that must have
    # elapsed since the last snapshot of that period before a new one is
    # taken. timedelta's first positional argument is days, so the
    # thresholds are 0.9, 6.9 and 27.5 days.

    allowed_periods = {
        "daily": timedelta(0.9),
        "weekly": timedelta(6.9),
        "monthly": timedelta(27.5)}

    def __init__(self, config, ec2, instance_db, lock):
        """Keep references to the collaborators used by the runner.

        ``config`` is also used to build the SNSChannel through which
        snapshot trimming failures are reported.
        """
        self.config, self.ec2 = config, ec2
        self.instance_db, self.lock = instance_db, lock
        self.sns = SNSChannel(config)

    def get_snapshot_instances(self, options):
        """ Get all instances registered for auto snapshots
        """

        if options.tag:
            return self._get_tagged_instances(options.tag)

        return self._get_registered_instances()

    def _get_tagged_instances(self, tag):
        """Support instance selection for backup based on a tag value.
        """
        tag_name, tag_value = tag.split(":", 1)
        for r in self.ec2.get_all_instances(
                filters={'tag:%s' % tag_name: tag_value}):
            for i in r.instances:
                yield ({}, i)

    def _get_registered_instances(self):
        """Support instance backup based on registration.
        """
        # hmm.. scan over all apps and all instances, sort of worst case.
        # this could be made more efficient, batching 20 instances at a time.
        for record in self.instance_db.scan():
            # TODO check for non existant instance and mark/dead
            results = self.ec2.get_all_instances([record['instance_id']])
            if not results:
                log.warning(
                    "Could not find registered instance %s",
                    record["instance_id"])
                continue
            for r in results:
                i = r.instances.pop()
                yield (record, i)

    def get_instance_volumes(self, i):
        if i.root_device_type != "ebs":
            log.warning(
                "Not backing up instance: %s/%s non ebs root device",
                i.id, i.tags.get("Name", "NA"))
            return
        devs = i.block_device_mapping.items()
        # Refuse the temptation to guess. If there are multiple volumes
        # attached to an instance, it could be raided/lvm/etc and we need
        # coordination with the instance to get a multi-volume consistent snap.
        if len(devs) > 2:
            log.warning(
                "Not backing up instance: %s/%s, more than one volume",
                i.id, i.tags.get("Name", "NA"))
            return

        for dev_name, bdt in devs:
            if not bdt.volume_id:
                continue
            yield bdt.volume_id, dev_name

    def run_period(self, options):
        """Snapshot every selected instance for ``options.period``.

        Each instance is processed while holding its own
        ``snapshot-<instance id>`` lock, and all snapshots of the run
        share a single UTC timestamp.
        """
        period = options.period
        started = datetime.now(tzutc())
        log.info("Creating snapshots for %s on %s" % (
            period, started.strftime("%Y/%m/%d")))
        for record, instance in self.get_snapshot_instances(options):
            lock_name = "snapshot-%s" % instance.id
            with self.lock.acquire(lock_name):
                volumes = self.get_instance_volumes(instance)
                for volume_id, device in volumes:
                    self._snapshot_instance(
                        record, instance, volume_id, device,
                        started, period)

    def _snapshot_instance(self, r, i, vol_id, dev, now, period):
        """
        arg: r -> record
        arg: i -> boto ec2 instance
        arg: now -> datetime of cur time.
        """
        # Get previous snapshots
        snapshots = self.ec2.get_all_snapshots(
            filters={'tag:inst_snap': "%s/%s" % (i.id, period)})
        snapshots.sort(
            key=operator.attrgetter('start_time'), reverse=True)

        name = r.get('unit_name') or i.tags.get('Name') or i.id

        # Check if its too soon for a new snapshot from the last
        if snapshots:
            last_snapshot = date_parse(snapshots[0].start_time)
            if now - last_snapshot < self.allowed_periods[period]:
                log.warning(
                    "Skipping %s, last snapshot for %s was %s",
                    name, period, now - last_snapshot)
                return

        # Create new snapshot
        description = "%s %s %s" % (
            name, period.capitalize(), now.strftime("%Y-%m-%d"))
        log.debug("Snapshotting %s on %s as %s",
                  i.id, vol_id, description)
        snapshot = self.ec2.create_snapshot(vol_id, description)
        snapshot.add_tag('Name', description)

        # Copy over instance tags to the snapshot except name.
        for k, v in i.tags.items():
            if k == "Name":
                continue
            snapshot.add_tag(k, v)

        # If the instance was registered with an app id, and the
        # instance doesn't already have one, then copy over the
        # registed one as a tag.
        if 'app_id' in r and not 'app_id' in i.tags:
            snapshot.add_tag('app_id', r['app_id'])

        # Record metadata for restoration and backup system
        snapshot.add_tag('inst_snap', "%s/%s" % (i.id, period))
        snapshot.add_tag('inst_dev', dev)

        # Trim extras
        backup_count = self.config.get("%s-backups" % period)
        snapshots.insert(0, snapshot)
        if len(snapshots) <= backup_count:
            return
        log.info("Trimming excess %s snapshots %s max:%d existing:%d" % (
            period,
            [s.tags.get('Name') for s in snapshots[backup_count:]],
            backup_count,
            len(snapshots)))

        for s in snapshots[backup_count:]:
            try:
                s.delete()
            except Exception as e:
                ## don't exit if the snapshot can't be deleted.
                msg = "{}: Trimming {} failed for {}: '{}'".format(
                    now.strftime("%Y-%m-%d %H:%M:%S"), s, i.id, e.message)
                log.critical(msg)
                self.sns.notify(msg, "iaas-backup: Error trimming {} "
                    .format(s))

    def _get_instance(self, options):
        reservations = self.ec2.get_all_instances([options.instance_id])

        if not len(reservations) == 1:
            log.error("Invalid instance id %s" % options.instance_id)
            return
        if not len(reservations[0].instances) == 1:
            log.error("Invalid instance id %s" % options.instance_id)
            return
        instance = reservations[0].instances[0]
        return instance

    def register(self, options):
        """Register an instance for the snapshot system.
        """
        instance = getattr(options, 'instance', None)
        if instance is None:
            instance = self._get_instance(options)
            if instance is None:
                return
        log.info("Registering snapshot instance")

        vol_id = self.get_instance_volume(instance)
        if vol_id is None:
            return

        item = self.instance_db.new_item(
            options.app_id, instance.id, {
                'record': instance.id,
                'unit_name': options.unit and options.unit.strip() or ""})
        item.save()
        log.info("Instance %s registered for snapshots",
                 instance.id)
        return True