Ejemplo n.º 1
0
class StatsPublisher(object):
    """Periodically forwards newly stored metric values to publishers.

    Values are read from the time-series store passed to the constructor;
    each run only asks the store for values newer than the newest timestamp
    seen by the previous run.
    """

    DEFAULT_PUBLISH_INTERVAL_SECS = 20.0

    def __init__(self, tsdb):
        """Bind to ``tsdb`` and read settings from the agent config service.

        Args:
            tsdb: store exposing ``get_keys()`` and
                ``get_values_since(ts, key)``.
        """
        self._logger = logging.getLogger(__name__)
        self._db = tsdb
        self._last_seen_ts = 0

        # XXX plugin configuration should be decoupled from agent_config arg
        # parsing
        config = common.services.get(ServiceName.AGENT_CONFIG)
        self._agent_config = config
        self._host_id = config.host_id
        # Fall back to the class default when the option is not set.
        interval = config.__dict__.get(
            "stats_publish_interval",
            StatsPublisher.DEFAULT_PUBLISH_INTERVAL_SECS)
        self._publish_interval_secs = float(interval)

        self._publisher_thread = None
        self._publishers = []

    def start_publishing(self):
        """Start a daemon ``Periodic`` thread that calls ``publish``."""
        worker = Periodic(self.publish, self._publish_interval_secs)
        self._publisher_thread = worker
        worker.daemon = True
        worker.start()

    def stop_publishing(self):
        """Stop the periodic thread; a no-op if it was never started."""
        worker = self._publisher_thread
        if worker is None:
            return
        worker.stop()

    def register_publisher(self, publisher):
        """ Add a new publisher

        Args:
            publisher: Publisher instance
        """
        self._publishers.append(publisher)

    def configure_publishers(self):
        """Register a GraphitePublisher aimed at the configured endpoint."""
        endpoint = self._agent_config.stats_store_endpoint
        self.register_publisher(
            GraphitePublisher(host_id=self._host_id, carbon_host=endpoint))

    def publish(self):
        """Hand all values newer than the last seen timestamp to publishers.

        Every key known to the store is included in the payload, even when
        it has no new values. Nothing is published when the store has no
        keys at all.
        """
        since = self._last_seen_ts
        newest_ts = since
        stats_by_metric = {}
        for key in self._db.get_keys():
            samples = self._db.get_values_since(since, key)
            stats_by_metric[key] = samples
            if not samples:
                continue
            # The first element of each sample is used as its timestamp.
            newest_ts = max(newest_ts, max(sample[0] for sample in samples))

        self._last_seen_ts = newest_ts
        if not stats_by_metric:
            return

        for target in self._publishers:
            self._logger.info("publish metrics with %s" % str(target))
            target.publish(stats_by_metric)
Ejemplo n.º 2
0
class StatsCollector(object):
    """Periodically pulls metrics from collectors into a time-series cache."""

    DEFAULT_COLLECT_INTERVAL_SECS = 20.0

    def __init__(self, tsdb):
        """Read the collection interval and set the cache retention policy.

        Args:
            tsdb: metrics cache exposing ``set_policy(freq, duration)`` and
                ``add(key, ts, value)``.
        """
        self._logger = logging.getLogger(__name__)

        # XXX plugin configuration should be decoupled from agent_config arg
        # parsing
        agent_config = common.services.get(ServiceName.AGENT_CONFIG)
        self._collect_interval_secs = float(
            agent_config.__dict__.get(
                "stats_collection_interval",
                StatsCollector.DEFAULT_COLLECT_INTERVAL_SECS))

        self._collector_thread = None
        self._collectors = []

        # Cache up to 1 hour's worth of metrics
        self._metric_cache = tsdb
        assert (self._collect_interval_secs < 3600)
        freq_str = "%ds" % self._collect_interval_secs
        self._metric_cache.set_policy(freq_str, "1h")
        self._last_publish_ts = datetime.now()

    def start_collection(self):
        """Start a daemon ``Periodic`` thread that calls ``collect``."""
        self._collector_thread = Periodic(self.collect,
                                          self._collect_interval_secs)
        self._collector_thread.daemon = True
        self._collector_thread.start()

    def stop_collection(self):
        """Stop the periodic thread; a no-op if it was never started."""
        if self._collector_thread is not None:
            self._collector_thread.stop()

    def register_collector(self, collector):
        """ Add a new collector

        Args:
            collector: Collector instance
        """
        self._collectors.append(collector)

    def configure_collectors(self):
        """Register the built-in PerfManagerCollector."""
        # XXX List of collectors are hard coded for now.
        pm_collector = PerfManagerCollector()
        self.register_collector(pm_collector)
        self._logger.info("Stats collector configured")

    def collect(self):
        """Collect from every registered collector and cache the values.

        All collectors in one run are asked for metrics since the same
        instant (the start of the previous run). Each collector is expected
        to return a mapping of key -> iterable of ``(timestamp, value)``
        tuples.
        """
        if not self._collectors:
            # Nothing to ask; keep the window start untouched so a collector
            # registered later still covers the whole period.
            return

        # Capture the window once per run. Advancing the timestamp inside
        # the loop would hand every collector after the first a window that
        # starts "now" and silently drop its data.
        since = self._last_publish_ts
        self._last_publish_ts = datetime.now()

        for c in self._collectors:
            metrics = c.collect(since=since)
            for key in metrics.keys():
                self._logger.debug("Metrics collected %s -> %s" %
                                   (key, metrics[key]))
                for value_tuple in metrics[key]:
                    self._metric_cache.add(key, value_tuple[0], value_tuple[1])
Ejemplo n.º 3
0
class StatsPublisher(object):
    """Pushes metric values from a time-series store to publishers.

    A background ``Periodic`` thread calls ``publish`` at a configurable
    interval; each call forwards only values newer than the newest
    timestamp observed so far.
    """

    DEFAULT_PUBLISH_INTERVAL_SECS = 20.0

    def __init__(self, tsdb):
        """Store ``tsdb`` and load host id / interval from the agent config.

        Args:
            tsdb: store exposing ``get_keys()`` and
                ``get_values_since(ts, key)``.
        """
        self._logger = logging.getLogger(__name__)
        self._db = tsdb
        self._last_seen_ts = 0

        # XXX plugin configuration should be decoupled from agent_config arg
        # parsing
        self._agent_config = common.services.get(ServiceName.AGENT_CONFIG)
        self._host_id = self._agent_config.host_id
        configured = self._agent_config.__dict__.get(
            "stats_publish_interval",
            StatsPublisher.DEFAULT_PUBLISH_INTERVAL_SECS)
        self._publish_interval_secs = float(configured)

        self._publisher_thread = None
        self._publishers = []

    def start_publishing(self):
        """Create and start the daemon thread that runs ``publish``."""
        thread = Periodic(self.publish, self._publish_interval_secs)
        self._publisher_thread = thread
        thread.daemon = True
        thread.start()

    def stop_publishing(self):
        """Stop the background thread if one has been started."""
        thread = self._publisher_thread
        if thread is not None:
            thread.stop()

    def register_publisher(self, publisher):
        """ Add a new publisher

        Args:
            publisher: Publisher instance
        """
        self._publishers.append(publisher)

    def configure_publishers(self):
        """Create a GraphitePublisher for the configured store endpoint."""
        carbon_host = self._agent_config.stats_store_endpoint
        graphite = GraphitePublisher(host_id=self._host_id,
                                     carbon_host=carbon_host)
        self.register_publisher(graphite)

    def publish(self):
        """Forward values newer than ``_last_seen_ts`` to every publisher.

        The payload maps each key in the store to its (possibly empty) list
        of new values. Publishers are skipped only when the store reports
        no keys.
        """
        cutoff = self._last_seen_ts
        high_water = cutoff
        payload = {}
        for name in self._db.get_keys():
            fresh = self._db.get_values_since(cutoff, name)
            payload[name] = fresh
            if fresh:
                # Element 0 of each entry is treated as its timestamp.
                newest_here = max(entry[0] for entry in fresh)
                high_water = max(high_water, newest_here)

        self._last_seen_ts = high_water
        if payload:
            for sink in self._publishers:
                self._logger.info("publish metrics with %s" % str(sink))
                sink.publish(payload)
Ejemplo n.º 4
0
class StatsCollector(object):
    """Periodically pulls metrics from collectors into a time-series cache."""

    DEFAULT_COLLECT_INTERVAL_SECS = 20.0

    def __init__(self, tsdb):
        """Read the collection interval and set the cache retention policy.

        Args:
            tsdb: metrics cache exposing ``set_policy(freq, duration)`` and
                ``add(key, ts, value)``.
        """
        self._logger = logging.getLogger(__name__)

        # XXX plugin configuration should be decoupled from agent_config arg
        # parsing
        agent_config = common.services.get(ServiceName.AGENT_CONFIG)
        self._collect_interval_secs = float(agent_config.__dict__.get(
            "stats_collection_interval",
            StatsCollector.DEFAULT_COLLECT_INTERVAL_SECS))

        self._collector_thread = None
        self._collectors = []

        # Cache up to 1 hour's worth of metrics
        self._metric_cache = tsdb
        assert(self._collect_interval_secs < 3600)
        freq_str = "%ds" % self._collect_interval_secs
        self._metric_cache.set_policy(freq_str, "1h")
        self._last_publish_ts = datetime.now()

    def start_collection(self):
        """Start a daemon ``Periodic`` thread that calls ``collect``."""
        self._collector_thread = Periodic(self.collect,
                                          self._collect_interval_secs)
        self._collector_thread.daemon = True
        self._collector_thread.start()

    def stop_collection(self):
        """Stop the periodic thread; a no-op if it was never started."""
        if self._collector_thread is not None:
            self._collector_thread.stop()

    def register_collector(self, collector):
        """ Add a new collector

        Args:
            collector: Collector instance
        """
        self._collectors.append(collector)

    def configure_collectors(self):
        """Register the built-in PerfManagerCollector."""
        # XXX List of collectors are hard coded for now.
        pm_collector = PerfManagerCollector()
        self.register_collector(pm_collector)

    def collect(self):
        """Collect from every registered collector and cache the values.

        All collectors in one run are asked for metrics since the same
        instant (the start of the previous run). Each collector is expected
        to return a mapping of key -> iterable of ``(timestamp, value)``
        tuples.
        """
        if not self._collectors:
            # Nothing to ask; keep the window start untouched so a collector
            # registered later still covers the whole period.
            return

        # Capture the window once per run. Advancing the timestamp inside
        # the loop would hand every collector after the first a window that
        # starts "now" and silently drop its data.
        since = self._last_publish_ts
        self._last_publish_ts = datetime.now()

        for c in self._collectors:
            self._logger.debug("Collecting from %s" % str(c))
            metrics = c.collect(since=since)
            for key in metrics.keys():
                self._logger.debug(" %s -> %s" % (key, metrics[key]))
                for value_tuple in metrics[key]:
                    self._metric_cache.add(key, value_tuple[0], value_tuple[1])
Ejemplo n.º 5
0
    def test_periodic(self):
        """Periodic runs its callback repeatedly on a thread until stopped."""
        counter = CallCounter()
        worker = Periodic(counter.test_fn, 0.05)
        worker.start()
        # One more thread than self._thread_count is expected while running.
        running = self._match_thread_count(self._thread_count + 1)
        self.assertTrue(running)

        self._check_value(self._match_repeated_calls, counter)

        # After a blocking stop the count should match self._thread_count.
        worker.stop(wait=True)
        stopped = self._match_thread_count(self._thread_count)
        self.assertTrue(stopped)
Ejemplo n.º 6
0
    def test_periodic(self):
        """Start a Periodic, check it keeps calling, then stop and join it."""
        call_counter = CallCounter()
        thread = Periodic(call_counter.test_fn, 0.05)
        thread.start()
        # While running, the thread count is expected to be one above
        # self._thread_count.
        expected_running = self._thread_count + 1
        self.assertTrue(self._match_thread_count(expected_running))

        self._check_value(self._match_repeated_calls, call_counter)

        # stop(wait=True) is followed by a check against the original count.
        thread.stop(wait=True)
        expected_stopped = self._thread_count
        self.assertTrue(self._match_thread_count(expected_stopped))
 def start_publishing(self):
     """Start a daemon Periodic thread that calls ``publish``."""
     worker = Periodic(self.publish, self._publish_interval_secs)
     self._publisher_thread = worker
     # Mark as daemon before starting the thread.
     worker.daemon = True
     worker.start()
class StatsPublisher(object):
    """Periodically publishes cached metrics, backing off after failures.

    After ``publish_try_count`` consecutive failed publishes the periodic
    thread is switched to ``failed_publish_interval_secs``; the first
    successful publish afterwards switches it back to the configured
    publish interval.
    """

    DEFAULT_PUBLISH_INTERVAL_SECS = 20.0
    DEFAULT_PUBLISH_TRY_COUNT = 10
    DEFAULT_FAILED_PUBLISH_INTERVAL_SECS = 10 * 60

    # True while the publisher thread runs on the long back-off interval.
    # Kept as a class-level default so the flag always exists.
    _backing_off = False

    def __init__(
            self,
            tsdb,
            publish_try_count=DEFAULT_PUBLISH_TRY_COUNT,
            failed_publish_interval_secs=DEFAULT_FAILED_PUBLISH_INTERVAL_SECS):
        """Bind to ``tsdb`` and read settings from the agent config service.

        Args:
            tsdb: store exposing ``get_keys()`` and
                ``get_values_since(ts, key)``.
            publish_try_count: consecutive failures tolerated before the
                publisher backs off.
            failed_publish_interval_secs: wait interval used while backing
                off.
        """
        self._logger = logging.getLogger(__name__)
        self._db = tsdb
        self._last_seen_ts = 0
        self.failed_count = 0
        self.publish_try_count = publish_try_count
        self.failed_publish_interval_secs = failed_publish_interval_secs
        self._backing_off = False

        # XXX plugin configuration should be decoupled from agent_config arg
        # parsing
        self._agent_config = common.services.get(ServiceName.AGENT_CONFIG)
        self._hostname = self._agent_config.hostname
        if self._hostname is None:
            self._hostname = socket.gethostname()

        self._publish_interval_secs = float(
            self._agent_config.__dict__.get(
                "stats_publish_interval",
                StatsPublisher.DEFAULT_PUBLISH_INTERVAL_SECS))

        self._publisher_thread = None
        self._publishers = []

    def start_publishing(self):
        """Start a daemon ``Periodic`` thread that calls ``publish``."""
        self._publisher_thread = Periodic(self.publish,
                                          self._publish_interval_secs)
        self._publisher_thread.daemon = True
        self._publisher_thread.start()

    def stop_publishing(self):
        """Stop the periodic thread; a no-op if it was never started."""
        if self._publisher_thread is not None:
            self._publisher_thread.stop()

    def register_publisher(self, publisher):
        """ Add a new publisher

        Args:
            publisher: Publisher instance
        """
        self._publishers.append(publisher)

    def configure_publishers(self):
        """Register a GraphitePublisher built from the agent configuration."""
        stats_store_endpoint = self._agent_config.stats_store_endpoint
        stats_store_port = self._agent_config.stats_store_port
        stats_host_tags = self._agent_config.stats_host_tags
        pm_publisher = GraphitePublisher(hostname=self._hostname,
                                         carbon_host=stats_store_endpoint,
                                         carbon_port=stats_store_port,
                                         host_tags=stats_host_tags)
        self.register_publisher(pm_publisher)
        self._logger.info("Stats publisher configured")

    def _set_wait_interval(self, interval_secs):
        """Change the periodic thread's interval if the thread exists."""
        # publish() may be invoked directly, without start_publishing().
        if self._publisher_thread is not None:
            self._publisher_thread.update_wait_interval(interval_secs)

    def publish(self):
        """Publish values newer than the last seen timestamp.

        Only the first registered publisher is used. Its ``publish`` return
        value is treated as a success flag that drives the failure counter
        and the back-off interval.
        """
        if not self._publishers:
            self._logger.debug("No publishers found.")
            return

        retrieved_stats = {}
        latest_ts = self._last_seen_ts

        metric_keys = self._db.get_keys()
        self._logger.debug("DB metrics size %d" % len(metric_keys))

        for metric in metric_keys:
            values = self._db.get_values_since(self._last_seen_ts, metric)
            retrieved_stats[metric] = values
            if values:
                latest_ts = max(latest_ts, max([x[0] for x in values]))

        # NOTE(review): the timestamp advances even when the publish below
        # fails, so values from a failed attempt are not retried - confirm
        # this is intended.
        self._last_seen_ts = latest_ts
        if retrieved_stats:
            # Use first publisher by default for now
            publisher = self._publishers[0]
            published = publisher.publish(retrieved_stats)
            if not published:
                self.failed_count += 1
                self._logger.critical(
                    "Publisher failed to publish stats, failed_count:%s" %
                    str(self.failed_count))
            elif self.failed_count > 0 or self._backing_off:
                # Recovered. The back-off branch below resets failed_count,
                # so the flag is what tells us the thread is still on the
                # long interval. Restore the configured interval rather
                # than the class default.
                self.failed_count = 0
                self._backing_off = False
                self._set_wait_interval(self._publish_interval_secs)
        else:
            self._logger.debug("No metrics to send")

        if self.failed_count >= self.publish_try_count:
            self.failed_count = 0
            self._backing_off = True
            self._logger.critical(
                "Too many failed attempts to publish stats. Publisher will sleep for %s seconds now"
                % str(self.failed_publish_interval_secs))
            self._set_wait_interval(self.failed_publish_interval_secs)
Ejemplo n.º 9
0
 def start_collection(self):
     """Start a daemon Periodic thread that calls ``collect``."""
     worker = Periodic(self.collect, self._collect_interval_secs)
     self._collector_thread = worker
     # Mark as daemon before starting the thread.
     worker.daemon = True
     worker.start()
Ejemplo n.º 10
0
class EsxImageManager(ImageManager):
    """Image operations on ESX datastores: lookup, copy and tmp cleanup."""

    NUM_MAKEDIRS_ATTEMPTS = 10
    DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL = 600.0
    REAP_TMP_IMAGES_GRACE_PERIOD = 600.0
    IMAGE_MARKER_FILE_NAME = "unused_image_marker.txt"
    IMAGE_TIMESTAMP_FILE_NAME = "image_timestamp.txt"
    IMAGE_TIMESTAMP_FILE_RENAME_SUFFIX = ".renamed"

    def __init__(self, vim_client, ds_manager):
        """
        :param vim_client: client used for directory, file and task calls
        :param ds_manager: datastore manager used to enumerate datastores
        """
        super(EsxImageManager, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._vim_client = vim_client
        self._ds_manager = ds_manager
        self._image_reaper = None

    def monitor_for_cleanup(self,
                            reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
        """Start a daemon Periodic thread that runs reap_tmp_images.

        :param reap_interval: interval handed to Periodic
        """
        self._image_reaper = Periodic(self.reap_tmp_images, reap_interval)
        self._image_reaper.daemon = True
        self._image_reaper.start()

    def cleanup(self):
        """Stop the reaper thread; a no-op if it was never started."""
        if self._image_reaper is not None:
            self._image_reaper.stop()

    @log_duration
    def check_image(self, image_id, datastore):
        """Return True if the image's vmdk path exists on the datastore.

        Any error raised during the lookup is logged and reported as False.
        """
        image_dir = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            return os.path.exists(image_dir)
        except Exception:
            self._logger.exception(
                "Error looking up %s" % image_dir)
            return False

    def check_and_validate_image(self, image_id, ds_id):
        """Return True if the image directory and its timestamp file exist.

        The following method is intended as a replacement of check_image in
        the vm creation workflow compatible with the new image sweeper.
        For an image to be valid both the directory and the image timestamp
        file must exist on the datastore. Lookup errors are logged and
        reported as False.
        """
        image_dir = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        try:
            if not os.path.exists(image_dir):
                return False
        except Exception:
            self._logger.exception(
                "Error looking up %s" % image_dir)
            return False

        # Check the existence of the timestamp file
        timestamp_pathname = os.path.join(image_dir,
                                          self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            return os.path.exists(timestamp_pathname)
        except Exception as ex:
            self._logger.exception(
                "Exception looking up %s, %s" % (timestamp_pathname, ex))
            return False

    def touch_image_timestamp(self, ds_id, image_id):
        """Update the modification time of the image timestamp file.

        :param ds_id: datastore id
        :param image_id: image id
        :raises: whatever os.utime raises (e.g. when the file is missing);
                 the error is logged first and re-raised unchanged.
        """
        image_path = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        # Touch the timestamp file
        timestamp_pathname = os.path.join(image_path, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            os.utime(timestamp_pathname, None)
        except Exception as ex:
            self._logger.exception(
                "Exception looking up %s, %s" % (timestamp_pathname, ex))
            # Bare raise keeps the original traceback.
            raise

    @log_duration
    def check_image_dir(self, image_id, datastore):
        """Return True if the directory holding the image vmdk exists.

        Lookup errors are logged and reported as False.
        """
        image_path = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            return os.path.exists(os.path.dirname(image_path))
        except Exception:
            self._logger.error(
                "Error looking up %s" % image_path, exc_info=True)
            return False

    def get_image_directory_path(self, datastore_id, image_id):
        """Return image_directory_path() for the datastore and image."""
        return image_directory_path(datastore_id, image_id)

    def get_image_path(self, datastore_id, image_id):
        """Return the OS path of the image vmdk on the datastore."""
        return os_vmdk_path(datastore_id, image_id, IMAGE_FOLDER_NAME_PREFIX)

    def image_size(self, image_id):
        """Return the size in bytes of the image's flat vmdk.

        Image datastores are tried in the order reported by the datastore
        manager; the first one where the file can be sized wins.

        :raises NoSuchResourceException: if no image datastore has the file
        """
        last_error = None
        for image_ds in self._ds_manager.image_datastores():
            try:
                image_path = os_vmdk_flat_path(image_ds, image_id,
                                               IMAGE_FOLDER_NAME_PREFIX)
                return os.path.getsize(image_path)
            except os.error as err:
                last_error = err
                self._logger.info("Image %s not found in DataStore %s" %
                                  (image_id, image_ds))

        # We are outside any except block here, so report the remembered
        # error explicitly instead of relying on exc_info.
        self._logger.warning("Failed to get image size: %s" % last_error)
        # Failed to access shared image.
        raise NoSuchResourceException(
            ResourceType.IMAGE,
            "Image does not exist.")

    def _load_json(self, metadata_path):
        """Return the parsed JSON file, or {} if it is missing or invalid."""
        if not os.path.exists(metadata_path):
            return {}

        with open(metadata_path) as fh:
            try:
                return json.load(fh)
            except ValueError:
                self._logger.error(
                    "Error loading metadata file %s" % metadata_path,
                    exc_info=True)
        return {}

    def get_image_metadata(self, image_id, datastore):
        """Load the image's metadata file; {} if it is missing or invalid."""
        metadata_path = os_metadata_path(datastore,
                                         image_id,
                                         IMAGE_FOLDER_NAME_PREFIX)
        self._logger.info("Loading metadata %s" % metadata_path)
        return self._load_json(metadata_path)

    def _get_datastore_type(self, datastore_id):
        """Return the type of the datastore with the given id.

        :raises IndexError: if the datastore manager reports no datastore
                            with that id
        """
        datastores = self._ds_manager.get_datastores()
        return [ds.type for ds in datastores if ds.id == datastore_id][0]

    def _prepare_virtual_disk_spec(self, disk_type, adapter_type):
        """Build a VirtualDiskSpec for the given disk and adapter type.

        :param disk_type [vim.VirtualDiskManager.VirtualDiskType]:
        :param adapter_type [vim.VirtualDiskManager.VirtualDiskAdapterType]:
        :return: the populated vim.VirtualDiskManager.VirtualDiskSpec
        """
        _vd_spec = vim.VirtualDiskManager.VirtualDiskSpec()
        _vd_spec.diskType = str(disk_type)
        _vd_spec.adapterType = str(adapter_type)

        return _vd_spec

    def _copy_to_tmp_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """ Copy an image into a temp location.
            1. Create the temp directory. Its name contains a random UUID
            to prevent collisions with concurrent copies. On VSAN it is
            placed inside the destination image directory, otherwise at the
            datastore top level.
            2. Copy the metadata file over, if the source has one.
            3. Create the image timestamp file.
            4. Copy the vmdk over as a thin, lsiLogic disk.

            @return the tmp image directory on success.
        """
        ds_type = self._get_datastore_type(dest_datastore)
        tmp_folder = compond_path_join(TMP_IMAGE_FOLDER_NAME_PREFIX, str(uuid.uuid4()))
        if ds_type == DatastoreType.VSAN:
            tmp_image_dir = os_datastore_path(dest_datastore,
                                              compond_path_join(IMAGE_FOLDER_NAME_PREFIX, dest_id),
                                              tmp_folder)
        else:
            tmp_image_dir = os_datastore_path(dest_datastore, tmp_folder)

        # Create the temp directory
        self._vim_client.make_directory(tmp_image_dir)

        # Copy the metadata file if it exists.
        source_meta = os_metadata_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX)
        if os.path.exists(source_meta):
            try:
                dest_meta = os.path.join(tmp_image_dir, metadata_filename(dest_id))
                shutil.copy(source_meta, dest_meta)
            except Exception:
                self._logger.exception("Failed to copy metadata file %s", source_meta)
                raise

        # Create the timestamp file
        self._create_image_timestamp_file(tmp_image_dir)

        _vd_spec = self._prepare_virtual_disk_spec(
            vim.VirtualDiskManager.VirtualDiskType.thin,
            vim.VirtualDiskManager.VirtualDiskAdapterType.lsiLogic)

        self._manage_disk(vim.VirtualDiskManager.CopyVirtualDisk_Task,
                          sourceName=vmdk_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX),
                          destName=os_to_datastore_path(os.path.join(tmp_image_dir, "%s.vmdk" % dest_id)),
                          destSpec=_vd_spec)
        return tmp_image_dir

    def _move_image(self, image_id, datastore, tmp_dir):
        """
        Move a tmp folder into the image datastore. Handles concurrent
        moves by taking a FileBackedLock on the destination image path
        while doing the move.
        Has the following side effects:
            a - If the destination image already exists, it is assumed that
            someone else successfully copied the image over;
            DiskAlreadyExistException is raised.
            b - On any failure inside the locked section (including case a
            and failure to acquire the lock) the tmp image directory is
            deleted through the vim client and the error is re-raised.

        image_id: String.The image id of the image being moved.
        datastore: String. The datastore id of the datastore.
        tmp_dir: String. The absolute path of the temp image directory.

        raises: ImageNotFoundException if tmp_dir does not exist
                DiskAlreadyExistException if the image is already in place
                OsError if the move fails
                AcquireLockFailure, InvalidFile if we fail to lock the
                destination image.
        """
        ds_type = self._get_datastore_type(datastore)
        image_path = os_datastore_path(datastore, compond_path_join(IMAGE_FOLDER_NAME_PREFIX, image_id))
        self._logger.info("_move_image: %s => %s, ds_type: %s" % (tmp_dir, image_path, ds_type))

        if not os.path.exists(tmp_dir):
            raise ImageNotFoundException("Temp image %s not found" % tmp_dir)

        try:
            with FileBackedLock(image_path, ds_type, retry=300, wait_secs=0.01):  # wait lock for 3 seconds
                if self._check_image_repair(image_id, datastore):
                    raise DiskAlreadyExistException("Image already exists")

                if ds_type == DatastoreType.VSAN:
                    # on VSAN, move all files under [datastore]/image_[image_id]/tmp_image_[uuid]/* to
                    # [datastore]/image_[image_id]/*.
                    # Also we do not delete tmp_image folder in success case, because VSAN accesses it
                    # when creating linked VM, even the folder is now empty.
                    for entry in os.listdir(tmp_dir):
                        shutil.move(os.path.join(tmp_dir, entry), os.path.join(image_path, entry))
                else:
                    # on VMFS/NFS/etc, rename [datastore]/tmp_image_[uuid] to [datastore]/image_[image_id]
                    self._vim_client.move_file(tmp_dir, image_path)
        except:  # noqa: E722 - deliberately broad: clean up, then re-raise
            self._logger.exception("Move image %s to %s failed" % (image_id, image_path))
            self._vim_client.delete_file(tmp_dir)
            raise

    def _check_image_repair(self, image_id, datastore):
        """Check that an image exists and repair its timestamp file.

        The following method should be used to check and validate the
        existence of a previously created image. With the new image delete
        path the "timestamp" file must exist inside the image directory. If
        the vmdk exists and the timestamp file does not, it may mean that an
        image delete operation was aborted mid-way. In this case the
        timestamp file is recreated. All operations are expected to be
        performed while holding the image directory lock (FileBackedLock);
        the caller is required to hold the lock.

        :return: True if the image exists (possibly after repair), False if
                 the vmdk is missing or the repair failed
        """
        vmdk_pathname = os_vmdk_path(datastore,
                                     image_id,
                                     IMAGE_FOLDER_NAME_PREFIX)

        image_dirname = os.path.dirname(vmdk_pathname)
        try:
            # Check vmdk file
            if not os.path.exists(vmdk_pathname):
                self._logger.info("Vmdk path doesn't exists: %s" %
                                  vmdk_pathname)
                return False
        except Exception as ex:
            self._logger.exception(
                "Exception validating %s, %s" % (image_dirname, ex))
            return False

        # Check timestamp file
        timestamp_pathname = os.path.join(image_dirname,
                                          self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            if os.path.exists(timestamp_pathname):
                self._logger.info("Timestamp file exists: %s" %
                                  timestamp_pathname)
                return True
        except Exception as ex:
            # Fall through to the repair attempt below.
            self._logger.exception(
                "Exception validating %s, %s" % (timestamp_pathname, ex))

        # The timestamp file is not accessible,
        # try creating one, if successful try to
        # delete the renamed timestamp file if it
        # exists
        try:
            self._create_image_timestamp_file(image_dirname)
            self._delete_renamed_image_timestamp_file(image_dirname)
        except Exception as ex:
            self._logger.exception(
                "Exception creating %s, %s" % (timestamp_pathname, ex))
            return False

        self._logger.info("Image repaired: %s" %
                          image_dirname)
        return True

    def copy_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """Copy an image between datastores.

        This method is used to create a "full clone" of a vmdk.
        It does so by copying a disk to a unique temporary directory then
        moving the disk to the destination image location. Data in the
        temporary directory not properly cleaned up will be periodically
        garbage collected by the reaper thread.

        This minimizes the window during which the vmdk path exists with
        incomplete content. It also works around a hostd issue where
        cp -f does not work.

        If the destination image already exists and is valid, nothing is
        copied and DiskAlreadyExistException is raised.

        source_datastore: id of the source datastore
        source_id: id of the image to copy from
        dest_datastore: id of the destination datastore
        dest_id: id of the new image in the destination datastore

        throws: DiskAlreadyExistException if the destination image exists
        throws: AcquireLockFailure if timed out waiting to acquire lock on
                the destination image directory
        throws: InvalidFile if unable to lock the destination image
                directory or some other reasons
        """
        if self.check_and_validate_image(dest_id, dest_datastore):
            # The image is copied, presumably via some other concurrent
            # copy, so we move on.
            self._logger.info("Image %s already copied" % dest_id)
            raise DiskAlreadyExistException("Image already exists")

        # Copy image to the tmp directory.
        tmp_dir = self._copy_to_tmp_image(source_datastore, source_id,
                                          dest_datastore, dest_id)

        self._move_image(dest_id, dest_datastore, tmp_dir)

    def reap_tmp_images(self):
        """ Clean up unused directories in the temp image folder. """
        for ds in self._ds_manager.get_datastores():
            tmp_image_pattern = os_datastore_path_pattern(ds.id, TMP_IMAGE_FOLDER_NAME_PREFIX)
            for image_dir in glob.glob(tmp_image_pattern):
                if not os.path.isdir(image_dir):
                    continue

                try:
                    # NOTE(review): st_ctime is the inode change time on
                    # most Unix systems, not the creation time - confirm
                    # this is what the grace period should use.
                    create_time = os.stat(image_dir).st_ctime
                except OSError:
                    # The directory can disappear between glob and stat
                    # (e.g. a concurrent move); skip it rather than abort
                    # the whole sweep.
                    self._logger.info("Unable to stat %s, skipping" % image_dir, exc_info=True)
                    continue

                current_time = time.time()
                if current_time - self.REAP_TMP_IMAGES_GRACE_PERIOD < create_time:
                    # Skip folders that are newly created in past x minutes
                    # For example, during host-to-host transfer, hostd on
                    # receiving end stores the uploaded file in temp images
                    # folder but does not lock it with FileBackedLock, so we
                    # need to allow a grace period before reaping it.
                    self._logger.info(
                        "Skip folder: %s, created: %s, now: %s" %
                        (image_dir, create_time, current_time))
                    continue

                try:
                    with FileBackedLock(image_dir, ds.type):
                        if os.path.exists(image_dir):
                            self._logger.info("Delete folder %s" % image_dir)
                            shutil.rmtree(image_dir, ignore_errors=True)
                except (AcquireLockFailure, InvalidFile):
                    self._logger.info("Already locked: %s, skipping" % image_dir)
                except Exception:
                    self._logger.info("Unable to remove %s" % image_dir, exc_info=True)

    def get_images(self, datastore):
        """ Get image list from datastore
        :param datastore: datastore id
        :return: list of string, image id list
        :raises DatastoreNotFoundException: if the datastore root is missing
        """
        image_ids = []

        if not os.path.exists(os_datastore_root(datastore)):
            raise DatastoreNotFoundException()

        # image_folder is /vmfs/volumes/${datastore}/images_*
        image_folder_pattern = os_datastore_path_pattern(datastore,
                                                         IMAGE_FOLDER_NAME_PREFIX)
        for folder in glob.glob(image_folder_pattern):
            # The image id is the part after the first compound separator.
            image_id = folder.split(COMPOND_PATH_SEPARATOR)[1]
            if self.check_image(image_id, datastore):
                image_ids.append(image_id)

        return image_ids

    def _unzip(self, src, dst):
        """Decompress the gzip file ``src`` into ``dst``."""
        self._logger.info("unzip %s -> %s" % (src, dst))

        fsrc = gzip.open(src, "rb")
        try:
            # Nested so the source is closed even if opening dst fails.
            fdst = open(dst, "wb")
            try:
                shutil.copyfileobj(fsrc, fdst)
            finally:
                fdst.close()
        finally:
            fsrc.close()

    def _copy_disk(self, src, dst):
        """Copy virtual disk ``src`` to ``dst`` via CopyVirtualDisk_Task."""
        self._manage_disk(vim.VirtualDiskManager.CopyVirtualDisk_Task,
                          sourceName=src, destName=dst)

    def _manage_disk(self, op, **kwargs):
        """Invoke a virtual disk manager operation and wait for its task.

        :param op: operation invoked as ``op(self._manager, **kwargs)``
        :param kwargs: keyword arguments forwarded to the operation
        :raises DiskAlreadyExistException: on vim.Fault.FileAlreadyExists
        :raises DiskFileException: on vim.Fault.FileFault
        """
        try:
            self._logger.debug("Invoking %s(%s)" % (op.info.name, kwargs))
            task = op(self._manager, **kwargs)
            self._vim_client.wait_for_task(task)
        except vim.Fault.FileAlreadyExists as e:
            raise DiskAlreadyExistException(e.msg)
        except vim.Fault.FileFault as e:
            raise DiskFileException(e.msg)
class EsxImageManager(ImageManager):
    """ImageManager that works on image files stored on host datastores."""

    # NOTE(review): presumably the retry count used when creating image
    # directories; confirm against its users.
    NUM_MAKEDIRS_ATTEMPTS = 10
    # Default period of the tmp image reaper started by monitor_for_cleanup()
    # (presumably in seconds - confirm against Periodic).
    DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL = 600.0
    # File created inside an image directory to mark the image as tombstoned.
    IMAGE_TOMBSTONE_FILE_NAME = "image_tombstone.txt"
    # NOTE(review): presumably the marker written for unused images by the
    # image scanner; confirm against its users.
    IMAGE_MARKER_FILE_NAME = "unused_image_marker.txt"
    # File that must exist inside an image directory for the image to be
    # considered valid by check_and_validate_image().
    IMAGE_TIMESTAMP_FILE_NAME = "image_timestamp.txt"
    # NOTE(review): presumably appended to the timestamp file name while an
    # image is being deleted; confirm against its users.
    IMAGE_TIMESTAMP_FILE_RENAME_SUFFIX = ".renamed"

    def __init__(self, vim_client, ds_manager):
        """Set up the manager; the tmp image reaper is not started here.

        :param vim_client: client used to wait for vim tasks
        :param ds_manager: datastore manager used to enumerate datastores
        """
        super(EsxImageManager, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._vim_client, self._ds_manager = vim_client, ds_manager
        # Set by monitor_for_cleanup() once the reaper is started.
        self._image_reaper = None
        self._uwsim_nas_exist = None
        self._in_uwsim = services.get(ServiceName.AGENT_CONFIG).in_uwsim

    def monitor_for_cleanup(self,
                            reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
        """Start the periodic reaper that runs reap_tmp_images().

        :param reap_interval: interval handed to Periodic
        """
        reaper = self._image_reaper = Periodic(self.reap_tmp_images,
                                               reap_interval)
        reaper.daemon = True
        reaper.start()

    def cleanup(self):
        """Stop the tmp image reaper if it was ever started."""
        reaper = self._image_reaper
        if reaper is None:
            return
        reaper.stop()

    @log_duration
    def check_image(self, image_id, datastore):
        """Tell whether the vmdk file of an image exists on a datastore.

        :param image_id: id of the image
        :param datastore: datastore id
        :return: True if the image vmdk path exists; False if it does not or
            if the lookup raised an error
        """
        vmdk_pathname = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME)

        try:
            return os.path.exists(vmdk_pathname)
        except Exception:
            # A bare except would also swallow KeyboardInterrupt and
            # SystemExit; only ordinary errors mean "not found".
            self._logger.exception(
                "Error looking up %s" % vmdk_pathname)
            return False

    """
    The following method is intended
    as a replacement of check_image in
    the vm creation workflow compatible
    with the new image sweeper.
    For an image to be valid both the
    directory and the image timestamp
    file must exists on the datastore.
    """
    def check_and_validate_image(self, image_id, ds_id):
        """Tell whether an image directory and its timestamp file both exist.

        :param image_id: id of the image
        :param ds_id: datastore id
        :return: True only when the image directory and the timestamp file
            inside it exist; False otherwise, including on lookup errors
        """
        image_dir = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME))

        try:
            if not os.path.exists(image_dir):
                return False
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as a missing image.
            self._logger.exception(
                "Error looking up %s" % image_dir)
            return False

        # Check the existence of the timestamp file
        timestamp_pathname = \
            os.path.join(image_dir,
                         self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            return os.path.exists(timestamp_pathname)
        except Exception as ex:
            self._logger.exception(
                "Exception looking up %s, %s" % (timestamp_pathname, ex))
            return False

    """
    This method is used to update the mod time on the
    image timestamp file. It also checks for the existence
    of a tombstone file for this image. If the tombstone
    file exists it throws an exception.
    """
    def touch_image_timestamp(self, ds_id, image_id):
        """Update the modification time of an image's timestamp file.

        :param ds_id: datastore id
        :param image_id: image id
        :return: None
        :raise InvalidImageState: a tombstone file exists for the image
        :raise Exception: the tombstone lookup or the touch failed; the error
            is logged and re-raised unchanged
        """
        image_path = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME))

        # Check the existence of the tombstone file
        tombstone_pathname = \
            os.path.join(image_path,
                         self.IMAGE_TOMBSTONE_FILE_NAME)
        try:
            tombstone = os.path.exists(tombstone_pathname)
        except Exception as ex:
            self._logger.exception(
                "Exception looking up %s, %s" % (tombstone_pathname, ex))
            # Falling through would hit an unbound ``tombstone`` below and
            # hide the real error behind an UnboundLocalError.
            raise

        if tombstone:
            raise InvalidImageState

        # Touch the timestamp file
        timestamp_pathname = \
            os.path.join(image_path,
                         self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            os.utime(timestamp_pathname, None)
        except Exception as ex:
            self._logger.exception(
                "Exception looking up %s, %s" % (timestamp_pathname, ex))
            # Bare raise keeps the original traceback.
            raise

    """
    This method is used to create a tombstone marker
    in the new image management work flow. The tombstone
    marker is a file under the image directory.
    """
    def create_image_tombstone(self, ds_id, image_id):
        """Create the tombstone file inside the directory of an image.

        :param ds_id: datastore id
        :param image_id: image id
        :return: None
        :raise Exception: the file could not be created; the error is logged
            and re-raised unchanged
        """
        image_path = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME))

        # Create tombstone file for the image
        tombstone_pathname = \
            os.path.join(image_path,
                         self.IMAGE_TOMBSTONE_FILE_NAME)
        try:
            open(tombstone_pathname, 'w').close()
        except Exception as ex:
            self._logger.exception(
                "Exception creating %s, %s" % (tombstone_pathname, ex))
            # Bare raise keeps the original traceback ("raise ex" restarts
            # it on Python 2).
            raise

        self._logger.info("Image: %s tombstoned" % tombstone_pathname)

    @log_duration
    def check_image_dir(self, image_id, datastore):
        """Tell whether the directory of an image exists on a datastore.

        :param image_id: id of the image
        :param datastore: datastore id
        :return: True if the directory holding the image vmdk exists; False
            if it does not or if the lookup raised an error
        """
        image_path = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME)
        try:
            return os.path.exists(os.path.dirname(image_path))
        except Exception:
            # A bare except would also swallow KeyboardInterrupt and
            # SystemExit.
            self._logger.error(
                "Error looking up %s" % image_path, exc_info=True)
            return False

    def get_image_directory_path(self, datastore_id, image_id):
        """Return image_directory_path() for the given datastore and image.

        :param datastore_id: datastore id
        :param image_id: image id
        """
        directory = image_directory_path(datastore_id, image_id)
        return directory

    def get_image_path(self, datastore_id, image_id):
        """Return the os path of the image vmdk in the image folder.

        :param datastore_id: datastore id
        :param image_id: image id
        """
        vmdk_pathname = os_vmdk_path(datastore_id, image_id,
                                     IMAGE_FOLDER_NAME)
        return vmdk_pathname

    def image_size(self, image_id):
        """Return os.path.getsize() of the image's flat vmdk file.

        Only the first image datastore is consulted.

        :param image_id: image id
        """
        # TODO(mmutsuzaki) We should iterate over all the image datastores
        # until we find one that has the image.
        image_datastores = list(self._ds_manager.image_datastores())
        flat_vmdk = os_vmdk_flat_path(image_datastores[0], image_id,
                                      IMAGE_FOLDER_NAME)
        return os.path.getsize(flat_vmdk)

    def _load_json(self, metadata_path):
        """Load a JSON file, falling back to an empty dict.

        :param metadata_path: path of the JSON file
        :return: the decoded JSON data; {} when the file does not exist or
            does not contain valid JSON
        """
        if not os.path.exists(metadata_path):
            return {}

        with open(metadata_path) as json_file:
            try:
                return json.load(json_file)
            except ValueError:
                # Invalid JSON is logged and reported as empty metadata.
                self._logger.error(
                    "Error loading metadata file %s" % metadata_path,
                    exc_info=True)
        return {}

    def get_image_metadata(self, image_id, datastore):
        """Load the metadata file of an image.

        :param image_id: image id
        :param datastore: datastore id
        :return: whatever _load_json() returns for the metadata path
        """
        path = os_metadata_path(datastore, image_id, IMAGE_FOLDER_NAME)
        self._logger.info("Loading metadata %s" % path)
        metadata = self._load_json(path)
        return metadata

    def get_image_manifest(self, image_id):
        """Read the image type and replication from the image manifest.

        Only the first image datastore is consulted.

        :param image_id: image id
        :return: (type, replication) tuple; (None, None) when the manifest
            file is not found
        """
        # ttylinux has no manifest file, so answer for it directly.
        if image_id == "ttylinux":
            return ImageType.CLOUD, ImageReplication.EAGER

        # TODO(mmutsuzaki) We should iterate over all the image datastores
        # until we find one that has the image.
        first_image_ds = list(self._ds_manager.image_datastores())[0]
        manifest_path = os_image_manifest_path(first_image_ds, image_id)
        if not os.path.isfile(manifest_path):
            self._logger.info("Manifest file %s not found" % manifest_path)
            return None, None

        self._logger.info("Loading manifest %s" % manifest_path)
        manifest = self._load_json(manifest_path)
        # Map the names stored in the manifest to their enum values.
        image_type = ImageType._NAMES_TO_VALUES[manifest["imageType"]]
        image_replication = ImageReplication._NAMES_TO_VALUES[
            manifest["imageReplication"]]
        return image_type, image_replication

    def _get_datastore_type(self, datastore_id):
        """Return the type of the datastore whose id is ``datastore_id``.

        :raise IndexError: no datastore has that id
        """
        matching_types = [candidate.type
                          for candidate in self._ds_manager.get_datastores()
                          if candidate.id == datastore_id]
        return matching_types[0]

    def _prepare_virtual_disk_spec(self, disk_type, adapter_type):
        """Build a VirtualDiskSpec carrying the given disk and adapter type.

        :param disk_type [vim.VirtualDiskManager.VirtualDiskType]:
        :param adapter_type [vim.VirtualDiskManager.VirtualDiskAdapterType]:
        :return: the spec, with both values stored as strings
        """
        spec = vim.VirtualDiskManager.VirtualDiskSpec()
        spec.diskType, spec.adapterType = str(disk_type), str(adapter_type)
        return spec

    def _create_tmp_image(self, source_datastore, source_id, dest_datastore,
                          dest_id):
        """ Stage a copy of an image in a temporary directory.

            While holding an exclusive FileBackedLock on the temp directory
            (so that the GC thread does not collect it while in use):
            1. Create the temp directory.
            2. Copy the metadata file over, if the source has one.
            3. Create the image timestamp file.
            4. Copy the vmdk over as a thin, lsiLogic disk.

            NOTE(review): the temp directory name is expected to be unique
            per call (random UUID from tmp_image_path) - confirm.

            @return the tmp image directory on success.
        """
        src_vmdk = vmdk_path(source_datastore, source_id, IMAGE_FOLDER_NAME)
        tmp_vmdk = tmp_image_path(dest_datastore, dest_id)
        dest_ds_type = self._get_datastore_type(dest_datastore)
        staging_dir = os.path.dirname(datastore_to_os_path(tmp_vmdk))
        # A failure to grab the lock (very unlikely) means someone else is
        # copying into the same directory; the caller just retries later.
        with FileBackedLock(staging_dir, dest_ds_type):
            metadata_file = os_metadata_path(source_datastore, source_id,
                                             IMAGE_FOLDER_NAME)
            mkdir_p(staging_dir)

            # The metadata file is optional.
            if os.path.exists(metadata_file):
                try:
                    shutil.copy(metadata_file, staging_dir)
                except:
                    self._logger.exception("Failed to copy metadata file %s",
                                           metadata_file)
                    raise

            self._create_image_timestamp_file(staging_dir)

            thin_lsi_spec = self._prepare_virtual_disk_spec(
                vim.VirtualDiskManager.VirtualDiskType.thin,
                vim.VirtualDiskManager.VirtualDiskAdapterType.lsiLogic)

            self._manage_disk(vim.VirtualDiskManager.CopyVirtualDisk_Task,
                              sourceName=src_vmdk, destName=tmp_vmdk,
                              destSpec=thin_lsi_spec)
        return staging_dir

    def _move_image(self, image_id, datastore, tmp_dir):
        """
        Move a tmp image folder to its final place in the image datastore.

        Concurrent moves are serialized with an exclusive FileBackedLock on
        the destination image directory, so that only one move succeeds.
        Side effects:
            a - If the destination image already exists, it is assumed that
            someone else successfully copied the image over; the temp
            directory is deleted and DiskAlreadyExistException is raised.
            b - If the lock cannot be acquired (300 attempts, 0.01 seconds
            apart) or the move fails, the tmp image directory is left
            behind and needs to be garbage collected later.

        image_id: String. The image id of the image being moved.
        datastore: String. The datastore id of the datastore.
        tmp_dir: String. The absolute path of the temp image directory.

        raises: OSError if the move fails
                AcquireLockFailure, InvalidFile if we fail to lock the
                destination image.
                DiskAlreadyExistException if the image is already in place.
        """
        ds_type = self._get_datastore_type(datastore)
        final_dir = os.path.dirname(os_vmdk_path(datastore, image_id,
                                                 IMAGE_FOLDER_NAME))
        images_parent = os.path.dirname(final_dir)
        # Make sure the parent image directory exists.
        try:
            mkdir_p(images_parent)
        except OSError as err:
            # An already existing parent directory is not an error.
            if err.errno != errno.EEXIST or not os.path.isdir(images_parent):
                raise
        try:
            # 300 retries, 0.01 seconds apart: wait for about 3 seconds
            with FileBackedLock(final_dir, ds_type, retry=300,
                                wait_secs=0.01):
                if self._check_image_repair(image_id, datastore):
                    raise DiskAlreadyExistException("Image already exists")

                shutil.move(tmp_dir, final_dir)
        except (AcquireLockFailure, InvalidFile):
            self._logger.info("Unable to lock %s for atomic move" % image_id)
            raise
        except DiskAlreadyExistException:
            self._logger.info("Image %s already copied" % image_id)
            rm_rf(tmp_dir)
            raise

    """
    The following method should be used to check
    and validate the existence of a previously
    created image. With the new image delete path
    the "timestamp" file must exists inside the
    image directory. If the directory exists and
    the file does not, it may mean that an image
    delete operation was aborted mid-way. In this
    case the following method recreate the timestamp
    file. All operations are performed while
    holding the image directory lock (FileBackedLock),
    the caller is required to hold the lock.
    """
    def _check_image_repair(self, image_id, datastore):
        """Validate an existing image, recreating its timestamp file.

        :param image_id: image id
        :param datastore: datastore id
        :return: True if the vmdk exists and the timestamp file exists or
            could be recreated; False otherwise
        """
        vmdk_file = os_vmdk_path(datastore,
                                 image_id,
                                 IMAGE_FOLDER_NAME)
        image_dir = os.path.dirname(vmdk_file)

        # Without the vmdk file there is no image to repair.
        try:
            if not os.path.exists(vmdk_file):
                self._logger.info("Vmdk path doesn't exists: %s" %
                                  vmdk_file)
                return False
        except Exception as ex:
            self._logger.exception(
                "Exception validating %s, %s" % (image_dir, ex))
            return False

        # An existing timestamp file means the image is already valid.
        timestamp_file = os.path.join(image_dir,
                                      self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            if os.path.exists(timestamp_file):
                self._logger.info("Timestamp file exists: %s" %
                                  timestamp_file)
                return True
        except Exception as ex:
            # Logged only; the repair below is still attempted.
            self._logger.exception(
                "Exception validating %s, %s" % (timestamp_file, ex))

        # The timestamp file is not accessible: try creating one and, if
        # that works, delete the renamed timestamp file if it exists.
        try:
            self._create_image_timestamp_file(image_dir)
            self._delete_renamed_image_timestamp_file(image_dir)
        except Exception as ex:
            self._logger.exception(
                "Exception creating %s, %s" % (timestamp_file, ex))
            return False
        else:
            self._logger.info("Image repaired: %s" %
                              image_dir)
            return True

    def copy_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """Copy an image between datastores.

        The image is first copied into a fresh temporary directory and then
        moved to the destination image location, so that the final vmdk path
        never exists with incomplete content. Temporary directories that are
        not cleaned up are collected later by reap_tmp_images().

        An already existing, valid destination image is not overwritten:
        DiskAlreadyExistException is raised instead.

        source_datastore: id of the source datastore
        source_id: id of the image to copy from
        dest_datastore: id of the destination datastore
        dest_id: id of the new image in the destination datastore

        throws: DiskAlreadyExistException if the destination image exists
        throws: AcquireLockFailure if timed out waiting to acquire lock on tmp
                image directory
        throws: InvalidFile if unable to lock tmp image directory or some other
                reasons
        """
        already_copied = self.check_and_validate_image(dest_id,
                                                       dest_datastore)
        if already_copied:
            # Presumably the work of some other concurrent copy.
            self._logger.info("Image %s already copied" % dest_id)
            raise DiskAlreadyExistException("Image already exists")

        # Stage the image in the tmp directory, then move it into place.
        staged_dir = self._create_tmp_image(source_datastore, source_id,
                                            dest_datastore, dest_id)
        self._move_image(dest_id, dest_datastore, staged_dir)

    def reap_tmp_images(self):
        """ Clean up unused directories in the temp image folder.

        Every sub directory of the temp image folder of every datastore is
        removed, unless its FileBackedLock cannot be acquired. Failures are
        logged and never abort the processing of the remaining directories
        or datastores.
        """
        for ds in self._ds_manager.get_datastores():
            images_dir = tmp_image_folder_os_path(ds.id)

            try:
                entries = os.listdir(images_dir)
            except OSError as e:
                # A missing or unreadable temp folder on one datastore must
                # not prevent the other datastores from being cleaned.
                self._logger.info("Unable to list %s: %s, skipping" %
                                  (images_dir, e))
                continue

            for f in entries:
                path = os.path.join(images_dir, f)
                if not os.path.isdir(path):
                    continue
                try:
                    # The lock is held by whoever is filling the directory.
                    with FileBackedLock(path, ds.type):
                        if os.path.exists(path):
                            self._logger.info("Delete folder %s" % path)
                            shutil.rmtree(path, ignore_errors=True)
                except (AcquireLockFailure, InvalidFile):
                    self._logger.info("Already locked: %s, skipping" % path)
                except Exception:
                    # Not a bare except: KeyboardInterrupt and SystemExit
                    # must still be able to stop the reaper.
                    self._logger.info("Unable to remove %s" % path,
                                      exc_info=True)

    def delete_image(self, datastore_id, image_id, ds_type, force):
        """Tombstone an image and, when forced, garbage collect it now.

        :param datastore_id: datastore id
        :param image_id: image id
        :param ds_type: not used by this method
        :param force: when false, the image is only tombstoned
        :raise ImageNotFoundException: the image directory does not exist
        :raise ImageInUse: force was requested but the data disk could not
            be locked
        """
        image_dir_found = self.check_image_dir(image_id, datastore_id)
        if not image_dir_found:
            self._logger.info("Image %s on datastore %s not found" % (image_id,
                              datastore_id))
            raise ImageNotFoundException("Image %s not found" % image_id)

        # Mark image as tombstoned
        self.create_image_tombstone(datastore_id, image_id)

        if force:
            # Actively garbage collect the image, unless it is in use.
            if not self._lock_data_disk(datastore_id, image_id):
                raise ImageInUse("Image %s is currently in use" % image_id)
            self._gc_image_dir(datastore_id, image_id)

            # Now attempt GCing the image directory.
            try:
                self._clean_gc_dir(datastore_id)
            except Exception:
                # Swallowed: the next clean call will clear it all.
                self._logger.exception(
                    "Failed to delete gc dir on datastore %s" % datastore_id)

    def _lock_data_disk(self, datastore_id, image_id):
        """
        Try to remove the flat data disk of an image.
        Return True if the file was removed, False if the removal failed
        with OSError, which is taken to mean that the disk is locked.

        NOTE(review): os_vmdk_flat_path is called without a folder name here,
        unlike in image_size() - confirm the default folder is the intended
        one.
        """
        flat_vmdk = os_vmdk_flat_path(datastore_id, image_id)
        try:
            # Deleting the data disk is fine: a subsequent power on fails
            # when the data disk is not there.
            os.remove(flat_vmdk)
        except OSError:
            # Remove failed so disk is locked.
            self._logger.debug("Disk %s on datastore %s is already locked"
                               % (flat_vmdk, datastore_id))
            return False
        else:
            return True

    def get_images(self, datastore):
        """ List the ids of the images found on a datastore.

        :param datastore: datastore id
        :return: list of string, image id list
        :raise DatastoreNotFoundException: the image folder does not exist
        """
        # image folder is /vmfs/volumes/${datastore}/images
        images_root = os_datastore_path(datastore, IMAGE_FOLDER_NAME)
        if not os.path.exists(images_root):
            raise DatastoreNotFoundException()

        found = []
        # Images are grouped in sub folders named after the 2-digit prefix
        # of the image id.
        for prefix in os.listdir(images_root):
            # e.g. /vmfs/volumes/${datastore}/images/${image_id}[0:2]
            prefix_dir = os.path.join(images_root, prefix)
            if os.path.isdir(prefix_dir):
                found.extend(
                    image_id for image_id in os.listdir(prefix_dir)
                    if self.check_image(image_id, datastore))
        return found

    def mark_unused(self, image_scanner):
        """Run _mark_unused_images() on the image folder of a datastore.

        :param image_scanner: scanner whose ``datastore_id`` selects the
            datastore
        :return: the result of _mark_unused_images()
        :raise DatastoreNotFoundException: the image folder does not exist
        """
        images_dir_path = os_datastore_path(image_scanner.datastore_id,
                                            IMAGE_FOLDER_NAME)
        # Log messages with prefix: "IMAGE SCANNER" are for debugging
        # and will be removed after basic testing
        self._logger.info("IMAGE SCANNER: images_dir: %s" % images_dir_path)
        if os.path.isdir(images_dir_path):
            return self._mark_unused_images(image_scanner, images_dir_path)

        self._logger.info("images_dir_path: images_dir: %s, doesn't exist"
                          % images_dir_path)
        raise DatastoreNotFoundException(
            "Image scanner, cannot find image directory for datastore: %s"
            % image_scanner.datastore_id)

    def delete_unused(self, image_sweeper):
        """Run _delete_unused_images() on the image folder of a datastore.

        :param image_sweeper: sweeper whose ``datastore_id`` selects the
            datastore
        :return: the result of _delete_unused_images()
        :raise DatastoreNotFoundException: the image folder does not exist
        """
        images_dir_path = os_datastore_path(image_sweeper.datastore_id,
                                            IMAGE_FOLDER_NAME)
        # Log messages with prefix: "IMAGE SWEEPER" are for debugging
        # and will be removed after basic testing
        self._logger.info("IMAGE SWEEPER: images_dir: %s" % images_dir_path)
        if os.path.isdir(images_dir_path):
            return self._delete_unused_images(image_sweeper, images_dir_path)

        self._logger.info("images_dir_path: images_dir: %s, doesn't exist"
                          % images_dir_path)
        raise DatastoreNotFoundException(
            "Image sweeper, cannot find image directory for datastore: %s"
            % image_sweeper.datastore_id)

    def _unzip(self, src, dst):
        """Decompress the gzip file ``src`` into the file ``dst``.

        :param src: path of the gzip compressed source file
        :param dst: path of the destination file, created or truncated
        """
        self._logger.info("unzip %s -> %s" % (src, dst))

        fsrc = gzip.open(src, "rb")
        try:
            # Opening dst inside the try guarantees that fsrc is closed even
            # when dst cannot be opened; the with block closes dst even when
            # the copy fails.
            with open(dst, "wb") as fdst:
                shutil.copyfileobj(fsrc, fdst)
        finally:
            fsrc.close()

    def _copy_disk(self, src, dst):
        """Copy the virtual disk ``src`` to ``dst`` via _manage_disk().

        :param src: passed as ``sourceName`` to the copy task
        :param dst: passed as ``destName`` to the copy task
        """
        copy_task = vim.VirtualDiskManager.CopyVirtualDisk_Task
        self._manage_disk(copy_task, sourceName=src, destName=dst)

    def _manage_disk(self, op, **kwargs):
        """Invoke a virtual disk manager operation and wait for its task.

        When running in uwsim the call is delegated to _manage_disk_uwsim().

        :param op: task method to invoke on ``self._manager``, e.g.
            vim.VirtualDiskManager.CopyVirtualDisk_Task
        :param kwargs: keyword arguments forwarded to ``op``
        :raise DiskAlreadyExistException: on vim.Fault.FileAlreadyExists
        :raise DiskFileException: on vim.Fault.FileFault
        """
        if self._in_uwsim:
            self._manage_disk_uwsim(op, **kwargs)
            return

        # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
        # "except X, e" form. The FileAlreadyExists handler must stay first.
        try:
            self._logger.debug("Invoking %s(%s)" % (op.info.name, kwargs))
            task = op(self._manager, **kwargs)
            self._vim_client.wait_for_task(task)
        except vim.Fault.FileAlreadyExists as e:
            raise DiskAlreadyExistException(e.msg)
        except vim.Fault.FileFault as e:
            raise DiskFileException(e.msg)
Ejemplo n.º 12
0
class EsxImageManager(ImageManager):
    """ImageManager that works on image files stored on host datastores."""

    # NOTE(review): presumably the retry count used when creating image
    # directories; confirm against its users.
    NUM_MAKEDIRS_ATTEMPTS = 10
    # Default period of the tmp image reaper started by monitor_for_cleanup()
    # (presumably in seconds - confirm against Periodic).
    DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL = 600.0
    # File created inside an image directory to mark the image as tombstoned.
    IMAGE_TOMBSTONE_FILE_NAME = "image_tombstone.txt"
    # NOTE(review): presumably the marker written for unused images by the
    # image scanner; confirm against its users.
    IMAGE_MARKER_FILE_NAME = "unused_image_marker.txt"
    # File that must exist inside an image directory for the image to be
    # considered valid by check_and_validate_image().
    IMAGE_TIMESTAMP_FILE_NAME = "image_timestamp.txt"
    # NOTE(review): presumably appended to the timestamp file name while an
    # image is being deleted; confirm against its users.
    IMAGE_TIMESTAMP_FILE_RENAME_SUFFIX = ".renamed"

    def __init__(self, vim_client, ds_manager):
        """Set up the manager; the tmp image reaper is not started here.

        :param vim_client: client used to wait for vim tasks
        :param ds_manager: datastore manager used to enumerate datastores
        """
        super(EsxImageManager, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._vim_client, self._ds_manager = vim_client, ds_manager
        # Set by monitor_for_cleanup() once the reaper is started.
        self._image_reaper = None
        self._uwsim_nas_exist = None
        self._in_uwsim = services.get(ServiceName.AGENT_CONFIG).in_uwsim

    def monitor_for_cleanup(self,
                            reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
        """Start the periodic reaper that runs reap_tmp_images().

        :param reap_interval: interval handed to Periodic
        """
        reaper = self._image_reaper = Periodic(self.reap_tmp_images,
                                               reap_interval)
        reaper.daemon = True
        reaper.start()

    def cleanup(self):
        """Stop the tmp image reaper if it was ever started."""
        reaper = self._image_reaper
        if reaper is None:
            return
        reaper.stop()

    @log_duration
    def check_image(self, image_id, datastore):
        """Tell whether the vmdk file of an image exists on a datastore.

        :param image_id: id of the image
        :param datastore: datastore id
        :return: True if the image vmdk path exists; False if it does not or
            if the lookup raised an error
        """
        vmdk_pathname = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME)

        try:
            return os.path.exists(vmdk_pathname)
        except Exception:
            # A bare except would also swallow KeyboardInterrupt and
            # SystemExit; only ordinary errors mean "not found".
            self._logger.exception("Error looking up %s" % vmdk_pathname)
            return False

    """
    The following method is intended
    as a replacement of check_image in
    the vm creation workflow compatible
    with the new image sweeper.
    For an image to be valid both the
    directory and the image timestamp
    file must exists on the datastore.
    """

    def check_and_validate_image(self, image_id, ds_id):
        """Tell whether an image directory and its timestamp file both exist.

        :param image_id: id of the image
        :param ds_id: datastore id
        :return: True only when the image directory and the timestamp file
            inside it exist; False otherwise, including on lookup errors
        """
        image_dir = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME))

        try:
            if not os.path.exists(image_dir):
                return False
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as a missing image.
            self._logger.exception("Error looking up %s" % image_dir)
            return False

        # Check the existence of the timestamp file
        timestamp_pathname = \
            os.path.join(image_dir,
                         self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            return os.path.exists(timestamp_pathname)
        except Exception as ex:
            self._logger.exception("Exception looking up %s, %s" %
                                   (timestamp_pathname, ex))
            return False

    """
    This method is used to update the mod time on the
    image timestamp file. It also checks for the existence
    of a tombstone file for this image. If the tombstone
    file exists it throws an exception.
    """

    def touch_image_timestamp(self, ds_id, image_id):
        """Update the modification time of an image's timestamp file.

        :param ds_id: datastore id
        :param image_id: image id
        :return: None
        :raise InvalidImageState: a tombstone file exists for the image
        :raise Exception: the tombstone lookup or the touch failed; the error
            is logged and re-raised unchanged
        """
        image_path = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME))

        # Check the existence of the tombstone file
        tombstone_pathname = \
            os.path.join(image_path,
                         self.IMAGE_TOMBSTONE_FILE_NAME)
        try:
            tombstone = os.path.exists(tombstone_pathname)
        except Exception as ex:
            self._logger.exception("Exception looking up %s, %s" %
                                   (tombstone_pathname, ex))
            # Falling through would hit an unbound ``tombstone`` below and
            # hide the real error behind an UnboundLocalError.
            raise

        if tombstone:
            raise InvalidImageState

        # Touch the timestamp file
        timestamp_pathname = \
            os.path.join(image_path,
                         self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            os.utime(timestamp_pathname, None)
        except Exception as ex:
            self._logger.exception("Exception looking up %s, %s" %
                                   (timestamp_pathname, ex))
            # Bare raise keeps the original traceback.
            raise

    """
    This method is used to create a tombstone marker
    in the new image management work flow. The tombstone
    marker is a file under the image directory.
    """

    def create_image_tombstone(self, ds_id, image_id):
        """Create the tombstone file inside the directory of an image.

        :param ds_id: datastore id
        :param image_id: image id
        :return: None
        :raise Exception: the file could not be created; the error is logged
            and re-raised unchanged
        """
        image_path = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME))

        # Create tombstone file for the image
        tombstone_pathname = \
            os.path.join(image_path,
                         self.IMAGE_TOMBSTONE_FILE_NAME)
        try:
            open(tombstone_pathname, 'w').close()
        except Exception as ex:
            self._logger.exception("Exception creating %s, %s" %
                                   (tombstone_pathname, ex))
            # Bare raise keeps the original traceback ("raise ex" restarts
            # it on Python 2).
            raise

        self._logger.info("Image: %s tombstoned" % tombstone_pathname)

    @log_duration
    def check_image_dir(self, image_id, datastore):
        """Tell whether the directory of an image exists on a datastore.

        :param image_id: id of the image
        :param datastore: datastore id
        :return: True if the directory holding the image vmdk exists; False
            if it does not or if the lookup raised an error
        """
        image_path = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME)
        try:
            return os.path.exists(os.path.dirname(image_path))
        except Exception:
            # A bare except would also swallow KeyboardInterrupt and
            # SystemExit.
            self._logger.error("Error looking up %s" % image_path,
                               exc_info=True)
            return False

    def get_image_directory_path(self, datastore_id, image_id):
        """Return image_directory_path() for the given datastore and image.

        :param datastore_id: datastore id
        :param image_id: image id
        """
        directory = image_directory_path(datastore_id, image_id)
        return directory

    def get_image_path(self, datastore_id, image_id):
        """Return the os path of the image vmdk in the image folder.

        :param datastore_id: datastore id
        :param image_id: image id
        """
        vmdk_pathname = os_vmdk_path(datastore_id, image_id,
                                     IMAGE_FOLDER_NAME)
        return vmdk_pathname

    def image_size(self, image_id):
        """Return os.path.getsize() of the image's flat vmdk file.

        Only the first image datastore is consulted.

        :param image_id: image id
        """
        # TODO(mmutsuzaki) We should iterate over all the image datastores
        # until we find one that has the image.
        image_datastores = list(self._ds_manager.image_datastores())
        flat_vmdk = os_vmdk_flat_path(image_datastores[0], image_id,
                                      IMAGE_FOLDER_NAME)
        return os.path.getsize(flat_vmdk)

    def _load_json(self, metadata_path):
        """Load a JSON file, falling back to an empty dict.

        :param metadata_path: path of the JSON file
        :return: the decoded JSON data; {} when the file does not exist or
            does not contain valid JSON
        """
        if not os.path.exists(metadata_path):
            return {}

        with open(metadata_path) as json_file:
            try:
                return json.load(json_file)
            except ValueError:
                # Invalid JSON is logged and reported as empty metadata.
                self._logger.error(
                    "Error loading metadata file %s" % metadata_path,
                    exc_info=True)
        return {}

    def get_image_metadata(self, image_id, datastore):
        """Load the metadata file of an image.

        :param image_id: image id
        :param datastore: datastore id
        :return: whatever _load_json() returns for the metadata path
        """
        path = os_metadata_path(datastore, image_id, IMAGE_FOLDER_NAME)
        self._logger.info("Loading metadata %s" % path)
        metadata = self._load_json(path)
        return metadata

    def get_image_manifest(self, image_id):
        """Read the image type and replication from the image manifest.

        Only the first image datastore is consulted.

        :param image_id: image id
        :return: (type, replication) tuple; (None, None) when the manifest
            file is not found
        """
        # ttylinux has no manifest file, so answer for it directly.
        if image_id == "ttylinux":
            return ImageType.CLOUD, ImageReplication.EAGER

        # TODO(mmutsuzaki) We should iterate over all the image datastores
        # until we find one that has the image.
        first_image_ds = list(self._ds_manager.image_datastores())[0]
        manifest_path = os_image_manifest_path(first_image_ds, image_id)
        if not os.path.isfile(manifest_path):
            self._logger.info("Manifest file %s not found" % manifest_path)
            return None, None

        self._logger.info("Loading manifest %s" % manifest_path)
        manifest = self._load_json(manifest_path)
        # Map the names stored in the manifest to their enum values.
        image_type = ImageType._NAMES_TO_VALUES[manifest["imageType"]]
        image_replication = ImageReplication._NAMES_TO_VALUES[
            manifest["imageReplication"]]
        return image_type, image_replication

    def _get_datastore_type(self, datastore_id):
        """Return the type of the datastore whose id is ``datastore_id``.

        :raise IndexError: no datastore has that id
        """
        matching_types = [candidate.type
                          for candidate in self._ds_manager.get_datastores()
                          if candidate.id == datastore_id]
        return matching_types[0]

    def _prepare_virtual_disk_spec(self, disk_type, adapter_type):
        """Build a VirtualDiskSpec carrying the given disk and adapter type.

        :param disk_type [vim.VirtualDiskManager.VirtualDiskType]:
        :param adapter_type [vim.VirtualDiskManager.VirtualDiskAdapterType]:
        :return: the spec, with both values stored as strings
        """
        spec = vim.VirtualDiskManager.VirtualDiskSpec()
        spec.diskType, spec.adapterType = str(disk_type), str(adapter_type)
        return spec

    def _create_tmp_image(self, source_datastore, source_id, dest_datastore,
                          dest_id):
        """ Stage a copy of an image in a temporary directory.

            While holding an exclusive FileBackedLock on the temp directory
            (so that the GC thread does not collect it while in use):
            1. Create the temp directory.
            2. Copy the metadata file over, if the source has one.
            3. Create the image timestamp file.
            4. Copy the vmdk over as a thin, lsiLogic disk.

            NOTE(review): the temp directory name is expected to be unique
            per call (random UUID from tmp_image_path) - confirm.

            @return the tmp image directory on success.
        """
        src_vmdk = vmdk_path(source_datastore, source_id, IMAGE_FOLDER_NAME)
        tmp_vmdk = tmp_image_path(dest_datastore, dest_id)
        dest_ds_type = self._get_datastore_type(dest_datastore)
        staging_dir = os.path.dirname(datastore_to_os_path(tmp_vmdk))
        # A failure to grab the lock (very unlikely) means someone else is
        # copying into the same directory; the caller just retries later.
        with FileBackedLock(staging_dir, dest_ds_type):
            metadata_file = os_metadata_path(source_datastore, source_id,
                                             IMAGE_FOLDER_NAME)
            mkdir_p(staging_dir)

            # The metadata file is optional.
            if os.path.exists(metadata_file):
                try:
                    shutil.copy(metadata_file, staging_dir)
                except:
                    self._logger.exception("Failed to copy metadata file %s",
                                           metadata_file)
                    raise

            self._create_image_timestamp_file(staging_dir)

            thin_lsi_spec = self._prepare_virtual_disk_spec(
                vim.VirtualDiskManager.VirtualDiskType.thin,
                vim.VirtualDiskManager.VirtualDiskAdapterType.lsiLogic)

            self._manage_disk(vim.VirtualDiskManager.CopyVirtualDisk_Task,
                              sourceName=src_vmdk,
                              destName=tmp_vmdk,
                              destSpec=thin_lsi_spec)
        return staging_dir

    def _move_image(self, image_id, datastore, tmp_dir):
        """
        Move a staged tmp image folder to its final image location.

        The move is done while holding a FileBackedLock on the destination
        image directory, so that concurrent movers of the same image are
        serialized and only one move succeeds.
        Has the following side effects:
            a - If the destination image already exists (as reported by
            _check_image_repair), the temp directory is deleted and
            DiskAlreadyExistException is raised.
            b - If the file lock cannot be acquired, or the move fails, the
            tmp image directory is left behind and needs to be garbage
            collected later.

        image_id: String. The image id of the image being moved.
        datastore: String. The datastore id of the datastore.
        tmp_dir: String. The absolute path of the temp image directory.

        raises: OsError if the move fails
                AcquireLockFailure, InvalidFile if we fail to lock the
                destination image.
                DiskAlreadyExistException if the image is already in place.
        """
        lock_type = self._get_datastore_type(datastore)
        final_dir = os.path.dirname(
            os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME))
        images_root = os.path.dirname(final_dir)

        # The parent of the image directory must exist before the move; an
        # already existing directory is not an error.
        try:
            mkdir_p(images_root)
        except OSError as e:
            if e.errno != errno.EEXIST or not os.path.isdir(images_root):
                raise

        try:
            with FileBackedLock(final_dir, lock_type, retry=300,
                                wait_secs=0.01):  # wait lock for 3 seconds
                if self._check_image_repair(image_id, datastore):
                    raise DiskAlreadyExistException("Image already exists")

                shutil.move(tmp_dir, final_dir)
        except (AcquireLockFailure, InvalidFile):
            # tmp_dir is intentionally left in place here.
            self._logger.info("Unable to lock %s for atomic move" % image_id)
            raise
        except DiskAlreadyExistException:
            # Somebody else finished first; our staged copy is redundant.
            self._logger.info("Image %s already copied" % image_id)
            rm_rf(tmp_dir)
            raise

    """
    The following method should be used to check
    and validate the existence of a previously
    created image. With the new image delete path
    the "timestamp" file must exist inside the
    image directory. If the directory exists and
    the file does not, it may mean that an image
    delete operation was aborted mid-way. In this
    case the following method recreates the timestamp
    file. All operations are performed while
    holding the image directory lock (FileBackedLock);
    the caller is required to hold the lock.
    """

    def _check_image_repair(self, image_id, datastore):
        """Report whether the image exists, recreating its timestamp file.

        Returns False when the image vmdk is missing or cannot be checked,
        or when the timestamp file has to be recreated and that fails.
        Returns True when the vmdk exists and the timestamp file either is
        already there or was recreated successfully.
        """
        vmdk_file = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME)
        image_dir = os.path.dirname(vmdk_file)

        # Step 1: the vmdk itself has to be present.
        try:
            if not os.path.exists(vmdk_file):
                self._logger.info("Vmdk path doesn't exists: %s" %
                                  vmdk_file)
                return False
        except Exception as ex:
            self._logger.exception("Exception validating %s, %s" %
                                   (image_dir, ex))
            return False

        # Step 2: an existing timestamp file means nothing needs repair.
        timestamp_file = os.path.join(image_dir,
                                      self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            if os.path.exists(timestamp_file):
                self._logger.info("Timestamp file exists: %s" %
                                  timestamp_file)
                return True
        except Exception as ex:
            # Logged only; fall through and try to recreate the file.
            self._logger.exception("Exception validating %s, %s" %
                                   (timestamp_file, ex))

        # Step 3: recreate the timestamp file, then drop the renamed
        # timestamp file via _delete_renamed_image_timestamp_file.
        try:
            self._create_image_timestamp_file(image_dir)
            self._delete_renamed_image_timestamp_file(image_dir)
        except Exception as ex:
            self._logger.exception("Exception creating %s, %s" %
                                   (timestamp_file, ex))
            return False
        else:
            self._logger.info("Image repaired: %s" % image_dir)
            return True

    def copy_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """Copy an image between datastores.

        This method is used to create a "full clone" of a vmdk.
        The disk is first copied into a temporary image directory, which is
        then moved to the destination image location. This minimizes the
        window during which the vmdk path exists with incomplete content.
        Temporary directories that are left behind are expected to be
        cleaned up later (see reap_tmp_images).

        If check_and_validate_image reports that the destination image
        already exists, nothing is copied and DiskAlreadyExistException is
        raised.

        source_datastore: id of the source datastore
        source_id: id of the image to copy from
        dest_datastore: id of the destination datastore
        dest_id: id of the new image in the destination datastore

        throws: DiskAlreadyExistException if the destination image exists
        throws: AcquireLockFailure if timed out waiting to acquire lock on tmp
                image directory
        throws: InvalidFile if unable to lock tmp image directory or some other
                reasons
        """
        already_present = self.check_and_validate_image(dest_id,
                                                        dest_datastore)
        if already_present:
            # Presumably the result of some other, concurrent copy.
            self._logger.info("Image %s already copied" % dest_id)
            raise DiskAlreadyExistException("Image already exists")

        # Stage the copy first, then move it into place.
        staged_dir = self._create_tmp_image(source_datastore, source_id,
                                            dest_datastore, dest_id)
        self._move_image(dest_id, dest_datastore, staged_dir)

    def reap_tmp_images(self):
        """ Clean up unused directories in the temp image folder.

        For every datastore returned by the datastore manager, each
        sub-directory of its temp image folder is removed while holding a
        FileBackedLock on it. Directories whose lock cannot be taken are
        skipped; any other error for a directory is logged and the scan
        moves on to the next one.
        """
        for ds in self._ds_manager.get_datastores():
            images_dir = tmp_image_folder_os_path(ds.id)

            for f in os.listdir(images_dir):
                path = os.path.join(images_dir, f)
                if not os.path.isdir(path):
                    continue
                try:
                    with FileBackedLock(path, ds.type):
                        if os.path.exists(path):
                            self._logger.info("Delete folder %s" % path)
                            shutil.rmtree(path, ignore_errors=True)
                except (AcquireLockFailure, InvalidFile):
                    self._logger.info("Already locked: %s, skipping" % path)
                except Exception:
                    # Best effort per directory. Catch Exception rather than
                    # everything so that KeyboardInterrupt and SystemExit
                    # are not swallowed by the reaper.
                    self._logger.info("Unable to remove %s" % path,
                                      exc_info=True)

    def delete_image(self, datastore_id, image_id, ds_type, force):
        """Tombstone an image and, if forced, garbage collect it right away.

        datastore_id: id of the datastore holding the image
        image_id: id of the image to delete
        ds_type: not used by this method
        force: when false, only the tombstone is created

        raises: ImageNotFoundException if the image directory is missing
                ImageInUse if force is set and the data disk is locked
        """
        image_present = self.check_image_dir(image_id, datastore_id)
        if not image_present:
            self._logger.info("Image %s on datastore %s not found" %
                              (image_id, datastore_id))
            raise ImageNotFoundException("Image %s not found" % image_id)

        # The tombstone is always written, forced or not.
        self.create_image_tombstone(datastore_id, image_id)

        if force:
            # Active garbage collection is only possible when nothing is
            # holding on to the data disk.
            if not self._lock_data_disk(datastore_id, image_id):
                raise ImageInUse("Image %s is currently in use" % image_id)
            self._gc_image_dir(datastore_id, image_id)

            try:
                self._clean_gc_dir(datastore_id)
            except Exception:
                # Not fatal: a later clean call gets another chance.
                self._logger.exception(
                    "Failed to delete gc dir on datastore %s" %
                    datastore_id)

    def _lock_data_disk(self, datastore_id, image_id):
        """
        Try to remove the flat vmdk (data disk) of the given image.

        Return True if the file was removed. Return False if os.remove
        raised OSError, which is treated as the disk being locked.
        """
        flat_vmdk = os_vmdk_flat_path(datastore_id, image_id)
        try:
            # Removing the data disk is acceptable: a subsequent power on
            # will fail if the data disk is not there.
            os.remove(flat_vmdk)
        except OSError:
            self._logger.debug("Disk %s on datastore %s is already locked" %
                               (flat_vmdk, datastore_id))
            return False
        else:
            return True

    def get_images(self, datastore):
        """ List the ids of the images found on a datastore.

        The image folder is scanned two levels deep: every directory in it
        is treated as a prefix folder, and every entry inside a prefix
        folder is kept if check_image accepts it as an image id.

        :param datastore: datastore id
        :return: list of string, image id list
        :raise DatastoreNotFoundException: if the image folder is missing
        """
        # image_root is /vmfs/volumes/${datastore}/images
        image_root = os_datastore_path(datastore, IMAGE_FOLDER_NAME)
        if not os.path.exists(image_root):
            raise DatastoreNotFoundException()

        found = []
        for prefix in os.listdir(image_root):
            # prefix_dir is something like
            # /vmfs/volumes/${datastore}/images/${image_id}[0:2]
            prefix_dir = os.path.join(image_root, prefix)
            if os.path.isdir(prefix_dir):
                found.extend(candidate
                             for candidate in os.listdir(prefix_dir)
                             if self.check_image(candidate, datastore))
        return found

    def mark_unused(self, image_scanner):
        """Run _mark_unused_images on the scanner's datastore image folder.

        :param image_scanner: scanner object; its datastore_id is read here
        :return: whatever _mark_unused_images returns
        :raise DatastoreNotFoundException: if the image folder is not a
            directory
        """
        scan_root = os_datastore_path(image_scanner.datastore_id,
                                      IMAGE_FOLDER_NAME)
        # Log messages with prefix: "IMAGE SCANNER" are for debugging
        # and will be removed after basic testing
        self._logger.info("IMAGE SCANNER: images_dir: %s" % scan_root)
        if os.path.isdir(scan_root):
            return self._mark_unused_images(image_scanner, scan_root)

        self._logger.info(
            "images_dir_path: images_dir: %s, doesn't exist" %
            scan_root)
        raise DatastoreNotFoundException(
            "Image scanner, cannot find image "
            "directory for datastore: %s" % image_scanner.datastore_id)

    def delete_unused(self, image_sweeper):
        """Run _delete_unused_images on the sweeper's datastore image folder.

        :param image_sweeper: sweeper object; its datastore_id is read here
        :return: whatever _delete_unused_images returns
        :raise DatastoreNotFoundException: if the image folder is not a
            directory
        """
        sweep_root = os_datastore_path(image_sweeper.datastore_id,
                                       IMAGE_FOLDER_NAME)
        # Log messages with prefix: "IMAGE SWEEPER" are for debugging
        # and will be removed after basic testing
        self._logger.info("IMAGE SWEEPER: images_dir: %s" % sweep_root)
        if os.path.isdir(sweep_root):
            return self._delete_unused_images(image_sweeper, sweep_root)

        self._logger.info(
            "images_dir_path: images_dir: %s, doesn't exist" %
            sweep_root)
        raise DatastoreNotFoundException(
            "Image sweeper, cannot find image "
            "directory for datastore: %s" % image_sweeper.datastore_id)

    def _unzip(self, src, dst):
        self._logger.info("unzip %s -> %s" % (src, dst))

        fsrc = gzip.open(src, "rb")
        fdst = open(dst, "wb")

        try:
            shutil.copyfileobj(fsrc, fdst)
        finally:
            fsrc.close()
            fdst.close()

    def _copy_disk(self, src, dst):
        """Copy the virtual disk ``src`` to ``dst`` through _manage_disk.

        The operation used is CopyVirtualDisk_Task; no destination spec is
        passed along.
        """
        copy_task = vim.VirtualDiskManager.CopyVirtualDisk_Task
        self._manage_disk(copy_task, sourceName=src, destName=dst)

    def _manage_disk(self, op, **kwargs):
        if self._in_uwsim:
            self._manage_disk_uwsim(op, **kwargs)
            return

        try:
            self._logger.debug("Invoking %s(%s)" % (op.info.name, kwargs))
            task = op(self._manager, **kwargs)
            self._vim_client.wait_for_task(task)
        except vim.Fault.FileAlreadyExists, e:
            raise DiskAlreadyExistException(e.msg)
        except vim.Fault.FileFault, e:
            raise DiskFileException(e.msg)
Ejemplo n.º 13
0
class ImageManager():
    """Image operations on datastores: lookup, copy, creation and cleanup
    of temp image folders.
    """
    # Not referenced in the part of the class reviewed here.
    NUM_MAKEDIRS_ATTEMPTS = 10
    # Default interval handed to Periodic by monitor_for_cleanup
    # (presumably seconds -- confirm against Periodic).
    DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL = 600.0
    # reap_tmp_images skips tmp image folders whose st_ctime is more recent
    # than this many seconds ago.
    REAP_TMP_IMAGES_GRACE_PERIOD = 2 * 60.0 * 60.0  # 2 hrs
    # Not referenced in the part of the class reviewed here
    # (presumably seconds -- TODO confirm).
    DELETE_IMAGE_GRACE_PERIOD = 60
    # Not referenced in the part of the class reviewed here.
    UNUSED_IMAGE_MARKER_FILE_NAME = "unused_image_marker.txt"
    # Name of the timestamp file looked up inside an image directory.
    IMAGE_TIMESTAMP_FILE_NAME = "image_timestamp.txt"

    def __init__(self, host_client, ds_manager):
        self._logger = logging.getLogger(__name__)
        self._host_client = host_client
        self._ds_manager = ds_manager
        self._image_reaper = None

    def monitor_for_cleanup(self, reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
        """Start a daemon Periodic that runs reap_tmp_images.

        reap_interval: interval handed to Periodic (presumably seconds --
                       confirm against Periodic)
        """
        reaper = Periodic(self.reap_tmp_images, reap_interval)
        self._image_reaper = reaper
        reaper.daemon = True
        reaper.start()

    def cleanup(self):
        """Stop the tmp image reaper, if one has been created."""
        reaper = self._image_reaper
        if reaper is None:
            return
        reaper.stop()

    def datastores_with_image(self, image_id, datastores):
        """Return the datastores, in input order, that hold the image.

        An image_id of None yields an empty list without checking any
        datastore.
        """
        holders = []
        if image_id is not None:
            for ds in datastores:
                if self.check_image(image_id, ds):
                    holders.append(ds)
        return holders

    def image_metadata(self, image_id, datastores):
        """Return the metadata of the image from the first datastore that
        holds it, or None if no datastore does.
        """
        for ds in datastores:
            if not self.check_image(image_id, ds):
                continue
            return self.get_image_metadata(image_id, ds)
        return None

    @staticmethod
    def get_image_id_from_disks(disks):
        """Find image id in the disk collection"""
        if not disks:
            return None

        for disk in disks:
            try:
                if disk.image.id is not None:
                    return disk.image.id
            except AttributeError:
                continue
        return None

    @log_duration
    def check_image(self, image_id, datastore):
        """Return True if the image's vmdk file exists on the datastore.

        Errors raised while checking are logged and reported as False.
        """
        vmdk_file = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            return os.path.exists(vmdk_file)
        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and SystemExit
            # must not be turned into "image not found".
            self._logger.exception("Error looking up %s" % vmdk_file)
            return False

    """
    The following method is intended as a replacement of check_image in
    the vm creation workflow compatible with the new image sweeper.
    For an image to be valid both the directory and the image timestamp
    file must exist on the datastore.
    """
    def check_and_validate_image(self, image_id, ds_id):
        """Return True only if both the image directory and its timestamp
        file exist on the datastore.

        Errors raised while checking are logged and reported as False.
        """
        image_dir = os.path.dirname(os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        try:
            if not os.path.exists(image_dir):
                return False
        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and SystemExit
            # must not be reported as a missing image.
            self._logger.exception("Error looking up %s" % image_dir)
            return False

        # Check the existence of the timestamp file
        timestamp_pathname = os.path.join(image_dir, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            return os.path.exists(timestamp_pathname)
        except Exception as ex:
            self._logger.exception("Exception looking up %s, %s" % (timestamp_pathname, ex))
            return False

    """
    This method is used to update the mod time on the image timestamp file.
    """
    def touch_image_timestamp(self, ds_id, image_id):
        """Set the access and modification times of the image timestamp
        file to the current time.

        raises: whatever os.utime raises (for instance when the timestamp
                file does not exist); the error is logged before it is
                re-raised.
        """
        image_path = os.path.dirname(os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        # Touch the timestamp file
        timestamp_pathname = os.path.join(image_path, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            os.utime(timestamp_pathname, None)
        except Exception as ex:
            self._logger.exception("Exception looking up %s, %s" % (timestamp_pathname, ex))
            # A bare raise re-raises the active exception with its original
            # traceback; "raise ex" would restart the traceback here on
            # Python 2.
            raise

    @log_duration
    def check_image_dir(self, image_id, datastore):
        """Return True if the directory containing the image's vmdk exists.

        Errors raised while checking are logged and reported as False.
        """
        image_path = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            return os.path.exists(os.path.dirname(image_path))
        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and SystemExit
            # must not be reported as a missing directory.
            self._logger.error(
                "Error looking up %s" % image_path, exc_info=True)
            return False

    def get_image_directory_path(self, datastore_id, image_id):
        """Return image_directory_path(datastore_id, image_id)."""
        directory = image_directory_path(datastore_id, image_id)
        return directory

    def get_image_path(self, datastore_id, image_id):
        """Return the OS path of the image's vmdk on the given datastore."""
        vmdk_file = os_vmdk_path(datastore_id, image_id, IMAGE_FOLDER_NAME_PREFIX)
        return vmdk_file

    def image_size(self, image_id):
        """Return the size in bytes of an image found on an image datastore.

        Image datastores are tried in the order the datastore manager
        returns them. On a VSAN datastore a fixed 1GB is reported when the
        image vmdk exists; on any other datastore the size of the flat vmdk
        file is returned.

        raises: NoSuchResourceException if no image datastore yields a size
        """
        for image_ds in self._ds_manager.image_datastores():
            # Compare by value, like the other datastore type checks in this
            # class; "is" only works when both sides happen to be the very
            # same object.
            if self._ds_manager.datastore_type(image_ds) == DatastoreType.VSAN:
                if os.path.exists(os_vmdk_path(image_ds, image_id, IMAGE_FOLDER_NAME_PREFIX)):
                    # VSAN does not have flat.vmdk so we cannot get file size. Default to 1GB.
                    return 1024 ** 3
            else:
                try:
                    image_path = os_vmdk_flat_path(image_ds, image_id, IMAGE_FOLDER_NAME_PREFIX)
                    return os.path.getsize(image_path)
                except os.error:
                    # Not on this datastore; try the next one.
                    pass
            self._logger.info("Image %s not found in DataStore %s" % (image_id, image_ds))

        # NOTE(review): this runs outside of any except block, so what
        # exc_info=True attaches depends on what the interpreter still
        # considers the current exception at this point.
        self._logger.warning("Failed to get image size:", exc_info=True)
        # Failed to access shared image.
        raise NoSuchResourceException(ResourceType.IMAGE, "Image does not exist.")

    def get_image_metadata(self, image_id, datastore):
        """Load the image's metadata file as JSON.

        Returns an empty dict when the metadata file does not exist or does
        not contain valid JSON (the latter is logged as an error).
        """
        metadata_path = os_metadata_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        self._logger.info("Loading metadata %s" % metadata_path)
        if not os.path.exists(metadata_path):
            return {}

        with open(metadata_path) as fh:
            try:
                return json.load(fh)
            except ValueError:
                self._logger.error("Error loading metadata file %s" % metadata_path, exc_info=True)
        return {}

    def _get_datastore_type(self, datastore_id):
        datastores = self._ds_manager.get_datastores()
        return [ds.type for ds in datastores if ds.id == datastore_id][0]

    def _copy_to_tmp_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """ Stage a copy of an image in a newly created temp directory.

            1. Build the temp directory path. Its last component is the tmp
            image prefix combined with a random UUID; on VSAN the component
            for the destination image folder is put in front of it.
            2. Create the temp directory through the host client.
            3. Copy the source metadata file over, if one exists, under the
            metadata file name of the destination image.
            4. Create the image timestamp file.
            5. Copy the vmdk over through the host client.

            No lock is taken by this method.

            @return the tmp image directory on success.
        """
        dest_type = self._get_datastore_type(dest_datastore)
        path_parts = []
        if dest_type == DatastoreType.VSAN:
            path_parts.append(compond_path_join(IMAGE_FOLDER_NAME_PREFIX, dest_id))
        path_parts.append(compond_path_join(TMP_IMAGE_FOLDER_NAME_PREFIX, str(uuid.uuid4())))
        staging_dir = os_datastore_path(dest_datastore, *path_parts)

        self._host_client.make_directory(staging_dir)

        # The metadata file is optional; a failed copy is fatal though.
        meta_src = os_metadata_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX)
        if os.path.exists(meta_src):
            try:
                meta_dst = os.path.join(staging_dir, metadata_filename(dest_id))
                shutil.copy(meta_src, meta_dst)
            except:
                self._logger.exception("Failed to copy metadata file %s", meta_src)
                raise

        self._create_image_timestamp_file(staging_dir)

        src_disk = vmdk_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX)
        dst_disk = os.path.join(staging_dir, "%s.vmdk" % dest_id)
        self._host_client.copy_disk(src_disk, dst_disk)
        return staging_dir

    def _move_image(self, image_id, datastore, tmp_dir):
        """
        Move a tmp image folder into its final image location on the
        datastore. Concurrent moves of the same image are serialized by
        taking a FileBackedLock on the destination image path for the
        duration of the move.
        Has the following side effects:
            a - If the destination image already exists (as reported by
            _check_image_repair), DiskAlreadyExistException is raised.
            b - On any failure inside the locked section, including failing
            to acquire the lock, the error is logged, the host client is
            asked to delete tmp_dir, and the error is re-raised.

        image_id: String.The image id of the image being moved.
        datastore: String. The datastore id of the datastore.
        tmp_dir: String. The absolute path of the temp image directory.

        raises: ImageNotFoundException if tmp_dir does not exist
                DiskAlreadyExistException if the image is already in place
                OsError if the move fails
                AcquireLockFailure, InvalidFile if we fail to lock the
                destination image.
        """
        ds_type = self._get_datastore_type(datastore)
        image_path = os_datastore_path(datastore, compond_path_join(IMAGE_FOLDER_NAME_PREFIX, image_id))
        self._logger.info("_move_image: %s => %s, ds_type: %s" % (tmp_dir, image_path, ds_type))

        # Checked before taking the lock, so a missing tmp_dir does not go
        # through the cleanup handler below.
        if not os.path.exists(tmp_dir):
            raise ImageNotFoundException("Temp image %s not found" % tmp_dir)

        try:
            with FileBackedLock(image_path, ds_type, retry=300, wait_secs=0.1):  # wait lock for 30 seconds
                if self._check_image_repair(image_id, datastore):
                    raise DiskAlreadyExistException("Image already exists")

                if ds_type == DatastoreType.VSAN:
                    # on VSAN, move all files under [datastore]/image_[image_id]/tmp_image_[uuid]/* to
                    # [datastore]/image_[image_id]/*.
                    # Also we do not delete tmp_image folder in success case, because VSAN accesses it
                    # when creating linked VM, even the folder is now empty.
                    for entry in os.listdir(tmp_dir):
                        shutil.move(os.path.join(tmp_dir, entry), os.path.join(image_path, entry))
                else:
                    # on VMFS/NFS/etc, move the whole tmp image folder to the image folder (image_path)
                    self._host_client.move_file(tmp_dir, image_path)
        except:
            # Every failure, DiskAlreadyExistException included, ends up
            # here: log, drop the tmp folder, re-raise.
            self._logger.exception("Move image %s to %s failed" % (image_id, image_path))
            self._host_client.delete_file(tmp_dir)
            raise

    """
    The following method should be used to check and validate the existence of a previously
    created image. With the new image delete path the "timestamp" file must exist inside the
    image directory. If the directory exists and the file does not, it may mean that an image
    delete operation was aborted mid-way. In this case the following method recreates the timestamp
    file. All operations are performed while holding the image directory lock (FileBackedLock);
    the caller is required to hold the lock.
    """
    def _check_image_repair(self, image_id, datastore):
        """Report whether the image exists, recreating its timestamp file.

        Returns False when the image vmdk is missing or cannot be checked,
        or when the timestamp file has to be recreated and that fails.
        Returns True when the vmdk exists and the timestamp file either is
        already there or was recreated successfully.
        """
        vmdk_file = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        image_dir = os.path.dirname(vmdk_file)

        # Step 1: the vmdk itself has to be present.
        try:
            if not os.path.exists(vmdk_file):
                self._logger.info("Vmdk path doesn't exists: %s" % vmdk_file)
                return False
        except Exception as ex:
            self._logger.exception("Exception validating %s, %s" % (image_dir, ex))
            return False

        # Step 2: an existing timestamp file means nothing needs repair.
        timestamp_file = os.path.join(image_dir, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            if os.path.exists(timestamp_file):
                self._logger.info("Timestamp file exists: %s" % timestamp_file)
                return True
        except Exception as ex:
            # Logged only; fall through and try to recreate the file.
            self._logger.exception("Exception validating %s, %s" % (timestamp_file, ex))

        # Step 3: recreate the timestamp file.
        try:
            self._create_image_timestamp_file(image_dir)
        except Exception as ex:
            self._logger.exception("Exception creating %s, %s" % (timestamp_file, ex))
            return False
        else:
            self._logger.info("Image repaired: %s" % image_dir)
            return True

    def copy_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """Copy an image between datastores.

        This method is used to create a "full clone" of a vmdk.
        The disk is first copied into a temporary image directory whose
        name carries a random UUID, and that directory is then moved to the
        destination image location. This minimizes the window during which
        the vmdk path exists with incomplete content.

        If check_and_validate_image reports that the destination image
        already exists, nothing is copied and DiskAlreadyExistException is
        raised.

        source_datastore: id of the source datastore
        source_id: id of the image to copy from
        dest_datastore: id of the destination datastore
        dest_id: id of the new image in the destination datastore

        throws: DiskAlreadyExistException if the destination image exists
        throws: AcquireLockFailure if timed out waiting to acquire the lock
                on the destination image
        throws: InvalidFile if unable to lock the destination image or some
                other reasons
        """
        already_present = self.check_and_validate_image(dest_id, dest_datastore)
        if already_present:
            # Presumably the result of some other, concurrent copy.
            self._logger.info("Image %s already copied" % dest_id)
            raise DiskAlreadyExistException("Image already exists")

        # Stage the copy first, then move it into place.
        staged_dir = self._copy_to_tmp_image(source_datastore, source_id, dest_datastore, dest_id)
        self._move_image(dest_id, dest_datastore, staged_dir)

    def reap_tmp_images(self):
        """ Clean up unused directories in the temp image folder.

        For every datastore, each tmp image directory whose st_ctime is
        older than REAP_TMP_IMAGES_GRACE_PERIOD is removed while holding a
        FileBackedLock on it. Directories that are too recent, cannot be
        stat'ed or cannot be locked are skipped; any other error for a
        directory is logged and the scan moves on to the next one.
        """
        for ds in self._ds_manager.get_datastores():
            for image_dir in list_top_level_directory(ds.id, TMP_IMAGE_FOLDER_NAME_PREFIX):
                if not os.path.isdir(image_dir):
                    continue

                try:
                    # Note: st_ctime is used as the folder's "creation" time.
                    create_time = os.stat(image_dir).st_ctime
                except OSError:
                    # The folder can disappear between the isdir check and
                    # the stat (e.g. it was just moved into place); that
                    # must not abort the scan of the remaining folders.
                    self._logger.info("Unable to stat %s, skipping" % image_dir, exc_info=True)
                    continue

                current_time = time.time()
                if current_time - self.REAP_TMP_IMAGES_GRACE_PERIOD < create_time:
                    # Skip folders that are newly created in past x minutes
                    # For example, during host-to-host transfer, hostd on
                    # receiving end stores the uploaded file in temp images
                    # folder but does not lock it with FileBackedLock, so we
                    # need to allow a grace period before reaping it.
                    self._logger.info("Skip folder: %s, created: %s, now: %s" % (image_dir, create_time, current_time))
                    continue

                try:
                    with FileBackedLock(image_dir, ds.type):
                        if os.path.exists(image_dir):
                            self._logger.info("Delete folder %s" % image_dir)
                            shutil.rmtree(image_dir, ignore_errors=True)
                except (AcquireLockFailure, InvalidFile):
                    self._logger.info("Already locked: %s, skipping" % image_dir)
                except Exception:
                    # Best effort per directory. Catch Exception rather than
                    # everything so that KeyboardInterrupt and SystemExit
                    # are not swallowed by the reaper.
                    self._logger.info("Unable to remove %s" % image_dir, exc_info=True)

    def get_images(self, datastore):
        """ Get image list from datastore

        Every top-level image folder of the datastore is inspected; the
        image id is the part of the folder name after the first
        COMPOND_PATH_SEPARATOR, and it is kept if check_image accepts it.

        :param datastore: datastore id
        :return: list of string, image id list
        :raise DatastoreNotFoundException: if the datastore root is missing
        """
        image_ids = []

        if not os.path.exists(os_datastore_root(datastore)):
            raise DatastoreNotFoundException()

        for entry in list_top_level_directory(datastore, IMAGE_FOLDER_NAME_PREFIX):
            # Look at the last path component only and split it once, so
            # that a separator inside an earlier path component (e.g. the
            # datastore name) or inside the image id itself cannot corrupt
            # the extracted id. Works for bare folder names as well.
            folder_name = os.path.basename(entry.rstrip(os.sep))
            _, separator, image_id = folder_name.partition(COMPOND_PATH_SEPARATOR)
            if not separator:
                # No separator means the folder name carries no image id.
                continue
            if self.check_image(image_id, datastore):
                image_ids.append(image_id)

        return image_ids

    def get_datastore_id_from_path(self, image_path):
        """Extract datastore id from the absolute path of an image.

        The datastore id is taken to be the fourth element obtained when
        splitting the path on os.sep. For

            /vmfs/volumes/datastore1/image_ttylinux/ttylinux.vmdk

        this is "datastore1".
        """
        components = image_path.split(os.sep)
        return components[3]

    def get_image_id_from_path(self, image_path):
        """Extract image id from the absolute path of an image.

        The image folder is taken to be the fifth element obtained when
        splitting the path on os.sep, i.e. the folder right below the
        datastore in a path shaped like

            /vmfs/volumes/<datastore>/<image folder>/<image>.vmdk

        The image id is the second piece of that folder name when it is
        split on COMPOND_PATH_SEPARATOR.
        """
        image_folder = image_path.split(os.sep)[4]
        folder_pieces = image_folder.split(COMPOND_PATH_SEPARATOR)
        return folder_pieces[1]

    def create_image(self, image_id, datastore_id):
        """ Create a temp image directory on the given datastore.

        The directory name is the tmp image prefix combined with a random
        UUID. The directory is created through the host client.

        Returns the datastore path of the new directory.
        """
        on_vsan = self._get_datastore_type(datastore_id) == DatastoreType.VSAN

        # on VMFS/NFS/etc, tmp_dir is [datastore]/tmp_image_[uuid]
        relative_path = compond_path_join(TMP_IMAGE_FOLDER_NAME_PREFIX, str(uuid.uuid4()))
        if on_vsan:
            # on VSAN, tmp_dir is [datastore]/image_[image_id]/tmp_image_[uuid]
            # Because VSAN does not allow moving top-level directories, we place tmp_image
            # under image's dir.
            relative_path = os.path.join(compond_path_join(IMAGE_FOLDER_NAME_PREFIX, image_id),
                                         relative_path)
        staging_dir = os_datastore_path(datastore_id, relative_path)

        self._host_client.make_directory(staging_dir)
        # return datastore path, so that it can be passed to nfc client
        return os_to_datastore_path(staging_dir)

    def finalize_image(self, datastore_id, tmp_dir, image_id):
        """Install the image staged in tmp_dir, then create its timestamp file.

        tmp_dir is a datastore path; it is converted to an OS path before
        being handed to _move_image.
        """
        staged_os_path = datastore_to_os_path(tmp_dir)
        self._move_image(image_id, datastore_id, staged_os_path)

        installed_dir = self._image_directory(datastore_id, image_id)
        self._create_image_timestamp_file(installed_dir)

    def create_image_with_vm_disk(self, datastore_id, tmp_dir, image_id,
                                  vm_disk_os_path):
        """Copy a VM disk into a temp image directory and install the image.

        The disk is copied to <tmp_dir>/<image_id>.vmdk and the directory is
        then finalized. If finalizing fails, the copied disk is deleted and
        the original exception is re-raised.
        """
        staging_dir = datastore_to_os_path(tmp_dir)
        copied_vmdk = os.path.join(staging_dir, "%s.vmdk" % image_id)

        disk_already_there = os.path.exists(copied_vmdk)
        if disk_already_there:
            self._logger.warning("Unexpected disk %s present, overwriting" % copied_vmdk)

        self._host_client.copy_disk(vm_disk_os_path, copied_vmdk)

        try:
            self.finalize_image(datastore_id, tmp_dir, image_id)
        except:
            # Roll back the copy before propagating the failure.
            self._logger.warning("Delete copied disk %s" % copied_vmdk)
            self._host_client.delete_disk(copied_vmdk)
            raise

    def delete_tmp_dir(self, datastore_id, tmp_dir):
        """Delete a temp image directory from a datastore.

        The directory is resolved with os_datastore_path(datastore_id,
        tmp_dir) and removed with rm_rf.

        Raises:
            DirectoryNotFound: if the resolved path does not exist.
        """
        file_path = os_datastore_path(datastore_id, tmp_dir)
        if not os.path.exists(file_path):
            self._logger.info("Tmp dir %s not found" % file_path)
            raise DirectoryNotFound("Directory %s not found" % file_path)
        rm_rf(file_path)

    @staticmethod
    def _read_marker_file(filename):
        """Read the whole marker file and return its content parsed as a float."""
        with open(filename, "r") as marker_fh:
            raw_content = marker_fh.read()
        marker_value = float(raw_content)
        return marker_value

    """
    Delete a single image following the delete image steps. This
    method is supposed to be safe when run concurrently with:
    a) itself,
    b) image creation/copy,
    c) vm creation

    The steps are outlined here:
    1) Read content of the unused_image_marker file. If error, move on to next image.
    2) Acquire image-lock.
    3) Read the mod time on the t-stamp file. If t-stamp file doesn't exist go to 6.
    4) If the mod time of the t-stamp file is newer than the content of the marker
       file move on to next image.
    5) Move the t-stamp file to another name.
    6) Check the mod time on the new name of the t-stamp file. if the mod time has
       changed, move on to next image.
    7) move image directory to a trash location

    This method returns True if the image was removed, False if the image could not be removed.
    """

    def delete_image(self, datastore_id, image_id, grace_period):
        """Delete an image directory unless the image was used recently.

        While holding the FileBackedLock on the image directory, the marker
        file written by the image scanner is read and compared with the mod
        time of the image timestamp file. If the timestamp file exists and is
        at least as new as (marker time - grace_period), the image is treated
        as in use: the marker file is removed and nothing is deleted.
        Otherwise the image directory is deleted through the host client.

        Args:
            datastore_id: id of the datastore holding the image.
            image_id: id of the image to delete.
            grace_period: amount subtracted from the marker time before the
                comparison (presumably seconds, like the marker value).

        Returns:
            True if the image directory was deleted; False if the image is
            in use or any exception occurred (the exception is logged, not
            propagated).
        """
        self._logger.info("delete_image: Starting to delete image: %s, %s" % (datastore_id, image_id))

        image_dir = self._image_directory(datastore_id, image_id)
        ds_type = self._get_datastore_type(datastore_id)
        marker_pathname = os.path.join(image_dir, self.UNUSED_IMAGE_MARKER_FILE_NAME)
        timestamp_pathname = os.path.join(image_dir, self.IMAGE_TIMESTAMP_FILE_NAME)

        try:
            with FileBackedLock(image_dir, ds_type):
                # Read marker file to determine when image scanner marked this image as unused
                marker_time = self._read_marker_file(marker_pathname)
                self._logger.info("delete_image: image was marked as unused at: %s" % marker_time)
                # Subtract grace time to avoid errors due to small difference in clock
                # values on different hosts. Pretend the scan started grace_period earlier.
                marker_time -= grace_period

                # Read timestamp mod_time to determine the latest vm creation using this image
                timestamp_exists, mod_time = self._get_mod_time(timestamp_pathname)
                self._logger.info("delete_image: image was last touched at: %s, %s" % (timestamp_exists, mod_time))

                # Image was touched (due to VM creation) after scanner marked it as unused.
                # Remove unused image marker file
                if timestamp_exists and mod_time >= marker_time:
                    self._logger.info("delete_image: image is in-use, do not delete")
                    os.unlink(marker_pathname)
                    return False

                # Delete image directory (still inside the lock in this version)
                self._logger.info("delete_image: removing image directory: %s" % image_dir)
                self._host_client.delete_file(image_dir)

            return True
        except Exception:
            # Any failure (lock, marker parsing, delete) is reported as "not deleted".
            self._logger.exception("delete_image: failed to delete image")
            return False

    def _get_mod_time(self, pathname):
        """Return (exists, mod_time) for a file.

        Returns (True, mtime) when the mod time can be read and (False, 0)
        when the file does not exist (ENOENT). Any other OSError is logged
        and re-raised.
        """
        try:
            return True, os.path.getmtime(pathname)
        except OSError as ex:
            self._logger.warning("Cannot read mod time for file: %s, %s" % (pathname, ex))
            if ex.errno != errno.ENOENT:
                raise ex
            return False, 0

    def get_timestamp_mod_time_from_dir(self, dirname):
        """Return (exists, mod_time) of the image timestamp file inside dirname."""
        timestamp_pathname = os.path.join(dirname, self.IMAGE_TIMESTAMP_FILE_NAME)
        result = self._get_mod_time(timestamp_pathname)
        return result

    def _create_image_timestamp_file(self, dirname):
        """Create (or truncate) the image timestamp file inside dirname.

        Any exception is logged and re-raised.
        """
        try:
            timestamp_pathname = os.path.join(dirname, self.IMAGE_TIMESTAMP_FILE_NAME)
            # Opening for write is enough; the file carries no content.
            with open(timestamp_pathname, 'w'):
                pass
        except Exception as ex:
            self._logger.exception("Exception creating %s, %s" % (dirname, ex))
            raise ex

    def _image_directory(self, datastore_id, image_id):
        """Return the directory that contains the image's vmdk file."""
        vmdk_pathname = os_vmdk_path(datastore_id, image_id, IMAGE_FOLDER_NAME_PREFIX)
        return os.path.dirname(vmdk_pathname)
Ejemplo n.º 14
0
class ImageManager():
    NUM_MAKEDIRS_ATTEMPTS = 10
    DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL = 600.0
    REAP_TMP_IMAGES_GRACE_PERIOD = 2 * 60.0 * 60.0  # 2 hrs
    DELETE_IMAGE_GRACE_PERIOD = 60
    UNUSED_IMAGE_MARKER_FILE_NAME = "unused_image_marker.txt"
    IMAGE_TIMESTAMP_FILE_NAME = "image_timestamp.txt"

    def __init__(self, host_client, ds_manager):
        self._logger = logging.getLogger(__name__)
        self._host_client = host_client
        self._ds_manager = ds_manager
        self._image_reaper = None

    def monitor_for_cleanup(self, reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
        """Start a daemon Periodic that calls reap_tmp_images with reap_interval."""
        self._image_reaper = reaper = Periodic(self.reap_tmp_images, reap_interval)
        reaper.daemon = True
        reaper.start()

    def cleanup(self):
        if self._image_reaper is not None:
            self._image_reaper.stop()

    def datastores_with_image(self, image_id, datastores):
        if image_id is None:
            return []
        return [ds for ds in datastores if self.check_image(image_id, ds)]

    def image_metadata(self, image_id, datastores):
        for ds in datastores:
            if self.check_image(image_id, ds):
                return self.get_image_metadata(image_id, ds)

    @staticmethod
    def get_image_id_from_disks(disks):
        """Find image id in the disk collection"""
        if not disks:
            return None

        for disk in disks:
            try:
                if disk.image.id is not None:
                    return disk.image.id
            except AttributeError:
                continue
        return None

    @log_duration
    def check_image(self, image_id, datastore):
        """Return True if the image's vmdk path exists on the datastore.

        Any exception raised while checking is logged and reported as False.
        """
        image_dir = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            found = os.path.exists(image_dir)
        except:
            self._logger.exception("Error looking up %s" % image_dir)
            found = False
        return found

    """
    The following method is intended as a replacement of check_image in
    the vm creation workflow compatible with the new image sweeper.
    For an image to be valid, both the directory and the image timestamp
    file must exist on the datastore.
    """
    def check_and_validate_image(self, image_id, ds_id):
        """Return True only if both the image directory and its timestamp file exist.

        Exceptions raised by either existence check are logged and reported
        as False.
        """
        image_dir = os.path.dirname(os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        try:
            dir_present = os.path.exists(image_dir)
        except:
            self._logger.exception("Error looking up %s" % image_dir)
            return False
        if not dir_present:
            return False

        # The directory is there; the image is valid only with its timestamp file.
        timestamp_pathname = os.path.join(image_dir, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            timestamp_present = os.path.exists(timestamp_pathname)
        except Exception as ex:
            self._logger.exception("Exception looking up %s, %s" % (timestamp_pathname, ex))
            return False

        return True if timestamp_present else False

    """
    This method is used to update the mod time on the image timestamp file.
    """
    def touch_image_timestamp(self, ds_id, image_id):
        """Set the access and mod time of the image timestamp file to now.

        The timestamp file lives in the directory that holds the image's
        vmdk. The file is not created if it is missing.

        Raises:
            Whatever os.utime raises (for example OSError when the file does
            not exist); the exception is logged and re-raised unchanged.
        """
        image_path = os.path.dirname(os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        # Touch the timestamp file
        timestamp_pathname = os.path.join(image_path, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            os.utime(timestamp_pathname, None)
        except Exception as ex:
            self._logger.exception("Exception touching %s, %s" % (timestamp_pathname, ex))
            # Bare raise keeps the original traceback intact.
            raise

    @log_duration
    def check_image_dir(self, image_id, datastore):
        """Return True if the directory holding the image's vmdk exists.

        Any exception raised while checking is logged and reported as False.
        """
        image_path = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            dir_present = os.path.exists(os.path.dirname(image_path))
        except:
            self._logger.error(
                "Error looking up %s" % image_path, exc_info=True)
            dir_present = False
        return dir_present

    def get_image_directory_path(self, datastore_id, image_id):
        """Return the result of image_directory_path() for this datastore and image."""
        directory = image_directory_path(datastore_id, image_id)
        return directory

    def get_image_path(self, datastore_id, image_id):
        """Return the OS path of the image's vmdk on the given datastore."""
        vmdk_pathname = os_vmdk_path(datastore_id, image_id, IMAGE_FOLDER_NAME_PREFIX)
        return vmdk_pathname

    def image_size(self, image_id):
        """Return the size in bytes of an image found on an image datastore.

        Image datastores are probed in the order the datastore manager
        returns them. On VSAN there is no flat vmdk to measure, so an
        existing image is reported as 1GB. On other datastore types the size
        of the flat vmdk file is returned.

        Raises:
            NoSuchResourceException: if no image datastore has the image.
        """
        for image_ds in self._ds_manager.image_datastores():
            # Compare by value, as the rest of this class does.
            if self._ds_manager.datastore_type(image_ds) == DatastoreType.VSAN:
                if os.path.exists(os_vmdk_path(image_ds, image_id, IMAGE_FOLDER_NAME_PREFIX)):
                    # VSAN does not have flat.vmdk so we cannot get file size. Default to 1GB.
                    return 1024 ** 3
            else:
                try:
                    image_path = os_vmdk_flat_path(image_ds, image_id, IMAGE_FOLDER_NAME_PREFIX)
                    return os.path.getsize(image_path)
                except os.error as ex:
                    # Record why this datastore was skipped instead of dropping the error.
                    self._logger.info("Cannot get size of image %s in DataStore %s: %s" %
                                      (image_id, image_ds, ex))
            self._logger.info("Image %s not found in DataStore %s" % (image_id, image_ds))

        # No exception is active here, so there is no traceback to attach.
        self._logger.warning("Failed to get image size: %s" % image_id)
        # Failed to access shared image.
        raise NoSuchResourceException(ResourceType.IMAGE, "Image does not exist.")

    def get_image_metadata(self, image_id, datastore):
        """Load the image's metadata file as JSON.

        Returns an empty dict when the metadata file does not exist or does
        not contain valid JSON (the parse error is logged).
        """
        metadata_path = os_metadata_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        self._logger.info("Loading metadata %s" % metadata_path)

        if not os.path.exists(metadata_path):
            return {}

        with open(metadata_path) as fh:
            try:
                metadata = json.load(fh)
            except ValueError:
                self._logger.error("Error loading metadata file %s" % metadata_path, exc_info=True)
                metadata = {}
        return metadata

    def _get_datastore_type(self, datastore_id):
        datastores = self._ds_manager.get_datastores()
        return [ds.type for ds in datastores if ds.id == datastore_id][0]

    def _copy_to_tmp_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """Copy an image into a temp directory on the destination datastore.

        Steps:
            1. Create the temp directory. Its name contains a random UUID to
               prevent collisions with concurrent copies. On VSAN it is
               placed under the destination image's folder, elsewhere at the
               top level of the datastore.
            2. Copy the metadata file over, if the source has one.
            3. Create the image timestamp file.
            4. Copy the vmdk over.

        No lock is taken here.

        @return the tmp image directory on success.
        """
        ds_type = self._get_datastore_type(dest_datastore)
        tmp_folder = compond_path_join(TMP_IMAGE_FOLDER_NAME_PREFIX, str(uuid.uuid4()))
        if ds_type == DatastoreType.VSAN:
            # Build a single relative path, the same way create_image does,
            # so os_datastore_path is always called with (datastore, path).
            tmp_folder = os.path.join(compond_path_join(IMAGE_FOLDER_NAME_PREFIX, dest_id),
                                      tmp_folder)
        tmp_image_dir = os_datastore_path(dest_datastore, tmp_folder)

        # Create the temp directory
        self._host_client.make_directory(tmp_image_dir)

        # Copy the metadata file if it exists.
        source_meta = os_metadata_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX)
        if os.path.exists(source_meta):
            try:
                dest_meta = os.path.join(tmp_image_dir, metadata_filename(dest_id))
                shutil.copy(source_meta, dest_meta)
            except:
                self._logger.exception("Failed to copy metadata file %s", source_meta)
                raise

        # Create the timestamp file
        self._create_image_timestamp_file(tmp_image_dir)

        self._host_client.copy_disk(vmdk_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX),
                                    os.path.join(tmp_image_dir, "%s.vmdk" % dest_id))
        return tmp_image_dir

    def _move_image(self, image_id, datastore, tmp_dir):
        """
        Move a tmp folder into its final image location on the datastore.

        Concurrent moves are serialized by taking a FileBackedLock on the
        destination image path (retry=300, wait_secs=0.1) for the duration
        of the move.
        Behavior worth knowing:
            a - If the destination image already exists (as decided by
            _check_image_repair), DiskAlreadyExistException is raised.
            b - On ANY failure inside the locked section, including the
            "already exists" case and a failure to acquire the lock, the
            tmp directory is deleted through the host client and the
            exception is re-raised.
            c - On VSAN only the entries of the tmp directory are moved; the
            (now empty) tmp directory itself is left in place on success.

        image_id: String. The image id of the image being moved.
        datastore: String. The datastore id of the datastore.
        tmp_dir: String. The absolute path of the temp image directory.

        raises: ImageNotFoundException if tmp_dir does not exist.
                DiskAlreadyExistException if the destination image exists.
                OsError if the move fails.
                AcquireLockFailure, InvalidFile if we fail to lock the
                destination image (presumably raised by FileBackedLock).
        """
        ds_type = self._get_datastore_type(datastore)
        image_path = os_datastore_path(datastore, compond_path_join(IMAGE_FOLDER_NAME_PREFIX, image_id))
        self._logger.info("_move_image: %s => %s, ds_type: %s" % (tmp_dir, image_path, ds_type))

        # Checked before the try block, so a missing tmp_dir triggers no cleanup.
        if not os.path.exists(tmp_dir):
            raise ImageNotFoundException("Temp image %s not found" % tmp_dir)

        try:
            with FileBackedLock(image_path, ds_type, retry=300, wait_secs=0.1):  # wait lock for 30 seconds
                if self._check_image_repair(image_id, datastore):
                    raise DiskAlreadyExistException("Image already exists")

                if ds_type == DatastoreType.VSAN:
                    # on VSAN, move all files under [datastore]/image_[image_id]/tmp_image_[uuid]/* to
                    # [datastore]/image_[image_id]/*.
                    # Also we do not delete tmp_image folder in success case, because VSAN accesses it
                    # when creating linked VM, even the folder is now empty.
                    for entry in os.listdir(tmp_dir):
                        shutil.move(os.path.join(tmp_dir, entry), os.path.join(image_path, entry))
                else:
                    # on VMFS/NFS/etc, rename [datastore]/tmp_image_[uuid] to [datastore]/image_[image_id]
                    self._host_client.move_file(tmp_dir, image_path)
        except:
            # Clean up the staged data before propagating the failure.
            self._logger.exception("Move image %s to %s failed" % (image_id, image_path))
            self._host_client.delete_file(tmp_dir)
            raise

    """
    The following method should be used to check and validate the existence of a previously
    created image. With the new image delete path the "timestamp" file must exist inside the
    image directory. If the directory exists and the file does not, it may mean that an image
    delete operation was aborted mid-way. In this case the following method recreates the timestamp
    file. All operations are performed while holding the image directory lock (FileBackedLock);
    the caller is required to hold the lock.
    """
    def _check_image_repair(self, image_id, datastore):
        """Report whether the image exists, recreating a missing timestamp file.

        Returns False when the vmdk does not exist, when checking for it
        fails, or when a missing timestamp file cannot be recreated.
        Returns True when the vmdk exists and the timestamp file either
        already exists or was recreated.
        """
        vmdk_pathname = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        image_dirname = os.path.dirname(vmdk_pathname)
        timestamp_pathname = os.path.join(image_dirname, self.IMAGE_TIMESTAMP_FILE_NAME)

        # Without a vmdk there is no image to repair.
        try:
            vmdk_missing = not os.path.exists(vmdk_pathname)
            if vmdk_missing:
                self._logger.info("Vmdk path doesn't exists: %s" % vmdk_pathname)
        except Exception as ex:
            self._logger.exception("Exception validating %s, %s" % (image_dirname, ex))
            return False
        if vmdk_missing:
            return False

        # A failing timestamp check is treated like a missing timestamp file.
        try:
            timestamp_present = os.path.exists(timestamp_pathname)
            if timestamp_present:
                self._logger.info("Timestamp file exists: %s" % timestamp_pathname)
        except Exception as ex:
            self._logger.exception("Exception validating %s, %s" % (timestamp_pathname, ex))
            timestamp_present = False
        if timestamp_present:
            return True

        # The timestamp file is not accessible, try creating one
        try:
            self._create_image_timestamp_file(image_dirname)
        except Exception as ex:
            self._logger.exception("Exception creating %s, %s" % (timestamp_pathname, ex))
            return False

        self._logger.info("Image repaired: %s" % image_dirname)
        return True

    def copy_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """Copy an image between datastores.

        The image is first copied into a uniquely named temp directory and
        then moved to the destination image location, which minimizes the
        window during which the vmdk path exists with incomplete content.

        If the destination image already exists and validates, nothing is
        copied and DiskAlreadyExistException is raised.

        source_datastore: id of the source datastore
        source_id: id of the image to copy from
        dest_datastore: id of the destination datastore
        dest_id: id of the new image in the destination datastore

        throws: DiskAlreadyExistException if the destination image exists
        throws: whatever _copy_to_tmp_image and _move_image raise
        """
        already_present = self.check_and_validate_image(dest_id, dest_datastore)
        if already_present:
            # Presumably some other concurrent copy got there first.
            self._logger.info("Image %s already copied" % dest_id)
            raise DiskAlreadyExistException("Image already exists")

        # Stage the copy, then move it into place.
        staged_dir = self._copy_to_tmp_image(source_datastore, source_id, dest_datastore, dest_id)
        self._move_image(dest_id, dest_datastore, staged_dir)

    def reap_tmp_images(self):
        """Remove stale top-level temp image directories on every datastore.

        A directory is left alone while its st_ctime is within
        REAP_TMP_IMAGES_GRACE_PERIOD of now, or when its FileBackedLock
        cannot be taken. Removal errors are logged, never raised.
        """
        for ds in self._ds_manager.get_datastores():
            for tmp_folder in list_top_level_directory(ds.id, TMP_IMAGE_FOLDER_NAME_PREFIX):
                if not os.path.isdir(tmp_folder):
                    continue

                created_at = os.stat(tmp_folder).st_ctime
                now = time.time()
                if created_at > now - self.REAP_TMP_IMAGES_GRACE_PERIOD:
                    # Too recent. For example, during host-to-host transfer
                    # hostd on the receiving end stores the uploaded file in
                    # the temp images folder without taking a FileBackedLock,
                    # so only the grace period protects it.
                    self._logger.info("Skip folder: %s, created: %s, now: %s" % (tmp_folder, created_at, now))
                    continue

                try:
                    with FileBackedLock(tmp_folder, ds.type):
                        still_there = os.path.exists(tmp_folder)
                        if still_there:
                            self._logger.info("Delete folder %s" % tmp_folder)
                            shutil.rmtree(tmp_folder, ignore_errors=True)
                except (AcquireLockFailure, InvalidFile):
                    self._logger.info("Already locked: %s, skipping" % tmp_folder)
                except:
                    self._logger.info("Unable to remove %s" % tmp_folder, exc_info=True)

    def get_images(self, datastore):
        """ List the ids of the images present on a datastore
        :param datastore: datastore id
        :return: list of string, image id list
        :raises DatastoreNotFoundException: if the datastore root does not exist
        """
        if not os.path.exists(os_datastore_root(datastore)):
            raise DatastoreNotFoundException()

        # Each top-level image folder is named <prefix><separator><image id>.
        candidate_ids = (
            folder.split(COMPOND_PATH_SEPARATOR)[1]
            for folder in list_top_level_directory(datastore, IMAGE_FOLDER_NAME_PREFIX)
        )
        return [candidate for candidate in candidate_ids
                if self.check_image(candidate, datastore)]

    def get_datastore_id_from_path(self, image_path):
        """Return the datastore id embedded in an absolute image path.

        The path is split on ``os.sep`` and the component at index 3 is
        returned (index 0 is the empty string before the leading separator).
        For example:

            /vmfs/volumes/datastore1/image_ttylinux/ttylinux.vmdk

        yields "datastore1".
        """
        path_components = image_path.split(os.sep)
        datastore_id = path_components[3]
        return datastore_id

    def get_image_id_from_path(self, image_path):
        """Extract image id from the absolute path of an image.

        The image folder is the component at index 4 of the path once split
        on ``os.sep``; the image id is everything after the first
        COMPOND_PATH_SEPARATOR in that folder name. For example:

            /vmfs/volumes/datastore1/image_ttylinux/ttylinux.vmdk

        yields "ttylinux".
        """
        image_folder = image_path.split(os.sep)[4]
        # Split only once: the id is the whole remainder after the folder
        # prefix, so an id that itself contains the separator is not cut.
        return image_folder.split(COMPOND_PATH_SEPARATOR, 1)[1]

    def create_image(self, image_id, datastore_id):
        """Make a fresh temp image directory and return its datastore path.

        On VSAN the directory is [datastore]/image_[image_id]/tmp_image_[uuid],
        because VSAN does not allow moving top-level directories. On every
        other datastore type it is [datastore]/tmp_image_[uuid].
        """
        on_vsan = self._get_datastore_type(datastore_id) == DatastoreType.VSAN

        path_parts = []
        if on_vsan:
            # Stage underneath the image's own folder.
            path_parts.append(compond_path_join(IMAGE_FOLDER_NAME_PREFIX, image_id))
        path_parts.append(compond_path_join(TMP_IMAGE_FOLDER_NAME_PREFIX, str(uuid.uuid4())))

        staging_dir = os_datastore_path(datastore_id, os.path.join(*path_parts))
        self._host_client.make_directory(staging_dir)

        # Callers hand this to the nfc client, which wants a datastore path.
        return os_to_datastore_path(staging_dir)

    def finalize_image(self, datastore_id, tmp_dir, image_id):
        """Install the image staged in tmp_dir, then create its timestamp file.

        tmp_dir is a datastore path; it is converted to an OS path before
        being handed to _move_image.
        """
        staged_os_path = datastore_to_os_path(tmp_dir)
        self._move_image(image_id, datastore_id, staged_os_path)

        installed_dir = self._image_directory(datastore_id, image_id)
        self._create_image_timestamp_file(installed_dir)

    def create_image_with_vm_disk(self, datastore_id, tmp_dir, image_id,
                                  vm_disk_os_path):
        """Copy a VM disk into a temp image directory and install the image.

        The disk is copied to <tmp_dir>/<image_id>.vmdk and the directory is
        then finalized. If finalizing fails, the copied disk is deleted and
        the original exception is re-raised.
        """
        staging_dir = datastore_to_os_path(tmp_dir)
        copied_vmdk = os.path.join(staging_dir, "%s.vmdk" % image_id)

        disk_already_there = os.path.exists(copied_vmdk)
        if disk_already_there:
            self._logger.warning("Unexpected disk %s present, overwriting" % copied_vmdk)

        self._host_client.copy_disk(vm_disk_os_path, copied_vmdk)

        try:
            self.finalize_image(datastore_id, tmp_dir, image_id)
        except:
            # Roll back the copy before propagating the failure.
            self._logger.warning("Delete copied disk %s" % copied_vmdk)
            self._host_client.delete_disk(copied_vmdk)
            raise

    def delete_tmp_dir(self, datastore_id, tmp_dir):
        """Delete a temp image directory from a datastore.

        The directory is resolved with os_datastore_path(datastore_id,
        tmp_dir) and removed with rm_rf.

        Raises:
            DirectoryNotFound: if the resolved path does not exist.
        """
        file_path = os_datastore_path(datastore_id, tmp_dir)
        if not os.path.exists(file_path):
            self._logger.info("Tmp dir %s not found" % file_path)
            raise DirectoryNotFound("Directory %s not found" % file_path)
        rm_rf(file_path)

    @staticmethod
    def _read_marker_file(filename):
        with open(filename, "r") as marker_file:
            start_time_str = marker_file.read()
        return float(start_time_str)

    """
    Delete a single image following the delete image steps. This
    method is supposed to be safe when run concurrently with:
    a) itself,
    b) image creation/copy,
    c) vm creation

    The steps are outlined here:
    1) Read content of the unused_image_marker file. If error, move on to next image.
    2) Acquire image-lock.
    3) Read the mod time on the t-stamp file. If t-stamp file doesn't exist go to 6.
    4) If the mod time of the t-stamp file is newer than the content of the marker
       file move on to next image.
    5) Move the t-stamp file to another name.
    6) Check the mod time on the new name of the t-stamp file. if the mod time has
       changed, move on to next image.
    7) move image directory to a trash location

    This method returns True if the image was removed, False if the image could not be removed.
    """

    def delete_image(self, datastore_id, image_id, grace_period):
        """Delete an image directory unless the image was used recently.

        While holding the FileBackedLock on the image directory, the marker
        file written by the image scanner is read and compared with the mod
        time of the image timestamp file. If the timestamp file exists and is
        at least as new as (marker time - grace_period), the image is treated
        as in use: the marker file is removed and nothing is deleted.
        Otherwise, after the lock is released, the image is deleted: through
        _delete_image_on_vsan on VSAN, through the host client elsewhere.

        Args:
            datastore_id: id of the datastore holding the image.
            image_id: id of the image to delete.
            grace_period: amount subtracted from the marker time before the
                comparison (presumably seconds, like the marker value).

        Returns:
            True if the image was deleted; False if the image is in use or
            any exception occurred (the exception is logged, not propagated).
        """
        self._logger.info("delete_image: Starting to delete image: %s, %s" % (datastore_id, image_id))

        image_dir = self._image_directory(datastore_id, image_id)
        ds_type = self._get_datastore_type(datastore_id)
        marker_pathname = os.path.join(image_dir, self.UNUSED_IMAGE_MARKER_FILE_NAME)
        timestamp_pathname = os.path.join(image_dir, self.IMAGE_TIMESTAMP_FILE_NAME)

        try:
            with FileBackedLock(image_dir, ds_type):
                # Read marker file to determine when image scanner marked this image as unused
                marker_time = self._read_marker_file(marker_pathname)
                self._logger.info("delete_image: image was marked as unused at: %s" % marker_time)
                # Subtract grace time to avoid errors due to small difference in clock
                # values on different hosts. Pretend the scan started grace_period earlier.
                marker_time -= grace_period

                # Read timestamp mod_time to determine the latest vm creation using this image
                timestamp_exists, mod_time = self._get_mod_time(timestamp_pathname)
                self._logger.info("delete_image: image was last touched at: %s, %s" % (timestamp_exists, mod_time))

                # Image was touched (due to VM creation) after scanner marked it as unused.
                # Remove unused image marker file
                if timestamp_exists and mod_time >= marker_time:
                    self._logger.info("delete_image: image is in-use, do not delete")
                    os.unlink(marker_pathname)
                    return False

            # Delete image directory (the lock has been released at this point)
            self._logger.info("delete_image: removing image directory: %s" % image_dir)
            if ds_type == DatastoreType.VSAN:
                # Special handling on VSAN before deleting an image
                self._delete_image_on_vsan(datastore_id, image_id)
            else:
                self._host_client.delete_file(image_dir)

            return True
        except Exception:
            # Any failure (lock, marker parsing, delete) is reported as "not deleted".
            self._logger.exception("delete_image: failed to delete image")
            return False

    def _delete_image_on_vsan(self, datastore_id, image_id):
        """Delete an image from a VSAN datastore.

        VSAN needs the vdisk and the image folder (osfs namespace) to be
        deleted in separate steps:
            1. Rewrite the .vmdk descriptor without its "ddb.deletable"
               line, which would otherwise cause
               Vmacore::File::PermissionDeniedException (PR 1704935).
            2. Delete the vdisk through the host client.
            3. Remove what is left in the image folder, which would
               otherwise cause vim.fault.DirectoryNotEmpty (PR 1721520).
               Dot-files are kept unless they end in ".lck".
            4. Delete the image folder through the host client.

        Exceptions from file operations or the host client propagate to the
        caller.
        """
        self._logger.info("_delete_image_on_vsan: datastore_id=%s, image_id=%s" % (datastore_id, image_id))

        disk_path = os_vmdk_path(datastore_id, image_id, IMAGE_FOLDER_NAME_PREFIX)
        temp_path = "%s~" % disk_path
        # Escape the dot so only the literal "ddb.deletable" key matches.
        pattern = re.compile(r"^ddb\.deletable = ")

        # Context managers close both files even if reading or writing fails.
        with open(disk_path) as disk_file:
            with open(temp_path, "w+") as temp_file:
                for line in disk_file:
                    if pattern.match(line):
                        self._logger.info("_delete_image_on_vsan: skip %s" % line)
                    else:
                        temp_file.write(line)
        os.rename(temp_path, disk_path)
        # delete vdisk
        self._host_client.delete_file(disk_path)

        # delete folder content
        image_dir = self._image_directory(datastore_id, image_id)
        for entry in os.listdir(image_dir):
            if not entry.startswith('.') or entry.endswith(".lck"):
                entry_full_path = os.path.join(image_dir, entry)
                self._logger.info("_delete_image_on_vsan: delete %s" % entry_full_path)
                if os.path.isdir(entry_full_path):
                    rm_rf(entry_full_path)
                else:
                    os.unlink(entry_full_path)
        # delete folder (osfs namespace)
        self._host_client.delete_file(image_dir)

    # Read the mod time on a file, returns two values, a boolean which is set to true if the
    # file exists, otherwise set to false and the mod time of the existing file
    def _get_mod_time(self, pathname):
        try:
            mod_time = os.path.getmtime(pathname)
        except OSError as ex:
            self._logger.warning("Cannot read mod time for file: %s, %s" % (pathname, ex))
            if ex.errno == errno.ENOENT:
                return False, 0
            else:
                raise ex
        return True, mod_time

    def get_timestamp_mod_time_from_dir(self, dirname):
        filename = os.path.join(dirname, self.IMAGE_TIMESTAMP_FILE_NAME)
        return self._get_mod_time(filename)

    def _create_image_timestamp_file(self, dirname):
        try:
            timestamp_pathname = os.path.join(dirname, self.IMAGE_TIMESTAMP_FILE_NAME)
            open(timestamp_pathname, 'w').close()
        except Exception as ex:
            self._logger.exception("Exception creating %s, %s" % (dirname, ex))
            raise ex

    def _image_directory(self, datastore_id, image_id):
        """Return the directory that contains the image's vmdk file."""
        vmdk_pathname = os_vmdk_path(datastore_id, image_id, IMAGE_FOLDER_NAME_PREFIX)
        return os.path.dirname(vmdk_pathname)
Ejemplo n.º 15
0
 def start_collection(self):
     """Start a daemon Periodic that calls self.collect every _collect_interval_secs."""
     self._collector_thread = collector = Periodic(self.collect,
                                                   self._collect_interval_secs)
     collector.daemon = True
     collector.start()
Ejemplo n.º 16
0
class StatsPublisher(object):
    """Periodically reads new samples from a time-series DB and publishes them.

    Every failed publish increments ``failed_count``. Once it reaches
    ``publish_try_count`` the periodic thread is slowed down to
    ``failed_publish_interval_secs``; the configured publish interval is
    restored on the next successful publish.
    """

    DEFAULT_PUBLISH_INTERVAL_SECS = 20.0
    DEFAULT_PUBLISH_TRY_COUNT = 10
    DEFAULT_FAILED_PUBLISH_INTERVAL_SECS = 10 * 60

    def __init__(
        self,
        tsdb,
        publish_try_count=DEFAULT_PUBLISH_TRY_COUNT,
        failed_publish_interval_secs=DEFAULT_FAILED_PUBLISH_INTERVAL_SECS,
    ):
        """
        Args:
            tsdb: Metric store; must offer ``get_keys()`` and
                ``get_values_since(timestamp, metric)``.
            publish_try_count: Number of failed publishes after which the
                publisher backs off.
            failed_publish_interval_secs: Wait interval used while backing off.
        """
        self._logger = logging.getLogger(__name__)
        self._db = tsdb
        self._last_seen_ts = 0
        self.failed_count = 0
        self.publish_try_count = publish_try_count
        self.failed_publish_interval_secs = failed_publish_interval_secs
        # True while the periodic thread runs at the slow back-off interval.
        self._backing_off = False

        # XXX plugin configuration should be decoupled from agent_config arg
        # parsing
        self._agent_config = common.services.get(ServiceName.AGENT_CONFIG)
        self._hostname = self._agent_config.hostname
        if self._hostname is None:
            self._hostname = socket.gethostname()

        self._publish_interval_secs = float(
            self._agent_config.__dict__.get("stats_publish_interval", StatsPublisher.DEFAULT_PUBLISH_INTERVAL_SECS)
        )

        self._publisher_thread = None
        self._publishers = []

    def start_publishing(self):
        """Start the daemon Periodic worker that calls publish()."""
        self._publisher_thread = Periodic(self.publish, self._publish_interval_secs)
        self._publisher_thread.daemon = True
        self._publisher_thread.start()

    def stop_publishing(self):
        """Stop the periodic worker if it was started."""
        if self._publisher_thread is not None:
            self._publisher_thread.stop()

    def register_publisher(self, publisher):
        """ Add a new publisher

        Args:
            publisher: Publisher instance
        """
        self._publishers.append(publisher)

    def configure_publishers(self):
        """Register a GraphitePublisher built from the agent configuration."""
        stats_store_endpoint = self._agent_config.stats_store_endpoint
        stats_store_port = self._agent_config.stats_store_port
        stats_host_tags = self._agent_config.stats_host_tags
        pm_publisher = GraphitePublisher(
            hostname=self._hostname,
            carbon_host=stats_store_endpoint,
            carbon_port=stats_store_port,
            host_tags=stats_host_tags,
        )
        self.register_publisher(pm_publisher)
        self._logger.info("Stats publisher configured")

    def _set_wait_interval(self, interval_secs):
        """Change the periodic worker's interval, if a worker exists."""
        # publish() may be invoked directly, before start_publishing().
        if self._publisher_thread is not None:
            self._publisher_thread.update_wait_interval(interval_secs)

    def publish(self):
        """Collect samples newer than the last seen timestamp and publish them.

        Only the first registered publisher is used. Does nothing when no
        publisher is registered.
        """
        if not self._publishers:
            self._logger.debug("No publishers found.")
            return

        metrics = self._db.get_keys()
        self._logger.debug("DB metrics size %d" % len(metrics))

        retrieved_stats = {}
        latest_ts = self._last_seen_ts
        for metric in metrics:
            values = self._db.get_values_since(self._last_seen_ts, metric)
            retrieved_stats[metric] = values
            if values:
                latest_ts = max(latest_ts, max(x[0] for x in values))

        # NOTE(review): the watermark advances even when the publish below
        # fails, so samples from a failed round are not resent - confirm
        # that this is intended.
        self._last_seen_ts = latest_ts

        if retrieved_stats:
            # Use first publisher by default for now
            publisher = self._publishers[0]
            if publisher.publish(retrieved_stats):
                # failed_count is reset to 0 when back-off starts, so the
                # back-off flag must be checked too; otherwise a success
                # right after backing off would never restore the interval.
                if self.failed_count > 0 or self._backing_off:
                    self.failed_count = 0
                    self._backing_off = False
                    # Restore the configured interval, not the class default.
                    self._set_wait_interval(self._publish_interval_secs)
            else:
                self.failed_count += 1
                self._logger.critical("Publisher failed to publish stats, failed_count:%s" % str(self.failed_count))
        else:
            self._logger.debug("No metrics to send")

        if self.failed_count >= self.publish_try_count:
            self.failed_count = 0
            self._backing_off = True
            self._logger.critical(
                "Too many failed attempts to publish stats. Publisher will sleep for %s seconds now"
                % str(self.failed_publish_interval_secs)
            )
            self._set_wait_interval(self.failed_publish_interval_secs)
Ejemplo n.º 17
0
 def start_publishing(self):
     """Create the Periodic publisher, mark it as daemon, and start it."""
     worker = Periodic(self.publish, self._publish_interval_secs)
     self._publisher_thread = worker
     worker.daemon = True
     worker.start()
Ejemplo n.º 18
0
 def monitor_for_cleanup(self, reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
     """Start a daemon Periodic that calls reap_tmp_images every reap_interval."""
     reaper = Periodic(self.reap_tmp_images, reap_interval)
     self._image_reaper = reaper
     reaper.daemon = True
     reaper.start()
Ejemplo n.º 19
0
 def monitor_for_cleanup(self,
                         reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
     """Start a daemon Periodic that calls reap_tmp_images every reap_interval."""
     reaper = Periodic(self.reap_tmp_images, reap_interval)
     self._image_reaper = reaper
     reaper.daemon = True
     reaper.start()
Ejemplo n.º 20
0
class EsxImageManager(ImageManager):
    NUM_MAKEDIRS_ATTEMPTS = 10
    DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL = 600.0
    REAP_TMP_IMAGES_GRACE_PERIOD = 600.0
    IMAGE_MARKER_FILE_NAME = "unused_image_marker.txt"
    IMAGE_TIMESTAMP_FILE_NAME = "image_timestamp.txt"
    IMAGE_TIMESTAMP_FILE_RENAME_SUFFIX = ".renamed"

    def __init__(self, vim_client, ds_manager):
        """Store the vim client and datastore manager.

        No reaper exists until monitor_for_cleanup is called.
        """
        super(EsxImageManager, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._vim_client, self._ds_manager = vim_client, ds_manager
        self._image_reaper = None

    def monitor_for_cleanup(self,
                            reap_interval=DEFAULT_TMP_IMAGES_CLEANUP_INTERVAL):
        """Start a daemon Periodic that calls reap_tmp_images every reap_interval."""
        reaper = Periodic(self.reap_tmp_images, reap_interval)
        self._image_reaper = reaper
        reaper.daemon = True
        reaper.start()

    def cleanup(self):
        """Stop the tmp-image reaper, if one was started."""
        reaper = self._image_reaper
        if reaper is None:
            return
        reaper.stop()

    @log_duration
    def check_image(self, image_id, datastore):
        """Report whether the path built by os_vmdk_path for the image exists.

        :param image_id: id of the image to look up
        :param datastore: id of the datastore to look in
        :return: True if the path exists; False if it does not or if the
                 lookup raised an error (which is logged)
        """
        image_dir = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            return os.path.exists(image_dir)
        except Exception:
            # Catch Exception rather than everything so that
            # KeyboardInterrupt/SystemExit are not swallowed here.
            self._logger.exception(
                "Error looking up %s" % image_dir)
            return False

    """
    The following method is intended
    as a replacement of check_image in
    the vm creation workflow compatible
    with the new image sweeper.
    For an image to be valid both the
    directory and the image timestamp
    file must exists on the datastore.
    """
    def check_and_validate_image(self, image_id, ds_id):
        """Check that both the image directory and its timestamp file exist.

        :param image_id: id of the image to validate
        :param ds_id: id of the datastore to look in
        :return: True only when the image directory and the timestamp file
                 inside it both exist; False otherwise, including when a
                 lookup raised an error (which is logged)
        """
        image_dir = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        try:
            if not os.path.exists(image_dir):
                return False
        except Exception:
            # Exception, not a bare except: do not swallow
            # KeyboardInterrupt/SystemExit.
            self._logger.exception(
                "Error looking up %s" % image_dir)
            return False

        # Check the existence of the timestamp file
        timestamp_pathname = os.path.join(image_dir,
                                          self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            return os.path.exists(timestamp_pathname)
        except Exception as ex:
            self._logger.exception(
                "Exception looking up %s, %s" % (timestamp_pathname, ex))
            return False

    """
    This method is used to update the mod time on the
    image timestamp file.
    """
    def touch_image_timestamp(self, ds_id, image_id):
        """Set the access/modification time of the image timestamp file to now.

        :param ds_id: id of the datastore holding the image
        :param image_id: id of the image whose timestamp file is touched
        :return: None
        :raises Exception: whatever os.utime raised (for example when the
                 timestamp file does not exist); the error is logged and
                 re-raised unchanged
        """
        image_path = os.path.dirname(
            os_vmdk_path(ds_id, image_id, IMAGE_FOLDER_NAME_PREFIX))

        # Touch the timestamp file
        timestamp_pathname = os.path.join(image_path, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            # times=None means "use the current time".
            os.utime(timestamp_pathname, None)
        except Exception as ex:
            self._logger.exception(
                "Exception looking up %s, %s" % (timestamp_pathname, ex))
            # Bare raise keeps the original traceback.
            raise

    @log_duration
    def check_image_dir(self, image_id, datastore):
        """Report whether the directory containing the image's vmdk path exists.

        :param image_id: id of the image to look up
        :param datastore: id of the datastore to look in
        :return: True if the directory exists; False if it does not or if
                 the lookup raised an error (which is logged)
        """
        image_path = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        try:
            return os.path.exists(os.path.dirname(image_path))
        except Exception:
            # Exception, not a bare except: do not swallow
            # KeyboardInterrupt/SystemExit.
            self._logger.error(
                "Error looking up %s" % image_path, exc_info=True)
            return False

    def get_image_directory_path(self, datastore_id, image_id):
        """Delegate to the image_directory_path helper and return its result."""
        directory = image_directory_path(datastore_id, image_id)
        return directory

    def get_image_path(self, datastore_id, image_id):
        """Return the path os_vmdk_path builds for the image on the datastore."""
        vmdk_pathname = os_vmdk_path(datastore_id, image_id, IMAGE_FOLDER_NAME_PREFIX)
        return vmdk_pathname

    def image_size(self, image_id):
        """Return the size in bytes of the image's flat vmdk file.

        The image datastores are tried in the order returned by the datastore
        manager; the first one where the file size can be read wins.

        :param image_id: id of the image
        :return: size reported by os.path.getsize
        :raises NoSuchResourceException: if the file could not be read on any
                image datastore
        """
        import sys  # local import: only needed to remember the last lookup error

        last_error = None
        for image_ds in self._ds_manager.image_datastores():
            try:
                image_path = os_vmdk_flat_path(image_ds, image_id,
                                               IMAGE_FOLDER_NAME_PREFIX)
                return os.path.getsize(image_path)
            except os.error:
                # Remember the failure explicitly; outside the handler the
                # interpreter no longer reliably exposes it.
                last_error = sys.exc_info()
                self._logger.info("Image %s not found in DataStore %s" %
                                  (image_id, image_ds))

        # last_error stays None when there were no image datastores at all,
        # in which case no exception details are attached to the record.
        self._logger.warning("Failed to get image size:",
                             exc_info=last_error)
        # Failed to access shared image.
        raise NoSuchResourceException(
            ResourceType.IMAGE,
            "Image does not exist.")

    def _load_json(self, metadata_path):
        """Parse the JSON file at metadata_path.

        Returns the decoded data, or an empty dict when the file does not
        exist or its content is not valid JSON (the latter is logged).
        """
        if not os.path.exists(metadata_path):
            return {}

        with open(metadata_path) as metadata_file:
            try:
                return json.load(metadata_file)
            except ValueError:
                self._logger.error(
                    "Error loading metadata file %s" % metadata_path,
                    exc_info=True)
        return {}

    def get_image_metadata(self, image_id, datastore):
        """Load the image's metadata file from the datastore via _load_json."""
        path = os_metadata_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        self._logger.info("Loading metadata %s" % path)
        metadata = self._load_json(path)
        return metadata

    def _get_datastore_type(self, datastore_id):
        """Return the type of the first known datastore with the given id.

        Raises IndexError when no datastore matches.
        """
        matching_types = [candidate.type
                          for candidate in self._ds_manager.get_datastores()
                          if candidate.id == datastore_id]
        return matching_types[0]

    def _prepare_virtual_disk_spec(self, disk_type, adapter_type):
        """Build a VirtualDiskSpec carrying the given disk and adapter types.

        :param disk_type [vim.VirtualDiskManager.VirtualDiskType]:
        :param adapter_type [vim.VirtualDiskManager.VirtualDiskAdapterType]:
        :return: the populated vim.VirtualDiskManager.VirtualDiskSpec; both
                 values are stored as strings
        """
        spec = vim.VirtualDiskManager.VirtualDiskSpec()
        spec.diskType = str(disk_type)
        spec.adapterType = str(adapter_type)
        return spec

    def _copy_to_tmp_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """ Stage a copy of an image in a freshly created temp directory.
            1. Build a temp directory path whose name embeds a random UUID to
            prevent collisions with concurrent copies. On VSAN datastores the
            path is nested under the destination image folder, otherwise it
            sits directly under the datastore.
            2. Create the temp directory.
            3. Copy the metadata file over, if the source has one.
            4. Create the image timestamp file.
            5. Copy the vmdk over as a thin, lsiLogic virtual disk.

            NOTE(review): no lock is acquired in this method itself.

            @return the tmp image directory on success.
        """
        path_parts = []
        if self._get_datastore_type(dest_datastore) == DatastoreType.VSAN:
            path_parts.append(compond_path_join(IMAGE_FOLDER_NAME_PREFIX, dest_id))
        path_parts.append(compond_path_join(TMP_IMAGE_FOLDER_NAME_PREFIX, str(uuid.uuid4())))
        tmp_image_dir = os_datastore_path(dest_datastore, *path_parts)

        # Create the temp directory
        self._vim_client.make_directory(tmp_image_dir)

        # Copy the metadata file if it exists.
        source_meta = os_metadata_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX)
        if os.path.exists(source_meta):
            try:
                shutil.copy(source_meta,
                            os.path.join(tmp_image_dir, metadata_filename(dest_id)))
            except:
                self._logger.exception("Failed to copy metadata file %s", source_meta)
                raise

        # Create the timestamp file
        self._create_image_timestamp_file(tmp_image_dir)

        disk_manager = vim.VirtualDiskManager
        thin_lsi_spec = self._prepare_virtual_disk_spec(
            disk_manager.VirtualDiskType.thin,
            disk_manager.VirtualDiskAdapterType.lsiLogic)

        dest_vmdk = os.path.join(tmp_image_dir, "%s.vmdk" % dest_id)
        self._manage_disk(disk_manager.CopyVirtualDisk_Task,
                          sourceName=vmdk_path(source_datastore, source_id, IMAGE_FOLDER_NAME_PREFIX),
                          destName=os_to_datastore_path(dest_vmdk),
                          destSpec=thin_lsi_spec)
        return tmp_image_dir

    def _move_image(self, image_id, datastore, tmp_dir):
        """
        Move a tmp image folder into its final image location on a datastore.

        The move runs while holding a FileBackedLock on the destination image
        path, so that concurrent moves of the same image are serialized.
        Has the following side effects:
            a - If _check_image_repair reports that the destination image
            already exists, it is assumed that someone else successfully
            copied the image over and DiskAlreadyExistException is raised.
            b - On any failure inside the try block (case a, failing to
            acquire the lock, or a failed move), the error is logged, the
            vim client is asked to delete the tmp image directory, and the
            exception is re-raised.

        image_id: String. The image id of the image being moved.
        datastore: String. The datastore id of the datastore.
        tmp_dir: String. The absolute path of the temp image directory.

        raises: ImageNotFoundException if tmp_dir does not exist.
                DiskAlreadyExistException if the destination image exists.
                Whatever the move itself raises if it fails.
                AcquireLockFailure, InvalidFile if we fail to lock the
                destination image (presumably raised by FileBackedLock -
                TODO confirm).
        """
        ds_type = self._get_datastore_type(datastore)
        image_path = os_datastore_path(datastore, compond_path_join(IMAGE_FOLDER_NAME_PREFIX, image_id))
        self._logger.info("_move_image: %s => %s, ds_type: %s" % (tmp_dir, image_path, ds_type))

        # Checked before the try block, so a missing tmp_dir does not trigger
        # the delete_file cleanup below.
        if not os.path.exists(tmp_dir):
            raise ImageNotFoundException("Temp image %s not found" % tmp_dir)

        try:
            with FileBackedLock(image_path, ds_type, retry=300, wait_secs=0.01):  # 300 retries x 0.01s, i.e. about 3 seconds
                if self._check_image_repair(image_id, datastore):
                    raise DiskAlreadyExistException("Image already exists")

                if ds_type == DatastoreType.VSAN:
                    # on VSAN, move all files under [datastore]/image_[image_id]/tmp_image_[uuid]/* to
                    # [datastore]/image_[image_id]/*.
                    # Also we do not delete the tmp_image folder in the success case, because VSAN
                    # accesses it when creating a linked VM, even though the folder is now empty.
                    for entry in os.listdir(tmp_dir):
                        shutil.move(os.path.join(tmp_dir, entry), os.path.join(image_path, entry))
                else:
                    # on VMFS/NFS/etc, rename [datastore]/tmp_image_[uuid] to [datastore]/image_[image_id]
                    self._vim_client.move_file(tmp_dir, image_path)
        except:
            # Deliberately catches everything: clean up the tmp dir, then
            # re-raise the original exception unchanged.
            self._logger.exception("Move image %s to %s failed" % (image_id, image_path))
            self._vim_client.delete_file(tmp_dir)
            raise

    """
    The following method should be used to check
    and validate the existence of a previously
    created image. With the new image delete path
    the "timestamp" file must exists inside the
    image directory. If the directory exists and
    the file does not, it may mean that an image
    delete operation was aborted mid-way. In this
    case the following method recreate the timestamp
    file. All operations are performed while
    holding the image directory lock (FileBackedLock),
    the caller is required to hold the lock.
    """
    def _check_image_repair(self, image_id, datastore):
        """Validate an existing image, recreating its timestamp file if needed.

        Returns False when the vmdk is missing, when checking for it raised,
        or when the timestamp file could not be recreated. Returns True when
        the vmdk exists and the timestamp file either already exists or was
        successfully recreated.
        """
        vmdk_pathname = os_vmdk_path(datastore, image_id, IMAGE_FOLDER_NAME_PREFIX)
        image_dirname = os.path.dirname(vmdk_pathname)

        # Step 1: the vmdk itself must be there.
        try:
            vmdk_missing = not os.path.exists(vmdk_pathname)
            if vmdk_missing:
                self._logger.info("Vmdk path doesn't exists: %s" % vmdk_pathname)
                return False
        except Exception as ex:
            self._logger.exception("Exception validating %s, %s" % (image_dirname, ex))
            return False

        # Step 2: an existing timestamp file means nothing needs repairing.
        timestamp_pathname = os.path.join(image_dirname, self.IMAGE_TIMESTAMP_FILE_NAME)
        try:
            if os.path.exists(timestamp_pathname):
                self._logger.info("Timestamp file exists: %s" % timestamp_pathname)
                return True
        except Exception as ex:
            # Fall through to the repair attempt below.
            self._logger.exception("Exception validating %s, %s" % (timestamp_pathname, ex))

        # Step 3: the timestamp file is not accessible; create a new one and,
        # if that works, remove the renamed timestamp file if it exists.
        try:
            self._create_image_timestamp_file(image_dirname)
            self._delete_renamed_image_timestamp_file(image_dirname)
        except Exception as ex:
            self._logger.exception("Exception creating %s, %s" % (timestamp_pathname, ex))
            return False
        else:
            self._logger.info("Image repaired: %s" % image_dirname)
            return True

    def copy_image(self, source_datastore, source_id, dest_datastore, dest_id):
        """Create a full clone of an image, possibly on another datastore.

        The disk is first copied into a uniquely named temporary directory
        and then moved to the destination image location, which keeps the
        window in which the final vmdk path exists with incomplete content
        small.

        If the destination image already validates (directory and timestamp
        file exist), nothing is copied and DiskAlreadyExistException is
        raised.

        source_datastore: id of the source datastore
        source_id: id of the image to copy from
        dest_datastore: id of the destination datastore
        dest_id: id of the new image in the destination datastore

        throws: DiskAlreadyExistException if the destination image exists
        throws: AcquireLockFailure if timed out waiting to acquire lock on
                the image directory
        throws: InvalidFile if unable to lock the image directory or some
                other reasons
        """
        already_copied = self.check_and_validate_image(dest_id, dest_datastore)
        if already_copied:
            # Presumably some other concurrent copy got there first.
            self._logger.info("Image %s already copied" % dest_id)
            raise DiskAlreadyExistException("Image already exists")

        # Stage in the tmp directory, then move into place.
        staged_dir = self._copy_to_tmp_image(source_datastore, source_id,
                                             dest_datastore, dest_id)
        self._move_image(dest_id, dest_datastore, staged_dir)

    def reap_tmp_images(self):
        """ Clean up unused directories in the temp image folder.

        For every datastore, each top-level tmp image directory older than
        REAP_TMP_IMAGES_GRACE_PERIOD is removed while holding its
        FileBackedLock. Directories that are locked, too recent, or that
        fail to be removed are skipped; failures are only logged.
        """
        for ds in self._ds_manager.get_datastores():
            for image_dir in list_top_level_directory(ds.id, TMP_IMAGE_FOLDER_NAME_PREFIX):
                if not os.path.isdir(image_dir):
                    continue

                try:
                    # st_ctime is used as the age reference for the folder.
                    create_time = os.stat(image_dir).st_ctime
                except OSError:
                    # The directory can disappear between isdir() and stat();
                    # skip it instead of aborting the whole sweep.
                    self._logger.info("Unable to stat %s, skipping" % image_dir, exc_info=True)
                    continue

                current_time = time.time()
                if current_time - self.REAP_TMP_IMAGES_GRACE_PERIOD < create_time:
                    # Skip folders that are newly created in past x minutes
                    # For example, during host-to-host transfer, hostd on
                    # receiving end stores the uploaded file in temp images
                    # folder but does not lock it with FileBackedLock, so we
                    # need to allow a grace period before reaping it.
                    self._logger.info(
                        "Skip folder: %s, created: %s, now: %s" %
                        (image_dir, create_time, current_time))
                    continue

                try:
                    with FileBackedLock(image_dir, ds.type):
                        if os.path.exists(image_dir):
                            self._logger.info("Delete folder %s" % image_dir)
                            shutil.rmtree(image_dir, ignore_errors=True)
                except (AcquireLockFailure, InvalidFile):
                    self._logger.info("Already locked: %s, skipping" % image_dir)
                except Exception:
                    # Best effort: log and move on, but let
                    # KeyboardInterrupt/SystemExit propagate.
                    self._logger.info("Unable to remove %s" % image_dir, exc_info=True)

    def get_images(self, datastore):
        """ List the ids of the images found on a datastore
        :param datastore: datastore id
        :return: list of string, ids of the image folders for which
                 check_image succeeds
        :raises DatastoreNotFoundException: if the datastore root is missing
        """
        if not os.path.exists(os_datastore_root(datastore)):
            raise DatastoreNotFoundException()

        found_ids = []
        # Image folder names are split on COMPOND_PATH_SEPARATOR and the
        # second component is taken as the image id.
        for folder in list_top_level_directory(datastore, IMAGE_FOLDER_NAME_PREFIX):
            candidate_id = folder.split(COMPOND_PATH_SEPARATOR)[1]
            if self.check_image(candidate_id, datastore):
                found_ids.append(candidate_id)
        return found_ids

    def _unzip(self, src, dst):
        """Decompress the gzip file src into the file dst.

        :param src: path of the gzip-compressed source file
        :param dst: path of the file to write; created or overwritten
        """
        self._logger.info("unzip %s -> %s" % (src, dst))

        fsrc = gzip.open(src, "rb")
        try:
            # Open the destination only once the source is protected by the
            # try/finally, so the source handle is closed even when opening
            # the destination fails.
            with open(dst, "wb") as fdst:
                shutil.copyfileobj(fsrc, fdst)
        finally:
            fsrc.close()

    def _copy_disk(self, src, dst):
        """Run CopyVirtualDisk_Task from src to dst through _manage_disk."""
        copy_task = vim.VirtualDiskManager.CopyVirtualDisk_Task
        self._manage_disk(copy_task, sourceName=src, destName=dst)

    def _manage_disk(self, op, **kwargs):
        try:
            self._logger.debug("Invoking %s(%s)" % (op.info.name, kwargs))
            task = op(self._manager, **kwargs)
            self._vim_client.wait_for_task(task)
        except vim.Fault.FileAlreadyExists, e:
            raise DiskAlreadyExistException(e.msg)
        except vim.Fault.FileFault, e:
            raise DiskFileException(e.msg)