Esempio n. 1
0
    def download(cfg, url, local_path, size_in_bytes):
        """
        Downloads ``url`` to ``local_path`` unless that file already exists locally.

        :param cfg: The config object. The setting ``system`` / ``offline.mode`` is read from it.
        :param url: The URL to download from.
        :param local_path: The path of the local target file.
        :param size_in_bytes: The size of the download in bytes. It is shown (in MB) on the console and is
                              passed on to ``net.download``.
        :raises exceptions.SystemSetupError: If ``local_path`` is still not a file after the download attempt.
        """
        offline = cfg.opts("system", "offline.mode")

        if os.path.isfile(local_path):
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return

        if not offline:
            logger.info("Downloading from [%s] to [%s]." % (url, local_path))
            try:
                io.ensure_dir(os.path.dirname(local_path))
                size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                # no trailing newline and an explicit flush so the message shows up before the download starts
                progress_message = "Downloading data from %s (%s MB) ... " % (url, size_in_mb)
                print(progress_message, end='', flush=True)
                net.download(url, local_path, size_in_bytes)
                print("Done")
            except urllib.error.URLError:
                # only logged here; the existence check below turns a failed download into an error
                logger.exception("Could not download [%s] to [%s]." % (url, local_path))

        # the file must exist at this point -> verify
        if os.path.isfile(local_path):
            return
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError("Could not download from %s to %s. Please verify that data are available at %s and "
                                          "check your internet connection." % (url, local_path, url))
Esempio n. 2
0
    def download(cfg, url, local_path, size_in_bytes):
        """
        Downloads ``url`` to ``local_path`` unless a file of the expected size is already present.

        NOTE(review): ``track`` is not a parameter of this function; presumably it is resolved from the
        enclosing scope - confirm against the surrounding code.

        :param cfg: The config object. The setting ``system`` / ``offline.mode`` is read from it.
        :param url: The URL to download from.
        :param local_path: The path of the local target file.
        :param size_in_bytes: The expected file size in bytes or ``None`` to skip all size checks.
        :return: ``False`` if the download has been skipped because a matching file already exists, ``True`` otherwise.
        :raises exceptions.SystemSetupError: If ``local_path`` is not a file after the download attempt.
        :raises exceptions.DataError: If the size of the file differs from ``size_in_bytes``.
        """
        offline = cfg.opts("system", "offline.mode")

        # ensure we only skip the download if the file size also matches our expectation
        if os.path.isfile(local_path):
            if size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes:
                logger.info("[%s] already exists locally. Skipping download." % local_path)
                return False

        if not offline:
            try:
                io.ensure_dir(os.path.dirname(local_path))
                if size_in_bytes:
                    size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                    start_message = "Downloading data from [%s] (%s MB) to [%s]." % (url, size_in_mb, local_path)
                else:
                    start_message = "Downloading data from [%s] to [%s]." % (url, local_path)
                logger.info(start_message)

                # we want to have a bit more accurate download progress as these files are typically very large
                progress = net.Progress("[INFO] Downloading data for track %s" % track.name, accuracy=1)
                net.download(url, local_path, size_in_bytes, progress_indicator=progress)
                progress.finish()
                logger.info("Downloaded data from [%s] to [%s]." % (url, local_path))
            except urllib.error.URLError:
                # only logged here; the existence check below turns a failed download into an error
                logger.exception("Could not download [%s] to [%s]." % (url, local_path))

        # file must exist at this point -> verify
        if not os.path.isfile(local_path):
            if offline:
                raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
            raise exceptions.SystemSetupError("Cannot download from %s to %s. Please verify that data are available at %s and "
                                              "check your internet connection." % (url, local_path, url))

        actual_size = os.path.getsize(local_path)
        if size_in_bytes is None or actual_size == size_in_bytes:
            return True
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (local_path, actual_size, size_in_bytes))
Esempio n. 3
0
    def download(cfg, url, local_path, size_in_bytes):
        """
        Downloads ``url`` to ``local_path`` unless a file of the expected size is already present.

        :param cfg: The config object. The setting ``system`` / ``offline.mode`` is read from it.
        :param url: The URL to download from.
        :param local_path: The path of the local target file.
        :param size_in_bytes: The expected file size in bytes or ``None`` to skip all size checks.
        :return: ``False`` if the download has been skipped because a matching file already exists, ``True`` otherwise.
        :raises exceptions.SystemSetupError: If ``local_path`` is not a file after the download attempt.
        :raises exceptions.DataError: If the size of the file differs from ``size_in_bytes``.
        """
        offline = cfg.opts("system", "offline.mode")

        # ensure we only skip the download if the file size also matches our expectation
        if os.path.isfile(local_path):
            if size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes:
                logger.info("[%s] already exists locally. Skipping download." % local_path)
                return False

        if not offline:
            try:
                io.ensure_dir(os.path.dirname(local_path))
                if size_in_bytes:
                    size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                    start_message = "Downloading data from [%s] (%s MB) to [%s] ... " % (url, size_in_mb, local_path)
                else:
                    start_message = "Downloading data from [%s] to [%s] ... " % (url, local_path)
                # no trailing newline and an explicit flush so the message shows up before the download starts
                console.info(start_message, end='', flush=True, logger=logger)

                net.download(url, local_path, size_in_bytes)
                console.println("[OK]")
            except urllib.error.URLError:
                # only logged here; the existence check below turns a failed download into an error
                logger.exception("Could not download [%s] to [%s]." % (url, local_path))

        # file must exist at this point -> verify
        if not os.path.isfile(local_path):
            if offline:
                raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
            raise exceptions.SystemSetupError("Cannot download from %s to %s. Please verify that data are available at %s and "
                                              "check your internet connection." % (url, local_path, url))

        actual_size = os.path.getsize(local_path)
        if size_in_bytes is None or actual_size == size_in_bytes:
            return True
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (local_path, actual_size, size_in_bytes))
Esempio n. 4
0
def filters_from_included_tasks(included_tasks):
    """
    Converts task specifications into task filter objects.

    A specification without a colon produces a ``track.TaskNameFilter`` for that value, a specification of the
    form ``type:<value>`` produces a ``track.TaskOpTypeFilter`` for ``<value>``.

    :param included_tasks: An iterable of task specification strings. May be ``None`` or empty.
    :return: A list with one filter per specification. The list is empty if no specifications are given.
    :raises exceptions.SystemSetupError: If a specification contains more than one colon or its prefix is not ``type``.
    """
    if not included_tasks:
        return []

    filters = []
    for task_spec in included_tasks:
        parts = task_spec.split(":")
        if len(parts) == 1:
            filters.append(track.TaskNameFilter(parts[0]))
            continue
        if len(parts) != 2:
            raise exceptions.SystemSetupError("Invalid format for included tasks: [%s]" % task_spec)
        kind, value = parts
        if kind != "type":
            raise exceptions.SystemSetupError("Invalid format for included tasks: [%s]. Expected [type] but got [%s]." % (task_spec, kind))
        filters.append(track.TaskOpTypeFilter(value))
    return filters
Esempio n. 5
0
def load_track(cfg):
    """
    Loads the track whose name is configured in ``benchmarks`` / ``track``.

    :param cfg: The config object. It contains the name of the track to load.
    :return: The loaded track. If ``benchmarks`` / ``test.mode`` is set, the result of
             ``post_process_for_test_mode`` for that track is returned instead.
    :raises exceptions.SystemSetupError: If a ``FileNotFoundError`` occurs while loading the track.
    """
    track_name = cfg.opts("benchmarks", "track")
    try:
        repo = TrackRepository(cfg)
        reader = TrackFileReader(cfg)
        distribution_version = cfg.opts("source", "distribution.version", mandatory=False)
        data_root = cfg.opts("benchmarks", "local.dataset.cache")

        track_file = repo.track_file(distribution_version, track_name)
        track_dir = repo.track_dir(track_name)
        # the data of a track are kept in a directory named after the lower-cased track name
        track_data_dir = "%s/%s" % (data_root, track_name.lower())
        full_track = reader.read(track_name, track_file, track_dir, track_data_dir)

        if cfg.opts("benchmarks", "test.mode"):
            return post_process_for_test_mode(full_track)
        return full_track
    except FileNotFoundError:
        logger.exception("Cannot load track [%s]" % track_name)
        raise exceptions.SystemSetupError("Cannot load track %s. List the available tracks with %s list tracks." %
                                          (track_name, PROGRAM_NAME))
Esempio n. 6
0
def load_track(cfg):
    """
    Loads the track provided by the track repository for ``cfg``.

    Only the tasks matching ``track`` / ``include.tasks`` are kept and the track is flagged with
    ``has_plugins`` depending on whether the plugin reader for the track directory can load.

    :param cfg: The config object. It contains the name of the track to load.
    :return: The loaded track. If ``track`` / ``test.mode.enabled`` is set, the result of
             ``post_process_for_test_mode`` for that track is returned instead.
    :raises exceptions.SystemSetupError: If a ``FileNotFoundError`` occurs while loading the track.
    """
    # stays None in the error message if resolving the repository itself fails
    track_name = None
    try:
        repo = track_repo(cfg)
        track_name = repo.track_name
        track_dir = repo.track_dir(track_name)
        reader = TrackFileReader(cfg)
        included_tasks = cfg.opts("track", "include.tasks")

        track_file = repo.track_file(track_name)
        current_track = reader.read(track_name, track_file, track_dir)
        task_filters = filters_from_included_tasks(included_tasks)
        current_track = filter_included_tasks(current_track, task_filters)
        current_track.has_plugins = TrackPluginReader(track_dir).can_load()

        if cfg.opts("track", "test.mode.enabled"):
            return post_process_for_test_mode(current_track)
        return current_track
    except FileNotFoundError:
        logger.exception("Cannot load track [%s]" % track_name)
        raise exceptions.SystemSetupError("Cannot load track %s. List the available tracks with %s list tracks." %
                                          (track_name, PROGRAM_NAME))
Esempio n. 7
0
 def _update(self, distribution_version):
     """
     Switches the track repository to the branch that best matches ``distribution_version``.

     If the repository has a remote and offline mode is off, the best matching remote branch is rebased.
     Otherwise, or if no remote branch matches, the best matching local branch is checked out.

     :param distribution_version: The distribution version for which a matching branch is looked up.
     :raises exceptions.SystemSetupError: If no local branch matches ``distribution_version``.
     :raises exceptions.DataError: If a ``SupplyError`` occurs while updating the repository.
     """
     try:
         if self.remote and not self.offline:
             branch = versions.best_match(
                 git.branches(self.tracks_dir, remote=self.remote),
                 distribution_version)
             if branch:
                 logger.info(
                     "Rebasing on '%s' in '%s' for distribution version '%s'."
                     % (branch, self.tracks_dir, distribution_version))
                 git.rebase(self.tracks_dir, branch=branch)
                 return
             else:
                 msg = "Could not find track data remotely for distribution version %s. " \
                       "Trying to find track data locally." % distribution_version
                 # Logger.warn() is a deprecated alias that newer Python versions no longer provide
                 logger.warning(msg)
         # no remote, offline mode or no matching remote branch: fall back to local branches
         branch = versions.best_match(
             git.branches(self.tracks_dir, remote=False),
             distribution_version)
         if branch:
             logger.info(
                 "Checking out '%s' in '%s' for distribution version '%s'."
                 % (branch, self.tracks_dir, distribution_version))
             git.checkout(self.tracks_dir, branch=branch)
         else:
             raise exceptions.SystemSetupError(
                 "Cannot find track data for distribution version %s" %
                 distribution_version)
     except exceptions.SupplyError as e:
         # chain explicitly so the original failure stays attached as the cause
         raise exceptions.DataError("Cannot update track data in '%s': %s" %
                                    (self.tracks_dir, e)) from e
Esempio n. 8
0
 def _update(self, distribution_version):
     """
     Switches the track repository to the branch that best matches ``distribution_version``.

     If the repository has a remote and offline mode is off, the best matching remote branch is checked out and
     rebased. A failing rebase is only reported as a warning. Otherwise, or if no remote branch matches, the best
     matching local branch is checked out.

     :param distribution_version: The distribution version for which a matching branch is looked up.
     :raises exceptions.SystemSetupError: If no local branch matches ``distribution_version``.
     :raises exceptions.DataError: If a ``SupplyError`` occurs outside of the rebase step.
     """
     try:
         if self.remote and not self.offline:
             remote_branches = git.branches(self.tracks_dir, remote=self.remote)
             branch = versions.best_match(remote_branches, distribution_version)
             if not branch:
                 msg = "Could not find track data remotely for distribution version [%s]. " \
                       "Trying to find track data locally." % distribution_version
                 logger.warning(msg)
             else:
                 # Allow uncommitted changes iff we do not have to change the branch
                 logger.info("Checking out [%s] in [%s] for distribution version [%s]." % (branch, self.tracks_dir, distribution_version))
                 git.checkout(self.tracks_dir, branch=branch)
                 logger.info("Rebasing on [%s] in [%s] for distribution version [%s]." % (branch, self.tracks_dir, distribution_version))
                 try:
                     git.rebase(self.tracks_dir, branch=branch)
                 except exceptions.SupplyError:
                     # best effort: keep going with the checked out state and just tell the user
                     logger.exception("Cannot rebase due to local changes in [%s]" % self.tracks_dir)
                     console.warn("Local changes in [%s] prevent track update from remote. Please commit your changes." % self.tracks_dir)
                 return

         local_branches = git.branches(self.tracks_dir, remote=False)
         branch = versions.best_match(local_branches, distribution_version)
         if not branch:
             raise exceptions.SystemSetupError("Cannot find track data for distribution version %s" % distribution_version)
         logger.info("Checking out [%s] in [%s] for distribution version [%s]." % (branch, self.tracks_dir, distribution_version))
         git.checkout(self.tracks_dir, branch=branch)
     except exceptions.SupplyError as e:
         # re-raise as DataError but keep the traceback of the original failure
         raise exceptions.DataError("Cannot update track data in [%s]." % self.tracks_dir).with_traceback(e.__traceback__)
Esempio n. 9
0
 def __init__(self, cfg, fetch=True):
     """
     Sets up access to the configured track repository and clones or fetches it if permitted.

     :param cfg: The config object. Repository name, offline mode, repository URL and directories are read from it.
     :param fetch: If ``False``, the repository is neither cloned nor fetched. Defaults to ``True``.
     :raises exceptions.SystemSetupError: If the repository may not be cloned and its directory is not a git
                                          working copy.
     """
     self.cfg = cfg
     self.name = cfg.opts("track", "repository.name")
     self.offline = cfg.opts("system", "offline.mode")
     # If no URL is found, we consider this a local only repo (but still require that it is a git repo)
     self.url = cfg.opts("tracks", "%s.url" % self.name, mandatory=False)
     self.remote = self.url is not None and self.url.strip() != ""
     root = cfg.opts("node", "root.dir")
     track_repositories = cfg.opts("benchmarks", "track.repository.dir")
     self.tracks_dir = "%s/%s/%s" % (root, track_repositories, self.name)

     # talking to the remote is only allowed for an online repo with a remote and when the caller asks for it
     may_sync = self.remote and not self.offline and fetch
     if git.is_working_copy(self.tracks_dir):
         if may_sync:
             try:
                 git.fetch(src=self.tracks_dir)
             except exceptions.SupplyError:
                 console.warn("Could not update tracks. Continuing with your locally available state.", logger=logger)
     elif may_sync:
         # a normal git repo with a remote
         git.clone(src=self.tracks_dir, remote=self.url)
     else:
         raise exceptions.SystemSetupError("[{src}] must be a git repository.\n\nPlease run:\ngit -C {src} init"
                                           .format(src=self.tracks_dir))
Esempio n. 10
0
 def load(self, track_plugin_path):
     """
     Loads the track plugin at ``track_plugin_path`` and calls its ``register()`` function with this object.

     :param track_plugin_path: The directory of the track plugin. Its parent directory is put at the front of
                               ``sys.path``.
     :raises exceptions.SystemSetupError: If loading or registering the plugin fails for any reason.
     """
     plugin_name = io.basename(track_plugin_path)
     logger.info("Loading track plugin [%s] from [%s]" % (plugin_name, track_plugin_path))
     # search all paths within this directory for modules but exclude all directories starting with "_"
     module_dirs = []
     for current_dir, sub_dirs, _ in os.walk(track_plugin_path):
         module_dirs.append(current_dir)
         for excluded in [d for d in sub_dirs if d.startswith("_")]:
             logger.debug("Removing [%s] from load path." % excluded)
         # prune in place so that os.walk() does not descend into the excluded directories
         sub_dirs[:] = [d for d in sub_dirs if not d.startswith("_")]
     # load path is only the root of the package hierarchy
     plugin_root_path = os.path.abspath(os.path.join(track_plugin_path, os.pardir))
     logger.debug("Adding [%s] to Python load path." % plugin_root_path)
     # needs to be at the beginning of the system path, otherwise import machinery tries to load application-internal modules
     sys.path.insert(0, plugin_root_path)
     try:
         plugin_module = self._load_plugin(plugin_name, module_dirs, track_plugin_path)
         # every module needs to have a register() method
         plugin_module.register(self)
     except BaseException:
         failure = "Could not load track plugin [%s]" % plugin_name
         logger.exception(failure)
         raise exceptions.SystemSetupError(failure)
Esempio n. 11
0
    def download_corpus(root_url, target_path, size_in_bytes, track_name, offline, test_mode):
        """
        Downloads the file named like ``target_path`` from ``root_url`` and verifies the result.

        :param root_url: The base URL. The download URL is ``root_url`` plus the file name of ``target_path``.
        :param target_path: The path of the local target file.
        :param size_in_bytes: The expected file size in bytes or ``None`` to skip the size check.
        :param track_name: The name of the track. Only used for progress and error messages.
        :param offline: If true, no download is attempted and a ``SystemSetupError`` is raised instead.
        :param test_mode: If true, an HTTP 404 is reported as missing test mode support of the track.
        :raises exceptions.DataError: If ``root_url`` is empty, the download fails or the downloaded file does not
                                      have the expected size.
        :raises exceptions.SystemSetupError: If ``offline`` is set or the file does not exist after the download.
        """
        file_name = os.path.basename(target_path)

        if not root_url:
            raise exceptions.DataError("%s is missing and it cannot be downloaded because no source URL is provided in the track."
                                       % target_path)
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % target_path)

        # build the URL from the validated parameter (the former name ``source_root_url`` is not defined here)
        data_url = "%s/%s" % (root_url, file_name)
        try:
            io.ensure_dir(os.path.dirname(target_path))
            if size_in_bytes:
                size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                logger.info("Downloading data from [%s] (%s MB) to [%s]." % (data_url, size_in_mb, target_path))
            else:
                logger.info("Downloading data from [%s] to [%s]." % (data_url, target_path))

            # we want to have a bit more accurate download progress as these files are typically very large
            progress = net.Progress("[INFO] Downloading data for track %s" % track_name, accuracy=1)
            net.download(data_url, target_path, size_in_bytes, progress_indicator=progress)
            progress.finish()
            logger.info("Downloaded data from [%s] to [%s]." % (data_url, target_path))
        except urllib.error.HTTPError as e:
            # HTTPError has to be handled before URLError as it is a subclass of it
            if e.code == 404 and test_mode:
                raise exceptions.DataError("Track [%s] does not support test mode. Please ask the track author to add it or "
                                           "disable test mode and retry." % track_name)
            else:
                msg = "Could not download [%s] to [%s]" % (data_url, target_path)
                if e.reason:
                    msg += " (HTTP status: %s, reason: %s)" % (str(e.code), e.reason)
                else:
                    msg += " (HTTP status: %s)" % str(e.code)
                raise exceptions.DataError(msg)
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (data_url, target_path))
            raise exceptions.DataError("Could not download [%s] to [%s]." % (data_url, target_path))

        if not os.path.isfile(target_path):
            raise exceptions.SystemSetupError(
                "Cannot download from %s to %s. Please verify that data are available at %s and "
                "check your internet connection." % (data_url, target_path, data_url))

        actual_size = os.path.getsize(target_path)
        if size_in_bytes is not None and actual_size != size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                       (target_path, actual_size, size_in_bytes))
Esempio n. 12
0
 def load(self):
     """
     Loads the plugin's root module via ``self.loader`` and calls its ``register()`` function with this object.

     :raises exceptions.SystemSetupError: If the call to ``register()`` fails for any reason.
     """
     plugin_module = self.loader.load()
     # every module needs to have a register() method
     try:
         plugin_module.register(self)
     except BaseException:
         failure = "Could not register track plugin at [%s]" % self.loader.root_path
         logger.exception(failure)
         raise exceptions.SystemSetupError(failure)
Esempio n. 13
0
    def __init__(self, track_path):
        """
        Resolves track name, track directory and track file from ``track_path``.

        :param track_path: Either a directory that contains a file ``track.json`` or the path to a JSON file.
        :raises exceptions.SystemSetupError: If ``track_path`` does not exist, is a directory without
                                             ``track.json``, is a file without the extension ``.json`` or is
                                             neither a file nor a directory.
        """
        if not os.path.exists(track_path):
            raise exceptions.SystemSetupError("Track path %s does not exist" % track_path)

        if os.path.isdir(track_path):
            # a directory: the track is named after it and must contain "track.json"
            self.track_name = io.basename(track_path)
            self._track_dir = track_path
            self._track_file = os.path.join(track_path, "track.json")
            if not os.path.exists(self._track_file):
                raise exceptions.SystemSetupError("Could not find track.json in %s" % track_path)
            return

        if not os.path.isfile(track_path):
            raise exceptions.SystemSetupError("%s is neither a file nor a directory" % track_path)
        if not io.has_extension(track_path, ".json"):
            raise exceptions.SystemSetupError("%s has to be a JSON file" % track_path)

        # a JSON file: the track is named after the file name without its extension
        self._track_dir = io.dirname(track_path)
        self._track_file = track_path
        self.track_name = io.splitext(io.basename(track_path))[0]
Esempio n. 14
0
 def __init__(self, cfg):
     """
     Sets up access to the configured track repository; a repository with a remote is cloned or fetched.

     :param cfg: The config object. Repository name, repository URL and directories are read from it.
     :raises exceptions.SystemSetupError: If the repository has no remote and its directory is not a git
                                          working copy.
     """
     self.cfg = cfg
     self.name = cfg.opts("system", "track.repository")
     # If no URL is found, we consider this a local only repo (but still require that it is a git repo)
     self.url = cfg.opts("tracks", "%s.url" % self.name, mandatory=False)
     self.remote = self.url is not None and self.url.strip() != ""
     root = cfg.opts("system", "root.dir")
     track_repositories = cfg.opts("benchmarks", "track.repository.dir")
     self.tracks_dir = "%s/%s/%s" % (root, track_repositories, self.name)

     if git.is_working_copy(self.tracks_dir):
         if self.remote:
             git.fetch(src=self.tracks_dir, remote=self.url)
     elif self.remote:
         # a normal git repo with a remote
         git.clone(src=self.tracks_dir, remote=self.url)
     else:
         raise exceptions.SystemSetupError("'{src}' must be a git repository.\n\nPlease run:\ngit -C {src} init"
                                           .format(src=self.tracks_dir))
Esempio n. 15
0
    def read(self, track_name, track_spec_file, mapping_dir):
        """
        Reads a track file, verifies it against the JSON schema and if valid, creates a track.

        :param track_name: The name of the track.
        :param track_spec_file: The complete path to the track specification file.
        :param mapping_dir: The directory where the mapping files for this track are stored locally.
        :return: A corresponding track instance if the track file is valid.
        :raises exceptions.SystemSetupError: If the track specification file cannot be found.
        :raises TrackSyntaxError: If the file cannot be rendered or parsed, or if it violates the JSON schema.
        :raises exceptions.InvalidSyntax: If the version identifier of the track cannot be converted to an integer.
        :raises exceptions.RallyError: If the track requires a newer track version than the supported one.
        """

        logger.info("Reading track specification file [%s]." % track_spec_file)
        try:
            rendered = render_template_from_file(track_spec_file, self.track_params)
            logger.info("Final rendered track for '%s': %s" % (track_spec_file, rendered))
            track_spec = json.loads(rendered)
        except jinja2.exceptions.TemplateNotFound:
            logger.exception("Could not load [%s]." % track_spec_file)
            raise exceptions.SystemSetupError("Track %s does not exist" % track_name)
        except (json.JSONDecodeError, jinja2.exceptions.TemplateError) as e:
            logger.exception("Could not load [%s]." % track_spec_file)
            raise TrackSyntaxError("Could not load '%s'" % track_spec_file, e)
        # check the track version before even attempting to validate the JSON format to avoid bogus errors.
        raw_version = track_spec.get("version", TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION)
        try:
            track_version = int(raw_version)
        except (ValueError, TypeError):
            # int() raises TypeError (not ValueError) for values such as null or a list; report both the same way
            raise exceptions.InvalidSyntax("version identifier for track %s must be numeric but was [%s]" % (track_name, str(raw_version)))
        if TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION < track_version:
            raise exceptions.RallyError("Track %s requires a newer version of Rally. Please upgrade Rally (supported track version: %d, "
                                        "required track version: %d)" %
                                        (track_name, TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION, track_version))
        try:
            jsonschema.validate(track_spec, self.track_schema)
        except jsonschema.exceptions.ValidationError as ve:
            raise TrackSyntaxError(
                "Track '%s' is invalid.\n\nError details: %s\nInstance: %s\nPath: %s\nSchema path: %s"
                % (track_name, ve.message,
                   json.dumps(ve.instance, indent=4, sort_keys=True), ve.absolute_path, ve.absolute_schema_path))
        return self.read_track(track_name, track_spec, mapping_dir)