Example #1
def post_process_for_test_mode(t):
    logger.info("Preparing track [%s] for test mode." % str(t))
    for index in t.indices:
        for type in index.types:
            if type.has_valid_document_data():
                logger.info(
                    "Reducing corpus size to 1000 documents for [%s/%s]" %
                    (index, type))
                type.number_of_documents = 1000

                path, ext = io.splitext(type.document_archive)
                path_2, ext_2 = io.splitext(path)

                type.document_archive = "%s-1k%s%s" % (path_2, ext_2, ext)
                type.document_file = "%s-1k%s" % (path_2, ext_2)
                # we don't want to check sizes
                type.compressed_size_in_bytes = None
                type.uncompressed_size_in_bytes = None

    for challenge in t.challenges:
        for task in challenge.schedule:
            if task.warmup_iterations > 1:
                logger.info("Resetting warmup iterations to 1 for [%s]" %
                            str(task))
                task.warmup_iterations = 1
            if task.iterations > 1:
                logger.info("Resetting measurement iterations to 1 for [%s]" %
                            str(task))
                task.iterations = 1
            if task.warmup_time_period is not None:
                logger.info(
                    "Resetting warmup time period for [%s] to 1 second." %
                    str(task))
                task.warmup_time_period = 1
    return t
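
The two consecutive io.splitext calls above are what let the test-mode rewrite insert the -1k suffix in front of the full double extension of a compressed corpus file. A minimal sketch of the idea, using os.path.splitext as a stand-in for Rally's io.splitext and a made-up archive name:

import os.path

# Illustrative only: os.path.splitext stands in for Rally's io.splitext,
# and "documents.json.bz2" is a hypothetical archive name.
document_archive = "documents.json.bz2"

path, ext = os.path.splitext(document_archive)    # ("documents.json", ".bz2")
path_2, ext_2 = os.path.splitext(path)            # ("documents", ".json")

print("%s-1k%s%s" % (path_2, ext_2, ext))   # documents-1k.json.bz2
print("%s-1k%s" % (path_2, ext_2))          # documents-1k.json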
Example #2
def post_process_for_test_mode(t):
    logger.info("Preparing track [%s] for test mode." % str(t))
    for index in t.indices:
        for type in index.types:
            if type.has_valid_document_data():
                logger.info("Reducing corpus size to 1000 documents for [%s/%s]" % (index, type))
                type.number_of_documents = 1000

                path, ext = io.splitext(type.document_archive)
                path_2, ext_2 = io.splitext(path)

                type.document_archive = "%s-1k%s%s" % (path_2, ext_2, ext)
                type.document_file = "%s-1k%s" % (path_2, ext_2)
                # we don't want to check sizes
                type.compressed_size_in_bytes = None
                type.uncompressed_size_in_bytes = None

    for challenge in t.challenges:
        for task in challenge.schedule:
            if task.warmup_iterations > 1:
                logger.info("Resetting warmup iterations to 1 for [%s]" % str(task))
                task.warmup_iterations = 1
            if task.iterations > 1:
                logger.info("Resetting measurement iterations to 1 for [%s]" % str(task))
                task.iterations = 1
            if task.warmup_time_period is not None:
                logger.info("Resetting warmup time period for [%s] to 1 second." % str(task))
                task.warmup_time_period = 1
    return t
Example #3
    def _create_type(self, type_spec, mapping_dir):
        docs = self._r(type_spec, "documents", mandatory=False)
        if docs:
            if io.is_archive(docs):
                document_archive = docs
                document_file = io.splitext(docs)[0]
            else:
                document_archive = None
                document_file = docs
            number_of_documents = self._r(type_spec, "document-count")
            compressed_bytes = self._r(type_spec, "compressed-bytes", mandatory=False)
            uncompressed_bytes = self._r(type_spec, "uncompressed-bytes", mandatory=False)
        else:
            document_archive = None
            document_file = None
            number_of_documents = 0
            compressed_bytes = 0
            uncompressed_bytes = 0

        mapping_file = os.path.join(mapping_dir, self._r(type_spec, "mapping"))
        with self.source(mapping_file, "rt") as f:
            mapping = json.load(f)

        return track.Type(name=self._r(type_spec, "name"),
                          mapping=mapping,
                          document_file=document_file,
                          document_archive=document_archive,
                          includes_action_and_meta_data=self._r(type_spec, "includes-action-and-meta-data", mandatory=False,
                                                                default_value=False),
                          number_of_documents=number_of_documents,
                          compressed_size_in_bytes=compressed_bytes,
                          uncompressed_size_in_bytes=uncompressed_bytes)
Example #4
    def _create_type(self, type_spec, mapping_dir, data_dir):
        compressed_docs = self._r(type_spec, "documents", mandatory=False)
        if compressed_docs:
            document_archive = "%s/%s" % (data_dir, compressed_docs)
            document_file = "%s/%s" % (data_dir,
                                       io.splitext(compressed_docs)[0])
        else:
            document_archive = None
            document_file = None

        return track.Type(
            name=self._r(type_spec, "name"),
            mapping_file="%s/%s" %
            (mapping_dir, self._r(type_spec, "mapping")),
            document_file=document_file,
            document_archive=document_archive,
            number_of_documents=self._r(type_spec,
                                        "document-count",
                                        mandatory=False,
                                        default_value=0),
            compressed_size_in_bytes=self._r(type_spec,
                                             "compressed-bytes",
                                             mandatory=False),
            uncompressed_size_in_bytes=self._r(type_spec,
                                               "uncompressed-bytes",
                                               mandatory=False))
Example #5
    def decompress(data_set_path, expected_size_in_bytes):
        # we assume that track data are always compressed and try to decompress them before running the benchmark
        basename, extension = io.splitext(data_set_path)
        decompressed = False
        if not os.path.isfile(basename) or os.path.getsize(
                basename) != expected_size_in_bytes:
            decompressed = True
            if type.uncompressed_size_in_bytes:
                console.info(
                    "Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... "
                    % (data_set_path, basename,
                       convert.bytes_to_gb(type.uncompressed_size_in_bytes)),
                    end='',
                    flush=True,
                    logger=logger)
            else:
                console.info(
                    "Decompressing track data from [%s] to [%s] ... " %
                    (data_set_path, basename),
                    end='',
                    flush=True,
                    logger=logger)

            io.decompress(data_set_path, io.dirname(data_set_path))
            console.println("[OK]")
            extracted_bytes = os.path.getsize(basename)
            if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
                raise exceptions.DataError(
                    "[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected."
                    % (basename, extracted_bytes, expected_size_in_bytes))
        return basename, decompressed
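
The decompress helper above follows a skip-if-already-extracted pattern: decompress only when the target file is missing or its size does not match, then verify the extracted size. A rough, self-contained sketch of the same pattern (gzip and shutil stand in for Rally's io.decompress and console helpers; the names are made up):

import gzip
import os
import shutil

def decompress_if_needed(archive_path, expected_size_in_bytes):
    # Illustrative only: assumes a gzip archive; gzip stands in for Rally's io.decompress.
    target_path, _ = os.path.splitext(archive_path)   # e.g. "data.json.gz" -> "data.json"
    if os.path.isfile(target_path) and os.path.getsize(target_path) == expected_size_in_bytes:
        return target_path, False   # already extracted and the size matches
    with gzip.open(archive_path, "rb") as src, open(target_path, "wb") as dst:
        shutil.copyfileobj(src, dst)
    extracted_bytes = os.path.getsize(target_path)
    if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
        raise ValueError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected."
                         % (target_path, extracted_bytes, expected_size_in_bytes))
    return target_path, True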
Example #6
 def _unzip(self, data_set_path):
     # we assume that track data are always compressed and try to unzip them before running the benchmark
     basename, extension = io.splitext(data_set_path)
     if not os.path.isfile(basename):
         logger.info("Unzipping track data from [%s] to [%s]." % (data_set_path, basename))
         io.unzip(data_set_path, io.dirname(data_set_path))
     return basename
Example #7
 def mapping_file_name(self, type):
     distribution_version = self._config.opts("source", "distribution.version", mandatory=False)
     if distribution_version and len(distribution_version.strip()) > 0:
         path, extension = io.splitext(type.mapping_file_name)
         return "%s-%s%s" % (path, distribution_version, extension)
     else:
         return type.mapping_file_name
Example #8
 def _unzip(self, data_set_path):
     # we assume that track data are always compressed and try to unzip them before running the benchmark
     basename, extension = io.splitext(data_set_path)
     if not os.path.isfile(basename):
         logger.info("Unzipping track data from [%s] to [%s]." % (data_set_path, basename))
         io.unzip(data_set_path, io.dirname(data_set_path))
     return basename
Example #9
 def mapping_file_name(self, type):
     distribution_version = self._config.opts("source", "distribution.version", mandatory=False)
     if distribution_version and len(distribution_version.strip()) > 0:
         path, extension = io.splitext(type.mapping_file_name)
         return "%s-%s%s" % (path, distribution_version, extension)
     else:
         return type.mapping_file_name
Example #10
    def _create_type(self, type_spec, mapping_dir):
        compressed_docs = self._r(type_spec, "documents", mandatory=False)
        if compressed_docs:
            relative_data_dir = self.name.lower()
            document_archive = os.path.join(relative_data_dir, compressed_docs)
            document_file = os.path.join(relative_data_dir, io.splitext(compressed_docs)[0])
            number_of_documents = self._r(type_spec, "document-count")
            compressed_bytes = self._r(type_spec, "compressed-bytes", mandatory=False)
            uncompressed_bytes = self._r(type_spec, "uncompressed-bytes", mandatory=False)
        else:
            document_archive = None
            document_file = None
            number_of_documents = 0
            compressed_bytes = 0
            uncompressed_bytes = 0

        mapping_file = os.path.join(mapping_dir, self._r(type_spec, "mapping"))
        with self.source(mapping_file, "rt") as f:
            mapping = json.load(f)

        return track.Type(name=self._r(type_spec, "name"),
                          mapping=mapping,
                          document_file=document_file,
                          document_archive=document_archive,
                          includes_action_and_meta_data=self._r(type_spec, "includes-action-and-meta-data", mandatory=False,
                                                                default_value=False),
                          number_of_documents=number_of_documents,
                          compressed_size_in_bytes=compressed_bytes,
                          uncompressed_size_in_bytes=uncompressed_bytes)
Example #11
def post_process_for_test_mode(t):
    logger.info("Preparing track [%s] for test mode." % str(t))
    for index in t.indices:
        for type in index.types:
            if type.has_valid_document_data():
                logger.info(
                    "Reducing corpus size to 1000 documents for [%s/%s]" %
                    (index, type))
                type.number_of_documents = 1000

                path, ext = io.splitext(type.document_archive)
                path_2, ext_2 = io.splitext(path)

                type.document_archive = "%s-1k%s%s" % (path_2, ext_2, ext)
                type.document_file = "%s-1k%s" % (path_2, ext_2)
                # we don't want to check sizes
                type.compressed_size_in_bytes = None
                type.uncompressed_size_in_bytes = None

    for challenge in t.challenges:
        for task in challenge.schedule:
            # we need to iterate over leaf tasks and avoid iterating over possible intermediate 'parallel' elements
            for leaf_task in task:
                # iteration-based schedules are divided among all clients and we should provide at least one iteration for each client.
                if leaf_task.warmup_iterations > leaf_task.clients:
                    count = leaf_task.clients
                    logger.info("Resetting warmup iterations to %d for [%s]" %
                                (count, str(leaf_task)))
                    leaf_task.warmup_iterations = count
                if leaf_task.iterations > leaf_task.clients:
                    count = leaf_task.clients
                    logger.info(
                        "Resetting measurement iterations to %d for [%s]" %
                        (count, str(leaf_task)))
                    leaf_task.iterations = count
                if leaf_task.warmup_time_period is not None and leaf_task.warmup_time_period > 0:
                    leaf_task.warmup_time_period = 0
                    logger.info(
                        "Resetting warmup time period for [%s] to [%d] seconds."
                        % (str(leaf_task), leaf_task.warmup_time_period))
                if leaf_task.time_period is not None and leaf_task.time_period > 10:
                    leaf_task.time_period = 10
                    logger.info(
                        "Resetting measurement time period for [%s] to [%d] seconds."
                        % (str(leaf_task), leaf_task.time_period))
    return t
Example #12
 def decompress(data_set_path, expected_size_in_bytes):
     # we assume that track data are always compressed and try to decompress them before running the benchmark
     basename, extension = io.splitext(data_set_path)
     if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
         logger.info("Unzipping track data from [%s] to [%s]." % (data_set_path, basename))
         io.decompress(data_set_path, io.dirname(data_set_path))
         extracted_bytes = os.path.getsize(basename)
         if extracted_bytes != expected_size_in_bytes:
             raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                        (basename, extracted_bytes, expected_size_in_bytes))
Example #13
 def decompress(data_set_path, expected_size_in_bytes):
     # we assume that track data are always compressed and try to decompress them before running the benchmark
     basename, extension = io.splitext(data_set_path)
     if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
         logger.info("Unzipping track data from [%s] to [%s]." % (data_set_path, basename))
         print("Decompressing %s (resulting size: %.2f GB) ... " %
               (type.document_archive, convert.bytes_to_gb(type.uncompressed_size_in_bytes)), end='', flush=True)
         io.decompress(data_set_path, io.dirname(data_set_path))
         print("Done")
         extracted_bytes = os.path.getsize(basename)
         if extracted_bytes != expected_size_in_bytes:
             raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                        (basename, extracted_bytes, expected_size_in_bytes))
Example #14
 def _configured_plugins(self, variables=None):
     configured_plugins = []
     # each directory is a plugin, each .ini is a config (just go one level deep)
     for entry in os.listdir(self.plugins_root_path):
         plugin_path = os.path.join(self.plugins_root_path, entry)
         if os.path.isdir(plugin_path):
             for child_entry in os.listdir(plugin_path):
                 if os.path.isfile(os.path.join(plugin_path, child_entry)) and io.has_extension(child_entry, ".ini"):
                     f, _ = io.splitext(child_entry)
                     plugin_name = self._file_to_plugin_name(entry)
                     config = io.basename(f)
                     configured_plugins.append(PluginDescriptor(name=plugin_name, config=config, variables=variables))
     return configured_plugins
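
The plugin discovery loop above treats each sub-directory of plugins_root_path as a plugin and every .ini file directly inside it as one configuration of that plugin. A minimal stand-alone sketch of that one-level walk, using only the standard library (the directory layout and names are assumed for illustration):

import os

def discover_plugin_configs(plugins_root_path):
    # Illustrative only: collects (plugin directory name, config name without .ini) pairs.
    configs = []
    for entry in os.listdir(plugins_root_path):
        plugin_path = os.path.join(plugins_root_path, entry)
        if os.path.isdir(plugin_path):
            for child_entry in os.listdir(plugin_path):
                child_path = os.path.join(plugin_path, child_entry)
                if os.path.isfile(child_path) and child_entry.endswith(".ini"):
                    config_name, _ = os.path.splitext(child_entry)
                    configs.append((entry, config_name))
    return configs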
Example #15
 def _configured_plugins(self):
     configured_plugins = []
     # each directory is a plugin, each .ini is a config (just go one level deep)
     for entry in os.listdir(self.plugins_root_path):
         plugin_path = os.path.join(self.plugins_root_path, entry)
         if os.path.isdir(plugin_path):
             for child_entry in os.listdir(plugin_path):
                 if os.path.isfile(os.path.join(plugin_path, child_entry)) and io.has_extension(child_entry, ".ini"):
                     f, _ = io.splitext(child_entry)
                     plugin_name = self._file_to_plugin_name(entry)
                     config = io.basename(f)
                     configured_plugins.append(PluginDescriptor(name=plugin_name, config=config))
     return configured_plugins
Example #16
def post_process_for_test_mode(t):
    logger.info("Preparing track [%s] for test mode." % str(t))
    for index in t.indices:
        for type in index.types:
            if type.has_valid_document_data():
                logger.info("Reducing corpus size to 1000 documents for [%s/%s]" % (index, type))
                type.number_of_documents = 1000

                path, ext = io.splitext(type.document_archive)
                path_2, ext_2 = io.splitext(path)

                type.document_archive = "%s-1k%s%s" % (path_2, ext_2, ext)
                type.document_file = "%s-1k%s" % (path_2, ext_2)
                # we don't want to check sizes
                type.compressed_size_in_bytes = None
                type.uncompressed_size_in_bytes = None

    for challenge in t.challenges:
        for task in challenge.schedule:
            # we need to iterate over leaf tasks and avoid iterating over possible intermediate 'parallel' elements
            for leaf_task in task:
                # iteration-based schedules are divided among all clients and we should provide at least one iteration for each client.
                if leaf_task.warmup_iterations > leaf_task.clients:
                    count = leaf_task.clients
                    logger.info("Resetting warmup iterations to %d for [%s]" % (count, str(leaf_task)))
                    leaf_task.warmup_iterations = count
                if leaf_task.iterations > leaf_task.clients:
                    count = leaf_task.clients
                    logger.info("Resetting measurement iterations to %d for [%s]" % (count, str(leaf_task)))
                    leaf_task.iterations = count
                if leaf_task.warmup_time_period is not None and leaf_task.warmup_time_period > 0:
                    leaf_task.warmup_time_period = 0
                    logger.info("Resetting warmup time period for [%s] to [%d] seconds." % (str(leaf_task), leaf_task.warmup_time_period))
                if leaf_task.time_period is not None and leaf_task.time_period > 10:
                    leaf_task.time_period = 10
                    logger.info("Resetting measurement time period for [%s] to [%d] seconds." % (str(leaf_task), leaf_task.time_period))
    return t
Example #17
    def _create_type(self, type_spec, mapping_dir, data_dir):
        compressed_docs = self._r(type_spec, "documents", mandatory=False)
        if compressed_docs:
            document_archive = "%s/%s" % (data_dir, compressed_docs)
            document_file = "%s/%s" % (data_dir, io.splitext(compressed_docs)[0])
        else:
            document_archive = None
            document_file = None

        return track.Type(name=self._r(type_spec, "name"),
                          mapping_file="%s/%s" % (mapping_dir, self._r(type_spec, "mapping")),
                          document_file=document_file,
                          document_archive=document_archive,
                          number_of_documents=self._r(type_spec, "document-count", mandatory=False, default_value=0),
                          compressed_size_in_bytes=self._r(type_spec, "compressed-bytes", mandatory=False),
                          uncompressed_size_in_bytes=self._r(type_spec, "uncompressed-bytes", mandatory=False)
                          )
Example #18
    def __init__(self, track_path):
        if not os.path.exists(track_path):
            raise exceptions.SystemSetupError("Track path %s does not exist" % track_path)

        if os.path.isdir(track_path):
            self.track_name = io.basename(track_path)
            self._track_dir = track_path
            self._track_file = os.path.join(track_path, "track.json")
            if not os.path.exists(self._track_file):
                raise exceptions.SystemSetupError("Could not find track.json in %s" % track_path)
        elif os.path.isfile(track_path):
            if io.has_extension(track_path, ".json"):
                self._track_dir = io.dirname(track_path)
                self._track_file = track_path
                self.track_name = io.splitext(io.basename(track_path))[0]
            else:
                raise exceptions.SystemSetupError("%s has to be a JSON file" % track_path)
        else:
            raise exceptions.SystemSetupError("%s is neither a file nor a directory" % track_path)
Example #19
    def decompress(data_set_path, expected_size_in_bytes):
        # we assume that track data are always compressed and try to decompress them before running the benchmark
        basename, extension = io.splitext(data_set_path)
        decompressed = False
        if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
            decompressed = True
            if type.uncompressed_size_in_bytes:
                console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                             (data_set_path, basename, convert.bytes_to_gb(type.uncompressed_size_in_bytes)),
                             end='', flush=True, logger=logger)
            else:
                console.info("Decompressing track data from [%s] to [%s] ... " % (data_set_path, basename), end='',
                             flush=True, logger=logger)

            io.decompress(data_set_path, io.dirname(data_set_path))
            console.println("[OK]")
            extracted_bytes = os.path.getsize(basename)
            if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
                raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                           (basename, extracted_bytes, expected_size_in_bytes))
        return basename, decompressed
Example #20
def plain_text(file):
    _, ext = io.splitext(file)
    return ext in [
        ".ini", ".txt", ".json", ".yml", ".yaml", ".options", ".properties"
    ]
Example #21
 def __is_car(path):
     _, extension = io.splitext(path)
     return extension == ".ini"
Example #22
 def __car_name(path):
     p, _ = io.splitext(path)
     return io.basename(p)
Example #23
 def __car_name(path):
     p, _ = io.splitext(path)
     return io.basename(p)
Example #24
 def __is_car(path):
     _, extension = io.splitext(path)
     return extension == ".ini"
Example #25
def plain_text(file):
    _, ext = io.splitext(file)
    return ext in [".ini", ".txt", ".json", ".yml", ".yaml", ".options", ".properties"]