Example no. 1
 def consolidate(self):
     """
     Consolidates the results obtained by the single WEKA processes into
     a consistent structure of collections that are stored on the
     file system.
     """
     self._log("Consolidating results ...")
     # We load and store the results once into a PerformanceResultSummary.
     # Loading from the multiple csv files does the necessary consolidation
     # and merging and parsing of the tables.
     self._log("Reading intermediate results...")
     result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
     
     self._log("done")
     self._log("Storing result collection")
     
     result_collection.store(self.result_directory)
     
     self._log("done")
     
     
     # Write the specification of this operation
     # to the result directory in order to make later
     # analysis of the results easier
     source_operation_file = open(os.path.join(self.result_directory,
                                               "source_operation.yaml"), 'w')
     yaml.dump(self.operation_spec, source_operation_file)
     source_operation_file.close()
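
For reference, the spec-dump step at the end of Example no. 1 can equally be written with a context manager, so the file handle is closed even if yaml.dump raises. A minimal standalone sketch under the same assumptions (operation_spec and result_directory as in the example; the helper name is illustrative, not part of pySPACE):

import os
import yaml

def dump_operation_spec(operation_spec, result_directory):
    # Hypothetical helper: write the operation specification next to the
    # results, mirroring the last step of Example no. 1 with a `with` block.
    spec_path = os.path.join(result_directory, "source_operation.yaml")
    with open(spec_path, "w") as source_operation_file:
        yaml.dump(operation_spec, source_operation_file)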
Example no. 2
    def consolidate(self):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([self.result_directory, "{*",])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            # Load their meta_data
            meta_data = BaseDataset.load_meta_data(dataset_path)

            # Determine author and date
            try:
                author = pwd.getpwuid(os.getuid())[4]
            except Exception:
                author = "unknown"
                self._log("Author could not be resolved.", level=logging.WARNING)
            date = time.strftime("%Y%m%d_%H_%M_%S")

            # Update meta data and store it
            meta_data.update({"author" : author, "date" : date})
            BaseDataset.store_meta_data(dataset_path, meta_data)

            # Copy the input dataset specification file to the result
            # directory in order to make later analysis of
            # the results easier
            input_meta_path = os.sep.join([pySPACE.configuration.storage,
                                          meta_data["input_collection_name"]])
            input_meta = BaseDataset.load_meta_data(input_meta_path)
            BaseDataset.store_meta_data(dataset_path, input_meta,
                                        file_name="input_metadata.yaml")
        # Check if some results consist of several runs
        # and update the meta data in this case
        # TODO: This is not a clean solution
        for dataset_dir in glob.glob(os.sep.join([self.result_directory,
                                                     "*"])):
            if not os.path.isdir(dataset_dir): continue
            # There can be either run dirs, persistency dirs, or both of them.
            # Check which of them there are more of. If both exist, their numbers
            # are supposed to be equal.
            nr_run_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                                     "data_run*"])))
            nr_per_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                                     "persistency_run*"])))
            nr_runs = max(nr_run_dirs, nr_per_dirs)

            if nr_runs > 1:
                collection_meta = BaseDataset.load_meta_data(dataset_dir)
                collection_meta["runs"] = nr_runs
                BaseDataset.store_meta_data(dataset_dir,collection_meta)
        # If we don't create a feature vector or time series collection,
        # the classification was evaluated using a classification performance sink.
        # The resulting files should be merged into one csv table.
        pathlist = glob.glob(os.path.join(self.result_directory, "results_*"))
        if len(pathlist) > 0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
            self._log("done")
            self._log("Storing result collection")
            result_collection.store(self.result_directory)
            self._log("done")
            PerformanceResultSummary.merge_traces(self.result_directory)

            if not(self.compression == False):
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole result folder easier to view.
                import zipfile
                cwd=os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode, and then skip the zipping.
                try:
                    pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))
                    
                    if not self.compression == "delete":
                        save_file = zipfile.ZipFile(
                            self.result_directory + '/result_folders.zip',
                            mode="w", compression=self.compression)
                        # we want the paths inside the zip file to be relative
                        # to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(node[0],
                                                           self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path, data))
                        save_file.close()
                    # To still have easy access to the history of the processing,
                    # we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except:
                    self._log("Result files could not be compressed with 32 bit mode, switching to 64 bit mode.", level=logging.CRITICAL)
                    # nearly identical code; the only difference is the 64 bit mode flag
                    try:
                        pathlist = glob.glob(os.path.join(self.result_directory, "{*}"))
                        save_file = zipfile.ZipFile(
                            self.result_directory + '/result_folders.zip',
                            mode="w", compression=self.compression,
                            allowZip64=True)
                        # we want the paths inside the zip file to be relative
                        # to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(node[0],
                                                           self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path, data))
                        save_file.close()
                        # To still have easy access to the history of the processing,
                        # we keep one folder.
                        pathlist.pop()
                        for path in pathlist:
                            shutil.rmtree(path)
                    except:
                        self._log("64 bit mode also failed. Please check your files and your code or contact your local programmer!", level=logging.CRITICAL)
                os.chdir(cwd)
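
The 32 bit and 64 bit zip branches in Example no. 2 differ only in the allowZip64 flag. As a hedged sketch, the duplication could be folded into one helper that uses ZipFile.write's arcname argument instead of os.chdir; the function name and signature below are illustrative and not part of pySPACE:

import os
import zipfile

def zip_result_folders(result_directory, folder_paths,
                       compression=zipfile.ZIP_DEFLATED, allow_zip64=False):
    # Hypothetical helper: archive the given folders into result_folders.zip,
    # storing every entry relative to result_directory.
    zip_path = os.path.join(result_directory, "result_folders.zip")
    archive = zipfile.ZipFile(zip_path, mode="w", compression=compression,
                              allowZip64=allow_zip64)
    try:
        for path in folder_paths:
            for dir_path, _, file_names in os.walk(path):
                rel_dir = os.path.relpath(dir_path, result_directory)
                archive.write(dir_path, arcname=rel_dir)
                for file_name in file_names:
                    archive.write(os.path.join(dir_path, file_name),
                                  arcname=os.path.join(rel_dir, file_name))
    finally:
        archive.close()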
Example no. 3
    def consolidate(self, _=None):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([
            self.result_directory,
            "{*",
        ])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            try:
                # Load their meta_data
                meta_data = BaseDataset.load_meta_data(dataset_path)

                # Determine author and date
                author = get_author()
                date = time.strftime("%Y%m%d_%H_%M_%S")

                # Update meta data and store it
                meta_data.update({"author": author, "date": date})

                # There can be either run dirs, persistency dirs, or both of them.
                # Check which of them there are more of. If both exist, their numbers
                # are supposed to be equal.
                nr_run_dirs = len(
                    glob.glob(os.path.join(dataset_path, "data_run*")))
                nr_per_dirs = len(
                    glob.glob(os.path.join(dataset_path, "persistency_run*")))
                nr_runs = max(nr_run_dirs, nr_per_dirs)
                if nr_runs > 1:
                    meta_data["runs"] = nr_runs

                # Store the metadata
                BaseDataset.store_meta_data(dataset_path, meta_data)

                # Copy the input dataset specification file to the result
                # directory in order to make later analysis of
                # the results easier
                # THA: Strip the leading "/" from the input collection name,
                # because otherwise it would be treated as an absolute path
                input_collection_name = meta_data["input_collection_name"][1:] if \
                    meta_data["input_collection_name"][0] == os.sep else meta_data["input_collection_name"]
                input_meta_path = os.path.join(pySPACE.configuration.storage,
                                               input_collection_name)
                try:
                    input_meta = BaseDataset.load_meta_data(input_meta_path)
                    BaseDataset.store_meta_data(
                        dataset_path,
                        input_meta,
                        file_name="input_metadata.yaml")
                except (IOError, OSError) as e:
                    self._log("Error copying the input_metadata.yaml: {error}".
                              format(error=e.message),
                              level=logging.CRITICAL)
            except Exception as e:
                logging.getLogger("%s" % self).exception(
                    "Error updating the metadata: {error!s}".format(error=e))
                raise e

        # If we don't create a feature vector or time series collection,
        # the classification was evaluated using a classification performance sink.
        # The resulting files should be merged into one csv table.
        pathlist = glob.glob(os.path.join(self.result_directory, "results_*"))
        if len(pathlist) > 0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            try:
                result_collection = PerformanceResultSummary(
                    dataset_dir=self.result_directory)
                self._log("done")
                self._log("Storing result collection")
                result_collection.store(self.result_directory)
                self._log("done")
                PerformanceResultSummary.merge_traces(self.result_directory)
            except Exception as e:
                logging.getLogger("%s" % self).exception(
                    "Error merging the result collection: {error!s}".format(
                        error=e))

            if self.compression:
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole result folder easier to view.
                import zipfile
                cwd = os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode,
                # and then skip the zipping.
                try:
                    pathlist = glob.glob(
                        os.path.join(self.result_directory, "{*}"))

                    if not self.compression == "delete":
                        save_file = zipfile.ZipFile(
                            self.result_directory + '/result_folders.zip',
                            mode="w",
                            compression=self.compression)
                        # we want the paths inside the zip file to be relative
                        # to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(
                                    node[0], self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(
                                        os.path.join(rel_path, data))
                        save_file.close()
                    # To still have easy access to the history of the
                    # processing, we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except Exception:
                    self._log("Result files could not be compressed with 32" +
                              " bit mode, switching to 64 bit mode",
                              level=logging.CRITICAL)
                    # nearly identical code; the only difference is the 64 bit mode flag
                    try:
                        pathlist = glob.glob(
                            os.path.join(self.result_directory, "{*}"))
                        save_file = zipfile.ZipFile(
                            self.result_directory + '/result_folders.zip',
                            mode="w",
                            compression=self.compression,
                            allowZip64=True)
                        # we want the paths inside the zip file to be relative
                        # to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(
                                    node[0], self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(
                                        os.path.join(rel_path, data))
                        save_file.close()
                        # To still have easy access to the history of the
                        # processing, we keep one folder.
                        pathlist.pop()
                        for path in pathlist:
                            shutil.rmtree(path)
                    except:
                        self._log(
                            "64 bit mode also failed. Please check your files and your code or contact your local programmer!",
                            level=logging.CRITICAL)
                os.chdir(cwd)
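
Examples no. 3 and 4 replace the inline pwd lookup of Example no. 2 with a get_author() helper whose body is not shown here. Judging from Example no. 2, it presumably wraps the same lookup roughly as in this sketch; the actual pySPACE implementation may differ:

import os
import pwd

def get_author():
    # Assumed behaviour, reconstructed from Example no. 2: read the GECOS
    # field (full name) of the current user, falling back to "unknown".
    try:
        return pwd.getpwuid(os.getuid())[4]
    except (KeyError, OSError):
        return "unknown"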
Example no. 4
    def consolidate(self, _=None):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([self.result_directory, "{*",])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            try:
                # Load their meta_data
                meta_data = BaseDataset.load_meta_data(dataset_path)

                # Determine author and date
                author = get_author()
                date = time.strftime("%Y%m%d_%H_%M_%S")

                # Update meta data and store it
                meta_data.update({"author": author, "date": date})

                # There can be either run dirs, persistency dirs, or both of them.
                # Check which of them there are more of. If both exist, their numbers
                # are supposed to be equal.
                nr_run_dirs = len(glob.glob(os.path.join(dataset_path, "data_run*")))
                nr_per_dirs = len(glob.glob(os.path.join(dataset_path, "persistency_run*")))
                nr_runs = max(nr_run_dirs, nr_per_dirs)
                if nr_runs > 1:
                    meta_data["runs"] = nr_runs

                # Store the metadata
                BaseDataset.store_meta_data(dataset_path, meta_data)

                # Copy the input dataset specification file to the result
                # directory in order to make later analysis of
                # the results easier
                # THA: Strip the leading "/" from the input dataset name,
                # because otherwise it would be treated as an absolute path
                input_collection_name = meta_data["input_dataset_name"][1:] if \
                    meta_data["input_dataset_name"][0] == os.sep else meta_data["input_dataset_name"]
                input_meta_path = os.path.join(pySPACE.configuration.storage, input_collection_name)
                try:
                    input_meta = BaseDataset.load_meta_data(input_meta_path)
                    BaseDataset.store_meta_data(dataset_path, input_meta, file_name="input_metadata.yaml")
                except (IOError, OSError) as e:
                    self._log("Error copying the input_metadata.yaml: {error}".format(error=e.message),
                              level=logging.CRITICAL)
            except Exception as e:
                logging.getLogger("%s" % self).exception("Error updating the metadata: {error!s}".format(error=e))
                raise e

        # If we don't create a feature vector or time series collection,
        # the classification was evaluated using a classification performance sink.
        # The resulting files should be merged into one csv table.
        pathlist = glob.glob(os.path.join(self.result_directory, "results_*"))
        if len(pathlist) > 0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            try:
                result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
                self._log("done")
                self._log("Storing result collection")
                result_collection.store(self.result_directory)
                self._log("done")
                PerformanceResultSummary.merge_traces(self.result_directory)
            except Exception as e:
                logging.getLogger("%s" % self).exception("Error merging the result collection: {error!s}".format(
                    error=e))

            if self.compression:
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole result folder easier to view.
                import zipfile
                cwd = os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode,
                # and then skip the zipping.
                try:
                    pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))

                    if not self.compression == "delete":
                        save_file = zipfile.ZipFile(
                            self.result_directory+'/result_folders.zip',
                            mode="w", compression=self.compression)
                        # we want the paths inside the zip file to be relative
                        # to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(node[0],
                                                           self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path,
                                                                 data))
                        save_file.close()
                    # To still have easy access to the history of the
                    # processing, we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except Exception:
                    self._log("Result files could not be compressed with 32"+
                              " bit mode, switching to 64 bit mode",
                              level=logging.CRITICAL)
                    # nearly identical code; the only difference is the 64 bit mode flag
                    try:
                        pathlist = glob.glob(os.path.join(self.result_directory, "{*}"))
                        save_file = zipfile.ZipFile(
                            self.result_directory + '/result_folders.zip',
                            mode="w", compression=self.compression,
                            allowZip64=True)
                        # we want the paths inside the zip file to be relative
                        # to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(node[0],
                                                           self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path, data))
                        save_file.close()
                        # To still have easy access to the history of the
                        # processing, we keep one folder.
                        pathlist.pop()
                        for path in pathlist:
                            shutil.rmtree(path)
                    except:
                        self._log("64 bit mode also failed. Please check your files and your code or contact your local programmer!", level=logging.CRITICAL)
                os.chdir(cwd)
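
Examples no. 2 to 4 share the same directory conventions: result datasets live in folders whose names start with "{", and each dataset may contain data_run* and/or persistency_run* subfolders whose count gives the number of runs. A small standalone sketch of that counting step, under the same naming assumptions (the helper name is illustrative):

import glob
import os

def count_runs(dataset_path):
    # There can be run dirs, persistency dirs, or both; if both exist their
    # numbers are expected to be equal, so the maximum is taken.
    nr_run_dirs = len(glob.glob(os.path.join(dataset_path, "data_run*")))
    nr_per_dirs = len(glob.glob(os.path.join(dataset_path, "persistency_run*")))
    return max(nr_run_dirs, nr_per_dirs)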