def consolidate(self):
    """ Consolidates the results obtained by the single WEKA processes
    into a consistent structure of collections that are stored on the
    file system.
    """
    self._log("Consolidating results ...")

    # We load and store the results once into a PerformanceResultSummary;
    # loading from the multiple csv files does the necessary consolidation
    # and merges and parses the tables.
    self._log("Reading intermediate results...")
    result_collection = PerformanceResultSummary(
        dataset_dir=self.result_directory)
    self._log("done")
    self._log("Storing result collection")
    result_collection.store(self.result_directory)
    self._log("done")

    # Write the specification of this operation to the result directory
    # to make later analysis of the results easier
    with open(os.path.join(self.result_directory,
                           "source_operation.yaml"), 'w') \
            as source_operation_file:
        yaml.dump(self.operation_spec, source_operation_file)
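# Hedged usage sketch (not part of the original source): the same
# consolidation can be triggered by hand on a finished result directory.
# The import path is assumed from the pySPACE package layout and
# "result_dir" is a hypothetical example path; only the calls already used
# above (PerformanceResultSummary and its store method) are relied on.
def consolidate_by_hand(result_dir):
    """ Merge the per-process csv results in ``result_dir`` into one summary """
    from pySPACE.resources.dataset_defs.performance_result import \
        PerformanceResultSummary
    # Loading gathers and parses all intermediate csv files ...
    summary = PerformanceResultSummary(dataset_dir=result_dir)
    # ... and storing writes the consolidated table back to disk
    summary.store(result_dir)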
def _load_results_collection_from_file(self, file_name=None):
    """ Load a results collection from a csv file """
    if file_name is None:
        # Let the user specify the file to be loaded
        self.file_name = \
            str(QtGui.QFileDialog.getOpenFileName(
                parent=self, caption="Select a results file",
                filter="results files (*.csv)"))
    else:
        self.file_name = file_name

    # Try to load the specified file
    dirname, filename = os.path.split(self.file_name)
    self.result_collection = PerformanceResultSummary(dataset_dir=dirname,
                                                      csv_filename=filename)

    # Create a working copy that can be modified
    self.current_collection = copy.deepcopy(self.result_collection)
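# Hedged sketch (not part of the original source): loading a specific
# results csv without the Qt file dialog, mirroring the non-interactive
# branch above. "csv_path" is a hypothetical example argument and the
# import path is assumed; the keyword arguments are the ones used in
# _load_results_collection_from_file.
def load_results_csv(csv_path):
    """ Return the loaded collection and a modifiable working copy """
    import copy
    import os
    from pySPACE.resources.dataset_defs.performance_result import \
        PerformanceResultSummary
    dirname, filename = os.path.split(csv_path)
    collection = PerformanceResultSummary(dataset_dir=dirname,
                                          csv_filename=filename)
    # Deep copy, so that filtering and editing never touch the original
    return collection, copy.deepcopy(collection)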
def consolidate(self, _=None):
    """ Consolidates the results obtained by the single processes
    into a consistent structure of collections that are stored on
    the file system.
    """
    # Consolidate the results: collection directories have names
    # of the form "{...}"
    directory_pattern = os.sep.join([self.result_directory, "{*"])
    dataset_paths = glob.glob(directory_pattern)

    # For all collections found
    for dataset_path in dataset_paths:
        try:
            # Load their meta data
            meta_data = BaseDataset.load_meta_data(dataset_path)

            # Determine author and date
            author = get_author()
            date = time.strftime("%Y%m%d_%H_%M_%S")

            # Update the meta data and store it
            meta_data.update({"author": author, "date": date})

            # There can be run dirs, persistency dirs, or both.
            # Use whichever count is larger; if both kinds exist,
            # their numbers are supposed to be equal.
            nr_run_dirs = len(
                glob.glob(os.path.join(dataset_path, "data_run*")))
            nr_per_dirs = len(
                glob.glob(os.path.join(dataset_path, "persistency_run*")))
            nr_runs = max(nr_run_dirs, nr_per_dirs)

            if nr_runs > 1:
                meta_data["runs"] = nr_runs

            # Store the meta data
            BaseDataset.store_meta_data(dataset_path, meta_data)

            # Copy the input dataset specification file to the result
            # directory to make later analysis of the results easier.
            # Strip a leading separator from the input collection name,
            # because otherwise it would be treated as an absolute path.
            input_collection_name = meta_data["input_collection_name"]
            if input_collection_name[0] == os.sep:
                input_collection_name = input_collection_name[1:]
            input_meta_path = os.path.join(pySPACE.configuration.storage,
                                           input_collection_name)
            try:
                input_meta = BaseDataset.load_meta_data(input_meta_path)
                BaseDataset.store_meta_data(
                    dataset_path, input_meta,
                    file_name="input_metadata.yaml")
            except (IOError, OSError) as e:
                self._log(
                    "Error copying the input_metadata.yaml: "
                    "{error!s}".format(error=e),
                    level=logging.CRITICAL)
        except Exception as e:
            logging.getLogger("%s" % self).exception(
                "Error updating the metadata: {error!s}".format(error=e))
            raise e

    # If we did not create a feature vector or time series collection,
    # the classification was evaluated with a classification performance
    # sink. The resulting files should be merged into one csv table.
    pathlist = glob.glob(os.path.join(self.result_directory, "results_*"))
    if len(pathlist) > 0:
        # Do the consolidation the same way as for the
        # WekaClassificationOperation
        self._log("Consolidating results ...")
        # We load and store the results once into a
        # PerformanceResultSummary; this does the necessary consolidation.
        self._log("Reading intermediate results...")
        try:
            result_collection = PerformanceResultSummary(
                dataset_dir=self.result_directory)
            self._log("done")
            self._log("Storing result collection")
            result_collection.store(self.result_directory)
            self._log("done")
            PerformanceResultSummary.merge_traces(self.result_directory)
        except Exception as e:
            logging.getLogger("%s" % self).exception(
                "Error merging the result collection: {error!s}".format(
                    error=e))

        if self.compression:
            # Since we get one result summary, we no longer need the
            # numerous result folders, so we zip them to keep the
            # result directory easy to browse.
            import zipfile
            cwd = os.getcwd()
            os.chdir(self.result_directory)
            # If there are too many or too large folders, problems may
            # occur. In that case we log the error, try 64 bit mode,
            # and otherwise skip the zipping.
            try:
                pathlist = glob.glob(
                    os.path.join(self.result_directory, "{*}"))
                if self.compression != "delete":
                    save_file = zipfile.ZipFile(
                        self.result_directory + '/result_folders.zip',
                        mode="w", compression=self.compression)
                    # we want the archived paths to be relative to the
                    # result directory
                    for path in pathlist:
                        for node in os.walk(path):
                            rel_path = os.path.relpath(
                                node[0], self.result_directory)
                            save_file.write(rel_path)
                            for data in node[2]:
                                save_file.write(
                                    os.path.join(rel_path, data))
                    save_file.close()
                # To keep easy access to the history of the processing,
                # we keep one folder.
                pathlist.pop()
                for path in pathlist:
                    shutil.rmtree(path)
            except Exception:
                self._log("Result files could not be compressed with 32"
                          " bit mode, switching to 64 bit mode",
                          level=logging.CRITICAL)
                # nearly identical to the code above; the only
                # difference is the 64 bit mode (allowZip64)
                try:
                    pathlist = glob.glob(
                        os.path.join(self.result_directory, "{*}"))
                    save_file = zipfile.ZipFile(
                        self.result_directory + '/result_folders.zip',
                        mode="w", compression=self.compression,
                        allowZip64=True)
                    # we want the archived paths to be relative to the
                    # result directory
                    for path in pathlist:
                        for node in os.walk(path):
                            rel_path = os.path.relpath(
                                node[0], self.result_directory)
                            save_file.write(rel_path)
                            for data in node[2]:
                                save_file.write(
                                    os.path.join(rel_path, data))
                    save_file.close()
                    # To keep easy access to the history of the
                    # processing, we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except Exception:
                    self._log(
                        "64 bit mode also failed. Please check your "
                        "files and your code or contact your local "
                        "programmer!",
                        level=logging.CRITICAL)
            os.chdir(cwd)
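# Hedged sketch (not part of the original source): the 32/64 bit fallback
# used above, isolated as a stdlib-only helper. It retries with a Zip64
# archive when the folders are too large for a plain zip; the function
# name and arguments are illustrative, not pySPACE API.
import os
import zipfile

def zip_result_folders(folders, archive_path, base_dir,
                       compression=zipfile.ZIP_DEFLATED):
    """ Zip ``folders`` into ``archive_path`` with paths relative to ``base_dir`` """
    for allow_zip64 in (False, True):  # plain zip first, then Zip64
        try:
            with zipfile.ZipFile(archive_path, mode="w",
                                 compression=compression,
                                 allowZip64=allow_zip64) as archive:
                for folder in folders:
                    for root, _, files in os.walk(folder):
                        rel_root = os.path.relpath(root, base_dir)
                        # store the directory entry, then its files
                        archive.write(root, rel_root)
                        for name in files:
                            archive.write(os.path.join(root, name),
                                          os.path.join(rel_root, name))
            return
        except zipfile.LargeZipFile:
            continue  # retry once with allowZip64=True
    raise RuntimeError("zipping failed even with Zip64 enabled")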