Example #1
 def consolidate(self):
     """
     Consolidates the results obtained by the single WEKA processes into
     a consistent structure of collections that are stored on the
     file system.
     """
     self._log("Consolidating results ...")
     # We load and store the results once into a PerformanceResultSummary.
     # The from_multiple_csv loading does the necessary consolidation
     # and merges and parses the tables.
     self._log("Reading intermediate results...")
     result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
     
     self._log("done")
     self._log("Storing result collection")
     
     result_collection.store(self.result_directory)
     
     self._log("done")
     
     
     # Write the specification of this operation
     # to the result directory in order to make later
     # analysis of the results easier
     source_operation_file = open(os.path.join(self.result_directory,
                                               "source_operation.yaml"), 'w')
     yaml.dump(self.operation_spec, source_operation_file)
     source_operation_file.close()
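
For comparison, here is a minimal standalone sketch of the same consolidation pattern. It assumes a result_directory that already contains the intermediate csv results and an operation_spec dictionary; it is an illustration of the calls used above, not the actual operation method.

import os
import yaml

from pySPACE.resources.dataset_defs.performance_result import PerformanceResultSummary

def consolidate_results(result_directory, operation_spec):
    # Load all intermediate csv results from the directory into one summary ...
    result_collection = PerformanceResultSummary(dataset_dir=result_directory)
    # ... and store the merged table back into the same directory.
    result_collection.store(result_directory)
    # Keep the operation specification next to the results for later analysis.
    spec_file = open(os.path.join(result_directory, "source_operation.yaml"), 'w')
    yaml.dump(operation_spec, spec_file)
    spec_file.close()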
Example #2
    def consolidate(self):
        """
        Consolidates the results of the single processes into a consistent result of the whole
        operation
        """
        assert (self.state == "retrieved")

        if ((self.ProcessingSuccessful == True)
                and (len(self.CrashedProcesses) == 0)):
            self.current_operation.consolidate()

        if ((self.ProcessingSuccessful == True)
                and (len(self.CrashedProcesses) != 0)):
            from pySPACE.resources.dataset_defs.performance_result import PerformanceResultSummary
            # merge the remaining files
            print "***************************************************************************************************"
            print "Starting merging . . ."
            PerformanceResultSummary.merge_performance_results(
                self.current_operation.result_directory)
            print "Merging complete . . ."
            print "***************************************************************************************************"

        self._log("Operation - consolidated")

        self.state = "consolidated"
Example #3
def main():
    import sys, os
    file_path = os.path.dirname(os.path.abspath(__file__))
    pyspace_path = file_path[:file_path.rfind('pySPACE')-1]
    if not pyspace_path in sys.path:
        sys.path.append(pyspace_path)

    from pySPACE.resources.dataset_defs.performance_result import PerformanceResultSummary as PerformanceResultSummary
    
    input_dir = sys.argv[1]
    PerformanceResultSummary.merge_performance_results(input_dir)
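
The script above expects the result directory as its first command-line argument. A hypothetical invocation (the file name merge_results.py is only illustrative) would be:

    python merge_results.py /path/to/operation_result_dir

merge_performance_results then merges the csv files found in that directory into a single results.csv, the file that the GUI launcher in Example #10 opens afterwards.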
Example #5
 def _load_results_collection_from_file(self, file_name=None):
     """ Load results collection from file  """
     if file_name is None:
         # Let the user specify a file to be loaded
         self.file_name = \
             str(QtGui.QFileDialog.getOpenFileName(
                 parent=self, caption="Select a results file",
                 filter="results files (*.csv)"))
     else:
         self.file_name = file_name
     # Try to load specified file 
     dirname, filename = os.path.split(self.file_name)      
     self.result_collection = PerformanceResultSummary(dataset_dir=dirname,
                                                       csv_filename=filename)
     # Create working copy that can be modified
     self.current_collection = copy.deepcopy(self.result_collection)
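
# Illustrative sketch (not part of the snippets above): the same results file
# can be loaded without the Qt file dialog.  The csv path is a placeholder.
import copy
import os

from pySPACE.resources.dataset_defs.performance_result import PerformanceResultSummary

dirname, filename = os.path.split("/path/to/results/results.csv")
result_collection = PerformanceResultSummary(dataset_dir=dirname,
                                             csv_filename=filename)
# Working copy that can be modified without touching the loaded collection
current_collection = copy.deepcopy(result_collection)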
    def retrieve(self):
        """
        Returns the result of the operation.
        """
        
        self.state = "executing" 
        self._log("Operation - executing") 
        if (self.NumberOfProcessesToRunAtBeginning > len(self.process_args_list)):
            args = ([self.COMMAND_MPI] +
                ['--loadbalance']+
                ['--nolocal']+
                ['--hostfile'] +
                [pySPACE.configuration.root_dir+ "/" +'hostsfile'] +
                ['-n', str(len(self.process_args_list))] +
                [self.COMMAND_PYTHON] +  
                [self.runner_script] + 
                self.process_args_list)
            # Start the processes. 
            p = subprocess.Popen(args)
            #self.pids.append(p)
            self.IndexCopyStart += self.NumberOfProcessesToRunAtBeginning
            #print args
        else:
            #copy the arguments of the processes to run
            sub_process_args_list = (self.process_args_list[self.IndexCopyStart: 
                                     self.NumberOfProcessesToRunAtBeginning])
            args = ([self.COMMAND_MPI] +
                ['--loadbalance']+
                ['--nolocal']+
                ['--hostfile'] +
                [pySPACE.configuration.root_dir+ "/" +'hostsfile'] +
                ['-n', str(len(sub_process_args_list))] +
                [self.COMMAND_PYTHON] +  
                [self.runner_script] + 
                sub_process_args_list)
            # Start the processes. 
            p = subprocess.Popen(args)
            #self.pids.append(p) # TODO: call p.poll() for p in self.pids after all processes have exited
            self.IndexCopyStart += self.NumberOfProcessesToRunAtBeginning
            #print args

        # Create a list of booleans for the processes which are finished.
        # First we assume that no process has started, so we set
        # every element of the list to False.
        FinishedProcesses = [False for i in range(len(self.process_args_list))]
        
        # Wait until all processes finish and start new processes
        # when old ones finish

        print "Waiting for the processes to finish...."

        # Counter for the processes which are finished. It will be reset
        # after 'NumberOfProcessesToRunLater' processes are finished
        CounterProcessesFinished = 0
        processes_Finished = False

        while not processes_Finished:
          try:
             processes_Finished = True
             for LoopCounter, process_args in enumerate(self.process_args_list):
                 if (self.not_xor (os.path.isfile(process_args+"_Finished"), 
                               os.path.isfile(process_args+"_Crashed"))):
                    processes_Finished = False
                 else:
                    if (FinishedProcesses[LoopCounter] == False):
                       # Record that the process is finished                       
                       FinishedProcesses[LoopCounter] = True
                       # If the process is crashed take note of that
                       if (os.path.isfile(process_args+"_Crashed")):
                           self.CrashedProcesses.append(process_args)
                       # Increment the counter for the number of processes finished
                       # by one
                       CounterProcessesFinished += 1
                       self.TotalProcessesFinished += 1 
                       # update the progress bar
                       self.progress_bar.update(self.TotalProcessesFinished)
                       if (CounterProcessesFinished == self.NumberOfProcessesToRunLater):
                          # Define a variable for a subset of processes to run
                          sub_process_args_list = []
                          if (self.IndexCopyStart==len(self.process_args_list)):
                              break
                          elif ((self.IndexCopyStart+self.NumberOfProcessesToRunLater)< len(self.process_args_list)):
                              sub_process_args_list = (self.process_args_list[self.IndexCopyStart:
                                                       self.IndexCopyStart +self.NumberOfProcessesToRunLater])
                          else:
                              sub_process_args_list = self.process_args_list[self.IndexCopyStart:len(self.process_args_list)]
                          args = ([self.COMMAND_MPI] +
                                 ['--loadbalance']+
                                 ['--nolocal']+
                                 ['--hostfile'] +
                                 [pySPACE.configuration.root_dir+ "/" +'hostsfile'] +
                                 ['-n', str(len(sub_process_args_list))] +
                                 [self.COMMAND_PYTHON] +  
                                 [self.runner_script] + 
                                 sub_process_args_list)
                          # Start the processes
                          if (len(sub_process_args_list) > 0):
                             p = subprocess.Popen(args)
                          #print args                          
                          # Adjust the start index
                          self.IndexCopyStart += self.NumberOfProcessesToRunLater
                          # Reset the counter for processes finished
                          CounterProcessesFinished = 0
             # sleep for one second                
             time.sleep(1)
          except (KeyboardInterrupt, SystemExit): # if processes hang forever
            self.ProcessingSuccessful = False
            print "*********************************************************************************************************"
            print "pySPACE forced to stop ..."
            print "Please wait until mpi_backend is finished with consolidating the results generated and with clean up ..."
            print "**********************************************************************************************************"
            from pySPACE.resources.dataset_defs.performance_result import PerformanceResultSummary
            # merge the remaining files
            print "***************************************************************************************************"
            print "Starting merging . . ."
            PerformanceResultSummary.merge_performance_results(self.current_operation.result_directory)
            print "Merging complete . . ."
            print "***************************************************************************************************"
            break #The while loop will break

        self._log("Operation - processing finished")
        
        # Change the state to retrieved
        self.state = "retrieved"
        
        return None
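
    # Illustrative sketch: retrieve() above calls a not_xor helper that is not
    # defined in these examples.  Judging from its use -- a process counts as
    # still running while neither the "_Finished" nor the "_Crashed" marker
    # file exists -- it behaves like a logical XNOR.  An assumed definition:
    def not_xor(self, a, b):
        return not (bool(a) ^ bool(b))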
    def consolidate(self):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([self.result_directory, "{*",])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            # Load their meta_data
            meta_data = BaseDataset.load_meta_data(dataset_path)

            # Determine author and date
            try:
                author = pwd.getpwuid(os.getuid())[4]
            except:
                author = "unknown"
                self._log("Author could not be resolved.",level=logging.WARNING)
            date = time.strftime("%Y%m%d_%H_%M_%S")

            # Update meta data and store it
            meta_data.update({"author" : author, "date" : date})
            BaseDataset.store_meta_data(dataset_path, meta_data)

            # Copy the input dataset specification file to the result
            # directory in order to make later analysis of
            # the results easier
            input_meta_path = os.sep.join([pySPACE.configuration.storage,
                                          meta_data["input_collection_name"]])
            input_meta = BaseDataset.load_meta_data(input_meta_path)
            BaseDataset.store_meta_data(dataset_path,input_meta,
                                        file_name="input_metadata.yaml")
        # Check if some results consist of several runs
        # and update the meta data in this case
        # TODO: This is not a clean solution
        for dataset_dir in glob.glob(os.sep.join([self.result_directory,
                                                     "*"])):
            if not os.path.isdir(dataset_dir): continue
            # There can be either run dirs, persistency dirs, or both of them.
            # Check whichever there are more of. If both exist, their numbers
            # are supposed to be equal.
            nr_run_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                              "data_run*"])))
            nr_per_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                              "persistency_run*"])))
            nr_runs = max(nr_run_dirs, nr_per_dirs)

            if nr_runs > 1:
                collection_meta = BaseDataset.load_meta_data(dataset_dir)
                collection_meta["runs"] = nr_runs
                BaseDataset.store_meta_data(dataset_dir,collection_meta)
        # If we don't create a feature vector or time series collection,
        # we evaluated our classification using a classification performance sink.
        # The resulting files should be merged into one csv table.
        pathlist = glob.glob(os.path.join(self.result_directory,"results_*"))
        if len(pathlist)>0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
            self._log("done")
            self._log("Storing result collection")
            result_collection.store(self.result_directory)
            self._log("done")
            PerformanceResultSummary.merge_traces(self.result_directory)

            if not(self.compression == False):
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole folder easier to browse.
                import zipfile
                cwd=os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode, and then skip the zipping.
                try:
                    pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))
                    
                    if not self.compression == "delete":                        
                        save_file=zipfile.ZipFile(self.result_directory+'/result_folders.zip',mode="w",compression=self.compression)
                        # we want to have the zipped file relative to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path=os.path.relpath(node[0],self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path,data))
                        save_file.close()
                    # To still have an easy access to the history of the processing,
                    # we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except:
                    self._log("Result files could not be compressed with 32 bit mode, switching to 64 bit mode.", level=logging.CRITICAL)
                    # nearly total code copy, only difference with 64 bit mode
                    try:
                        pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))
                        save_file=zipfile.ZipFile(self.result_directory+'/result_folders.zip',mode="w",compression=self.compression, allowZip64=True)
                        # we want to have the zipped file relative to the result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path=os.path.relpath(node[0],self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path,data))
                        save_file.close()
                        # To still have an easy access to the history of the processing,
                        # we keep one folder.
                        pathlist.pop()
                        for path in pathlist:
                            shutil.rmtree(path)
                    except:
                        self._log("64 bit mode also failed. Please check your files and your code or contact your local programmer!", level=logging.CRITICAL)
                os.chdir(cwd)
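
The archiving block at the end of the method above can be read as a standalone pattern. The sketch below mirrors it under the assumption that the result folders are named "{...}" and that compression is a zipfile compression constant; it is an illustration, not the pySPACE implementation.

import glob
import os
import shutil
import zipfile

def compress_result_folders(result_directory, compression=zipfile.ZIP_DEFLATED):
    cwd = os.getcwd()
    os.chdir(result_directory)
    try:
        pathlist = glob.glob(os.path.join(result_directory, "{*}"))
        # Archive members are written relative to the result directory.
        archive = zipfile.ZipFile(os.path.join(result_directory, "result_folders.zip"),
                                  mode="w", compression=compression, allowZip64=True)
        for path in pathlist:
            for node in os.walk(path):
                rel_path = os.path.relpath(node[0], result_directory)
                archive.write(rel_path)
                for data in node[2]:
                    archive.write(os.path.join(rel_path, data))
        archive.close()
        # Keep one folder as an easily accessible processing history,
        # remove the rest.
        if pathlist:
            pathlist.pop()
        for path in pathlist:
            shutil.rmtree(path)
    finally:
        os.chdir(cwd)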
Example #8
    def retrieve(self, timeout=1e6):
        """
        Returns the result of the operation.
        """

        self.state = "executing"
        self._log("Operation - executing")
        if (self.NumberOfProcessesToRunAtBeginning > len(
                self.process_args_list)):
            args = ([self.COMMAND_MPI] + ['--loadbalance'] + ['--nolocal'] +
                    ['--hostfile'] +
                    [pySPACE.configuration.root_dir + "/" + 'hostsfile'] +
                    ['-n', str(len(self.process_args_list))] +
                    [self.COMMAND_PYTHON] + [self.runner_script] +
                    self.process_args_list)
            # Start the processes.
            self._log("mpi-parameters: %s" % args, level=logging.DEBUG)
            self._log("mpi-parameters-joined: %s" % " ".join(args),
                      level=logging.DEBUG)
            p = subprocess.Popen(args)
            #self.pids.append(p)
            self.IndexCopyStart += self.NumberOfProcessesToRunAtBeginning
            #print args
        else:
            #copy the arguments of the processes to run
            sub_process_args_list = (
                self.process_args_list[self.IndexCopyStart:self.
                                       NumberOfProcessesToRunAtBeginning])
            args = ([self.COMMAND_MPI] + ['--loadbalance'] + ['--nolocal'] +
                    ['--hostfile'] +
                    [pySPACE.configuration.root_dir + "/" + 'hostsfile'] +
                    ['-n', str(len(sub_process_args_list))] +
                    [self.COMMAND_PYTHON] + [self.runner_script] +
                    sub_process_args_list)
            # Start the processes.
            p = subprocess.Popen(args)
            #self.pids.append(p) # TODO: call p.poll() for p in self.pids after all processes have exited
            self.IndexCopyStart += self.NumberOfProcessesToRunAtBeginning
            #print args

        # Create a list of booleans for the processes which are finished.
        # First we assume that no process has started, so we set
        # every element of the list to False.
        FinishedProcesses = [False for i in range(len(self.process_args_list))]

        # Wait until all processes finish and start new processes
        # when old ones finish

        print "Waiting for the processes to finish...."

        # Counter for the processes which are finished. It will be reset
        # after 'NumberOfProcessesToRunLater' processes are finished
        CounterProcessesFinished = 0
        processes_Finished = False

        while not processes_Finished:
            try:
                processes_Finished = True
                for LoopCounter, process_args in enumerate(
                        self.process_args_list):
                    if (self.not_xor(
                            os.path.isfile(process_args + "_Finished"),
                            os.path.isfile(process_args + "_Crashed"))):
                        processes_Finished = False
                    else:
                        if (FinishedProcesses[LoopCounter] == False):
                            # Record that the process is finished
                            FinishedProcesses[LoopCounter] = True
                            # If the process is crashed take note of that
                            if (os.path.isfile(process_args + "_Crashed")):
                                self.CrashedProcesses.append(process_args)
                            # Increment the counter for the number of processes finished
                            # by one
                            CounterProcessesFinished += 1
                            self.TotalProcessesFinished += 1
                            # update the progress bar
                            self.progress_bar.update(
                                self.TotalProcessesFinished)
                            if (CounterProcessesFinished ==
                                    self.NumberOfProcessesToRunLater):
                                # Define a variable for a subset of processes to run
                                sub_process_args_list = []
                                if (self.IndexCopyStart == len(
                                        self.process_args_list)):
                                    break
                                elif ((self.IndexCopyStart +
                                       self.NumberOfProcessesToRunLater) < len(
                                           self.process_args_list)):
                                    sub_process_args_list = (
                                        self.process_args_list[
                                            self.IndexCopyStart:self.
                                            IndexCopyStart +
                                            self.NumberOfProcessesToRunLater])
                                else:
                                    sub_process_args_list = self.process_args_list[
                                        self.IndexCopyStart:len(
                                            self.process_args_list)]
                                args = (
                                    [self.COMMAND_MPI] + ['--loadbalance'] +
                                    ['--nolocal'] + ['--hostfile'] + [
                                        pySPACE.configuration.root_dir + "/" +
                                        'hostsfile'
                                    ] +
                                    ['-n',
                                     str(len(sub_process_args_list))] +
                                    [self.COMMAND_PYTHON] +
                                    [self.runner_script] +
                                    sub_process_args_list)
                                # Start the processes
                                if (len(sub_process_args_list) > 0):
                                    p = subprocess.Popen(args)
                                #print args
                                # Adjust the start index
                                self.IndexCopyStart += self.NumberOfProcessesToRunLater
                                # Reset the counter for processes finished
                                CounterProcessesFinished = 0
                # sleep for one second
                time.sleep(1)
            except (KeyboardInterrupt,
                    SystemExit):  # if processes hang forever
                self.ProcessingSuccessful = False
                print "*********************************************************************************************************"
                print "pySPACE forced to stop ..."
                print "Please wait until mpi_backend is finished with consolidating the results generated and with clean up ..."
                print "**********************************************************************************************************"
                from pySPACE.resources.dataset_defs.performance_result import PerformanceResultSummary
                # merge the remaining files
                print "***************************************************************************************************"
                print "Starting merging . . ."
                PerformanceResultSummary.merge_performance_results(
                    self.current_operation.result_directory)
                print "Merging complete . . ."
                print "***************************************************************************************************"
                break  #The while loop will break

        self._log("Operation - processing finished")

        # Change the state to retrieved
        self.state = "retrieved"

        return None
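
The polling loop in retrieve() assumes that every spawned process leaves a marker file next to its argument file when it terminates: "<process_args>_Finished" on success, "<process_args>_Crashed" on failure. A runner script that cooperates with this protocol could end with something like the following sketch; run_process stands in for the actual work and is purely a placeholder.

import sys

def run_process(process_args):
    # Placeholder for the actual work performed by the spawned process.
    pass

def main():
    process_args = sys.argv[1]
    try:
        run_process(process_args)
    except Exception:
        # Tell the backend that this process crashed.
        open(process_args + "_Crashed", 'w').close()
        raise
    else:
        # Tell the backend that this process finished successfully.
        open(process_args + "_Finished", 'w').close()

if __name__ == "__main__":
    main()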
Example #9
    def consolidate(self, _=None):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([
            self.result_directory,
            "{*",
        ])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            try:
                # Load their meta_data
                meta_data = BaseDataset.load_meta_data(dataset_path)

                # Determine author and date
                author = get_author()
                date = time.strftime("%Y%m%d_%H_%M_%S")

                # Update meta data and store it
                meta_data.update({"author": author, "date": date})

                # There can be either run dirs, persistency dirs, or both of them.
                # Check whichever there are more of. If both exist, their numbers
                # are supposed to be equal.
                nr_run_dirs = len(
                    glob.glob(os.path.join(dataset_path, "data_run*")))
                nr_per_dirs = len(
                    glob.glob(os.path.join(dataset_path, "persistency_run*")))
                nr_runs = max(nr_run_dirs, nr_per_dirs)
                if nr_runs > 1:
                    meta_data["runs"] = nr_runs

                # Store the metadata
                BaseDataset.store_meta_data(dataset_path, meta_data)

                # Copy the input dataset specification file to the result
                # directory in order to make later analysis of
                # the results easier
                # THA: Split the first "/" from the input collection name, because otherwise it will be treated
                # as an absolute path
                input_collection_name = meta_data["input_collection_name"][1:] if \
                    meta_data["input_collection_name"][0] == os.sep else meta_data["input_collection_name"]
                input_meta_path = os.path.join(pySPACE.configuration.storage,
                                               input_collection_name)
                try:
                    input_meta = BaseDataset.load_meta_data(input_meta_path)
                    BaseDataset.store_meta_data(
                        dataset_path,
                        input_meta,
                        file_name="input_metadata.yaml")
                except (IOError, OSError) as e:
                    self._log("Error copying the input_metadata.yaml: {error}".
                              format(error=e.message),
                              level=logging.CRITICAL)
            except Exception as e:
                logging.getLogger("%s" % self).exception(
                    "Error updating the metadata: {error!s}".format(error=e))
                raise e

        # If we don't create a feature vector or time series collection,
        # we evaluated our classification using a classification performance sink.
        # The resulting files should be merged into one csv table.
        pathlist = glob.glob(os.path.join(self.result_directory, "results_*"))
        if len(pathlist) > 0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            try:
                result_collection = PerformanceResultSummary(
                    dataset_dir=self.result_directory)
                self._log("done")
                self._log("Storing result collection")
                result_collection.store(self.result_directory)
                self._log("done")
                PerformanceResultSummary.merge_traces(self.result_directory)
            except Exception as e:
                logging.getLogger("%s" % self).exception(
                    "Error merging the result collection: {error!s}".format(
                        error=e))

            if self.compression:
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole folder easier to browse.
                import zipfile
                cwd = os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode,
                # and then skip the zipping.
                try:
                    pathlist = glob.glob(
                        os.path.join(self.result_directory, "{*}"))

                    if not self.compression == "delete":
                        save_file = zipfile.ZipFile(
                            self.result_directory + '/result_folders.zip',
                            mode="w",
                            compression=self.compression)
                        # we want to have the zipped file relative to the
                        # result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(
                                    node[0], self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(
                                        os.path.join(rel_path, data))
                        save_file.close()
                    # To still have an easy access to the history of the
                    # processing, we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except Exception, e:
                    self._log("Result files could not be compressed with 32" +
                              " bit mode, switching to 64 bit mode",
                              level=logging.CRITICAL)
                    # nearly total code copy, only difference with 64 bit mode
                    try:
                        pathlist = glob.glob(
                            os.path.join(self.result_directory, "{*}"))
                        save_file = zipfile.ZipFile(
                            self.result_directory + '/result_folders.zip',
                            mode="w",
                            compression=self.compression,
                            allowZip64=True)
                        # we want to have the zipped file relative to the
                        # result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(
                                    node[0], self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(
                                        os.path.join(rel_path, data))
                        save_file.close()
                        # To still have an easy access to the history of the
                        # processing, we keep one folder.
                        pathlist.pop()
                        for path in pathlist:
                            shutil.rmtree(path)
                    except:
                        self._log(
                            "64 bit mode also failed. Please check your files and your code or contact your local programmer!",
                            level=logging.CRITICAL)
                os.chdir(cwd)
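
get_author(), used in the consolidate method above and again in Example #11, is not defined in these examples. Example #5 resolves the author inline via pwd, so an equivalent helper could look like this assumed sketch:

import logging
import os
import pwd

def get_author():
    # Mirrors the inline author lookup from Example #5.
    try:
        return pwd.getpwuid(os.getuid())[4]
    except Exception:
        logging.getLogger(__name__).warning("Author could not be resolved.")
        return "unknown"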
Example #10
    import fnmatch
    import os
    import shutil
    import sys
    import tempfile

    from PyQt4 import QtGui

    # Copy the csv-files to a temporary directory such that there is no risk of
    # interfering with the running operation
    input_dir = sys.argv[1]
    temp_dir = tempfile.mkdtemp()
    for filename in fnmatch.filter(os.listdir(os.path.abspath(input_dir)),
                                   "*.csv"):
        shutil.copy(input_dir + os.sep + filename, temp_dir)

    file_path = os.path.dirname(os.path.abspath(__file__))
    pyspace_path = file_path[:file_path.rfind('pySPACE') - 1]
    if not pyspace_path in sys.path:
        sys.path.append(pyspace_path)

    # Import csv-analysis and merge csv files
    from pySPACE.resources.dataset_defs.performance_result import PerformanceResultSummary
    PerformanceResultSummary.merge_performance_results(temp_dir)

    # Invoke results analysis gui
    from pySPACE.run.gui.performance_results_analysis import PerformanceResultsAnalysisMainWindow

    app = QtGui.QApplication(sys.argv)
    performance_results_analysis = \
            PerformanceResultsAnalysisMainWindow(temp_dir + os.sep + "results.csv")
    performance_results_analysis.show()

    # Clean up
    shutil.rmtree(temp_dir)

    sys.exit(app.exec_())
Example #11
    def consolidate(self, _=None):
        """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
        """
        # Consolidate the results
        directory_pattern = os.sep.join([self.result_directory, "{*",])
        dataset_pathes = glob.glob(directory_pattern)

        # For all collections found
        for dataset_path in dataset_pathes:
            try:
                # Load their meta_data
                meta_data = BaseDataset.load_meta_data(dataset_path)

                # Determine author and date
                author = get_author()
                date = time.strftime("%Y%m%d_%H_%M_%S")

                # Update meta data and store it
                meta_data.update({"author": author, "date": date})

                # There can be either run dirs, persistency dirs, or both of them.
                # Check whichever there are more of. If both exist, their numbers
                # are supposed to be equal.
                nr_run_dirs = len(glob.glob(os.path.join(dataset_path, "data_run*")))
                nr_per_dirs = len(glob.glob(os.path.join(dataset_path, "persistency_run*")))
                nr_runs = max(nr_run_dirs, nr_per_dirs)
                if nr_runs > 1:
                    meta_data["runs"] = nr_runs

                # Store the metadata
                BaseDataset.store_meta_data(dataset_path, meta_data)

                # Copy the input dataset specification file to the result
                # directory in order to make later analysis of
                # the results easier
                # THA: Split the first "/" from the input collection name, because otherwise it will be treated
                # as an absolute path
                input_collection_name = meta_data["input_dataset_name"][1:] if \
                    meta_data["input_dataset_name"][0] == os.sep else meta_data["input_dataset_name"]
                input_meta_path = os.path.join(pySPACE.configuration.storage, input_collection_name)
                try:
                    input_meta = BaseDataset.load_meta_data(input_meta_path)
                    BaseDataset.store_meta_data(dataset_path, input_meta, file_name="input_metadata.yaml")
                except (IOError, OSError) as e:
                    self._log("Error copying the input_metadata.yaml: {error}".format(error=e.message),
                              level=logging.CRITICAL)
            except Exception as e:
                logging.getLogger("%s" % self).exception("Error updating the metadata: {error!s}".format(error=e))
                raise e

        # If we don't create a feature vector or time series collection,
        # we evaluated our classification using a classification performance sink.
        # The resulting files should be merged into one csv table.
        pathlist = glob.glob(os.path.join(self.result_directory,"results_*"))
        if len(pathlist)>0:
            # Do the consolidation the same way as for WekaClassificationOperation
            self._log("Consolidating results ...")
            # We load and store the results once into a PerformanceResultSummary
            # This does the necessary consolidation...
            self._log("Reading intermediate results...")
            try:
                result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
                self._log("done")
                self._log("Storing result collection")
                result_collection.store(self.result_directory)
                self._log("done")
                PerformanceResultSummary.merge_traces(self.result_directory)
            except Exception as e:
                logging.getLogger("%s" % self).exception("Error merging the result collection: {error!s}".format(
                    error=e))

            if self.compression:
                # Since we get one result summary,
                # we don't need the numerous folders.
                # So we zip them to make the whole folder easier to browse.
                import zipfile
                cwd = os.getcwd()
                os.chdir(self.result_directory)
                # If there are too many or too large folders, problems may occur.
                # In this case we want to log it, try 64 bit mode,
                # and then skip the zipping.
                try:
                    pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))

                    if not self.compression == "delete":
                        save_file = zipfile.ZipFile(
                            self.result_directory+'/result_folders.zip',
                            mode="w", compression=self.compression)
                        # we want to have the zipped file relative to the
                        # result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path=os.path.relpath(node[0],
                                                         self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path,
                                                                 data))
                        save_file.close()
                    # To still have an easy access to the history of the
                    # processing, we keep one folder.
                    pathlist.pop()
                    for path in pathlist:
                        shutil.rmtree(path)
                except Exception, e:
                    self._log("Result files could not be compressed with 32"+
                              " bit mode, switching to 64 bit mode",
                              level=logging.CRITICAL)
                    # nearly total code copy, only difference with 64 bit mode
                    try:
                        pathlist = glob.glob(os.path.join(self.result_directory,"{*}"))
                        save_file=zipfile.ZipFile(
                            self.result_directory+'/result_folders.zip',
                            mode="w", compression=self.compression,
                            allowZip64=True)
                        # we want to have the zipped file relative to the
                        # result directory
                        for path in pathlist:
                            for node in os.walk(path):
                                rel_path = os.path.relpath(node[0],
                                                         self.result_directory)
                                save_file.write(rel_path)
                                for data in node[2]:
                                    save_file.write(os.path.join(rel_path,data))
                        save_file.close()
                        # To still have an easy access to the history of the
                        # processing, we keep one folder.
                        pathlist.pop()
                        for path in pathlist:
                            shutil.rmtree(path)
                    except:
                        self._log("64 bit mode also failed. Please check your files and your code or contact your local programmer!", level=logging.CRITICAL)
                os.chdir(cwd)
    