Beispiel #1
0
 def store_state(self, result_dir, index=None):
     """ Stores the learned features in the given directory *result_dir*

     Nothing is written unless *self.store* is set and *self.kernel_type*
     is 'LINEAR'. If the node has no *features* attribute yet, it is
     derived from *self.w*. The features are written twice into a
     sub-directory named after the node class: pickled (protocol 2) and
     as their ``str`` representation in a file ending with ".yaml".
     The *features* attribute is deleted after writing.

     *index* is not used by this implementation.
     """
     if not (self.store and self.kernel_type == 'LINEAR'):
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     from pySPACE.tools.filesystem import create_directory
     create_directory(node_dir)
     try:
         self.features
     except AttributeError:
         # Only a missing attribute triggers the fallback to *self.w*
         if isinstance(self.w, FeatureVector):
             self.features = self.w
         elif self.w is not None:
             self.features = FeatureVector(self.w.T, self.feature_names)
         else:
             self.features = None
     if self.features is None:
         return
     # This node stores the learned features
     name = "%s_sp%s.pickle" % ("features", self.current_split)
     # The context managers close the files even if writing fails
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps(self.features, protocol=2))
     name = "%s_sp%s.yaml" % ("features", self.current_split)
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(str(self.features))
     del self.features
Beispiel #2
0
    def __init__(
        self,
        node_chain_spec,
        parameter_setting,
        rel_dataset_dir,
        run,
        split,
        storage_format,
        result_dataset_directory,
        store_node_chain=False,
        hide_parameters=None,
    ):
        """ Sets up the process and builds its node chain

        **Parameters**

            :node_chain_spec: Specification of the node chain; the values
                of *parameter_setting* are inserted via
                ``replace_parameters2`` before the chain is created.
            :parameter_setting: Mapping of parameters. The pseudo
                parameter "__PREPARE_OPERATION__" is removed from a copy
                after the chain has been created.
            :rel_dataset_dir: Stored unchanged on the instance.
            :run: Run number, used to name the persistency directory.
            :split: Assigned to every node as *current_split*.
            :storage_format: Stored unchanged on the instance.
            :result_dataset_directory: Directory below which the
                persistency directory is created.
            :store_node_chain: Stored unchanged on the instance.
            :hide_parameters: Stored on the instance; an empty list is
                used if omitted.
        """
        super(NodeChainProcess, self).__init__()

        self.node_chain_spec = node_chain_spec
        self.parameter_setting = parameter_setting
        self.rel_dataset_dir = rel_dataset_dir
        self.storage = pySPACE.configuration.storage
        self.run = run
        self.storage_format = storage_format
        self.result_dataset_directory = result_dataset_directory
        self.persistency_dir = os.sep.join([result_dataset_directory, "persistency_run%s" % run])
        create_directory(self.persistency_dir)
        self.store_node_chain = store_node_chain
        # A fresh list per instance, so that instances never share one
        # mutable default object
        if hide_parameters is None:
            hide_parameters = []
        self.hide_parameters = hide_parameters

        # reduce_log_level for process creation
        # NOTE(review): the configured log levels are passed to eval();
        # this is only acceptable as long as the configuration is trusted.
        try:
            console_log_level = (
                eval(pySPACE.configuration.console_log_level)
                if hasattr(pySPACE.configuration, "console_log_level")
                else logging.WARNING
            )
        except (AttributeError, NameError):
            console_log_level = logging.WARNING
        try:
            file_log_level = (
                eval(pySPACE.configuration.file_log_level)
                if hasattr(pySPACE.configuration, "file_log_level")
                else logging.INFO
            )
        except (AttributeError, NameError):
            file_log_level = logging.INFO

        self.min_log_level = min(console_log_level, file_log_level)
        pySPACE.configuration.min_log_level = self.min_log_level
        # Replace parameters in spec file
        self.node_chain_spec = replace_parameters2(self.node_chain_spec, self.parameter_setting)
        # Create node chain
        self.node_chain = NodeChainFactory.flow_from_yaml(Flow_Class=BenchmarkNodeChain, flow_spec=self.node_chain_spec)

        for node in self.node_chain:
            node.current_split = split
        # Remove pseudo parameter "__PREPARE_OPERATION__"
        # (on a copy, so the caller's mapping stays untouched)
        if "__PREPARE_OPERATION__" in self.parameter_setting:
            self.parameter_setting = copy.deepcopy(self.parameter_setting)
            self.parameter_setting.pop("__PREPARE_OPERATION__")
Beispiel #3
0
 def store_state(self, result_dir, index=None):
     """ Stores the learned features in the given directory *result_dir*

     Nothing is written unless *self.store* is set and *self.kernel_type*
     is 'LINEAR'. If the node has no *features* attribute yet, it is
     derived from *self.w*. The features are written twice into a
     sub-directory named after the node class: pickled (protocol 2) and
     as their ``str`` representation in a file ending with ".yaml".
     The *features* attribute is deleted after writing.

     *index* is not used by this implementation.
     """
     if not (self.store and self.kernel_type == 'LINEAR'):
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     from pySPACE.tools.filesystem import create_directory
     create_directory(node_dir)
     try:
         self.features
     except AttributeError:
         # Only a missing attribute triggers the fallback to *self.w*
         if isinstance(self.w, FeatureVector):
             self.features = self.w
         elif self.w is not None:
             self.features = FeatureVector(self.w.T, self.feature_names)
         else:
             self.features = None
     if self.features is None:
         return
     # This node stores the learned features
     name = "%s_sp%s.pickle" % ("features", self.current_split)
     # The context managers close the files even if writing fails
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps(self.features, protocol=2))
     name = "%s_sp%s.yaml" % ("features", self.current_split)
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(str(self.features))
     del self.features
Beispiel #4
0
    def __init__(self, dataset_dir, command_template, parametrization,
                 run_number, split_number, operation_result_dir,
                 hide_parameters=None):
        """ Prepares the result directory and the WEKA command

        **Parameters**

            :dataset_dir: Directory of the input dataset. Its second to
                last path component is the base of the result collection
                name.
            :command_template: Template string whose placeholders are
                replaced repeatedly via ``%`` formatting until the string
                no longer changes.
            :parametrization: Mapping of parameter names to values. Values
                found in the abbreviations file are expanded.
            :run_number: Used for naming the intermediate data directory
                and offered to the template as "run_number".
            :split_number: Offered to the template as "split_number".
            :operation_result_dir: Directory below which the result
                directory of this process is created.
            :hide_parameters: Names of parameters that are not encoded in
                the result collection name (default: none).
        """
        super(WEKAFilterProcess, self).__init__()

        if hide_parameters is None:
            hide_parameters = []

        # Determine the directory in which the process' results
        # are stored
        result_collection_name = dataset_dir.split(os.sep)[-2]
        for parameter_name, parameter_value in parametrization.iteritems():
            # If this is a parameter that should not be hidden, then we have to
            # encode it in the result collection name
            if parameter_name not in hide_parameters:
                result_collection_name += "{__%s__:%s}" % (parameter_name.upper(),
                                                           parameter_value)

        self.result_directory = os.path.join(operation_result_dir,
                                             result_collection_name)

        # Create directory for intermediate results if it does not exist yet
        create_directory(self.result_directory
                              + os.sep + "data_run%s" % run_number)

        # Create collection
        # NOTE(review): the loaded collection is not used below; the call is
        # kept in case loading has required side effects -- TODO confirm
        collection = BaseDataset.load(dataset_dir)

        # The parametrization that is independent of the collection type
        # and the specific weka command template that is executed
        self.params = {"dataset_name": dataset_dir.replace('/','_'),
                       "dataset_dir": dataset_dir,
                       "run_number": run_number,
                       "split_number": split_number,
                       "weka_class_path": pySPACE.configuration.weka_class_path,
                       "temp_results": self.result_directory}

        # Load the abbreviations; the context manager closes the file again
        abbreviations_path = os.path.join(pySPACE.configuration.spec_dir,
                                          'operations/weka_templates',
                                          'abbreviations.yaml')
        with open(abbreviations_path, 'r') as abbreviations_file:
            # NOTE(review): yaml.load can construct arbitrary objects; this
            # is only safe as long as the spec directory is trusted.
            self.abbreviations = yaml.load(abbreviations_file)
        # Add custom parameters for the weka command template
        for parameter_name, parameter_value in parametrization.iteritems():
            # Auto-expand abbreviations
            if parameter_value in self.abbreviations:
                parameter_value = self.abbreviations[parameter_value]
            self.params[parameter_name] = parameter_value

        # Build the WEKA command by repeatedly replacing all placeholders in
        # the template
        while True:
            instantiated_template = command_template % self.params
            if instantiated_template == command_template:
                # All placeholders replaced
                self.weka_command = instantiated_template
                break
            else:
                # We have to continue since we are not converged
                command_template = instantiated_template

        self.handler_class = None
Beispiel #5
0
    def __init__(self,
                 node_chain_spec,
                 parameter_setting,
                 rel_dataset_dir,
                 run,
                 split,
                 storage_format,
                 result_dataset_directory,
                 store_node_chain=False,
                 hide_parameters=None):
        """ Sets up the process and builds its node chain

        **Parameters**

            :node_chain_spec: Specification of the node chain; the values
                of *parameter_setting* are inserted via
                ``replace_parameters2`` before the chain is created.
            :parameter_setting: Mapping of parameters. The pseudo
                parameter "__PREPARE_OPERATION__" is removed from a copy
                after the chain has been created.
            :rel_dataset_dir: Stored unchanged on the instance.
            :run: Run number, used to name the persistency directory.
            :split: Assigned to every node as *current_split*.
            :storage_format: Stored unchanged on the instance.
            :result_dataset_directory: Directory below which the
                persistency directory is created.
            :store_node_chain: Stored unchanged on the instance.
            :hide_parameters: Stored on the instance; an empty list is
                used if omitted.
        """
        super(NodeChainProcess, self).__init__()

        self.node_chain_spec = node_chain_spec
        self.parameter_setting = parameter_setting
        self.rel_dataset_dir = rel_dataset_dir
        self.storage = pySPACE.configuration.storage
        self.run = run
        self.storage_format = storage_format
        self.result_dataset_directory = result_dataset_directory
        self.persistency_dir = os.sep.join(
            [result_dataset_directory,
             "persistency_run%s" % run])
        create_directory(self.persistency_dir)
        self.store_node_chain = store_node_chain
        # A fresh list per instance, so that instances never share one
        # mutable default object
        if hide_parameters is None:
            hide_parameters = []
        self.hide_parameters = hide_parameters

        # reduce_log_level for process creation
        # NOTE(review): the configured log levels are passed to eval();
        # this is only acceptable as long as the configuration is trusted.
        try:
            console_log_level = eval(pySPACE.configuration.console_log_level) \
                if hasattr(pySPACE.configuration, "console_log_level") \
                else logging.WARNING
        except (AttributeError, NameError):
            console_log_level = logging.WARNING
        try:
            file_log_level = eval(pySPACE.configuration.file_log_level) \
                if hasattr(pySPACE.configuration, "file_log_level") \
                else logging.INFO
        except (AttributeError, NameError):
            file_log_level = logging.INFO

        self.min_log_level = min(console_log_level, file_log_level)
        pySPACE.configuration.min_log_level = self.min_log_level
        # Replace parameters in spec file
        self.node_chain_spec = replace_parameters2(self.node_chain_spec,
                                                   self.parameter_setting)
        # Create node chain
        self.node_chain = NodeChainFactory.flow_from_yaml(
            Flow_Class=BenchmarkNodeChain, flow_spec=self.node_chain_spec)

        for node in self.node_chain:
            node.current_split = split
        # Remove pseudo parameter "__PREPARE_OPERATION__"
        # (on a copy, so the caller's mapping stays untouched)
        if "__PREPARE_OPERATION__" in self.parameter_setting:
            self.parameter_setting = copy.deepcopy(self.parameter_setting)
            self.parameter_setting.pop("__PREPARE_OPERATION__")
Beispiel #6
0
 def store_state(self, result_dir, index=None):
     """ Stores the projection in the given directory *result_dir*

     If *self.store* is set, *self.projection* is pickled (protocol 2)
     into a sub-directory named after the node class. The file name
     contains the current split. *index* is not used here.
     """
     if not self.store:
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(node_dir)
     name = "%s_sp%s.pickle" % ("projection", self.current_split)
     # The context manager closes the file even if pickling fails
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps(self.projection, protocol=2))
Beispiel #7
0
 def _createProcesses(cls, processes, result_dir, data_dict, parameters,
                      metrics, top_level):
     """ Recursive function that is used to create the analysis processes

     Each process creates one plot for each numeric parameter, each pair of
     numeric parameters, and each nominal parameter based on the data
     contained in the *data_dict*. The results are stored in *result_dir*.
     The method calls itself recursively for each value of each parameter.

     **Parameters**

         :processes: Queue-like object; every created process is handed
             over via ``put``. At the top level, *False* is put last as
             the sign that the creation is finished.
         :result_dir: Directory for the results of the created process;
             sub-directories "<parameter>#<value>" are created for the
             recursive calls.
         :data_dict: Mapping from column name to the list of its values.
         :parameters: Names of the parameters that are still varied.
         :metrics: Passed on unchanged to the analysis processes.
         :top_level: *True* only for the outermost call.
     """
     # Create the analysis process for the given parameters and the
     # given data
     process = AnalysisProcess(result_dir, data_dict, parameters, metrics)
     processes.put(process)

     # If we have less than two parameters it does not make sense to
     # split further
     if len(parameters) < 2:
         if top_level == True:
             # If we have only one parameter to visualize,
             # we don't need to create any further processes,
             # and we have to finish the creating process.
             processes.put(False)
         return

     # For each parameter
     for proj_parameter in parameters:
         # We split the data based on the values of this parameter
         remaining_parameters = [parameter for parameter in parameters
                                     if parameter != proj_parameter]
         # The rows are selected via the column of the projection parameter
         # itself (and not via a variable leaked from the comprehension)
         proj_column = data_dict[proj_parameter]
         # For each value the respective projection parameter can take on
         for value in set(proj_column):
             # Project the result dict onto the rows where the respective
             # parameter takes on the given value
             projected_dict = defaultdict(list)
             entries_added = False
             for i, entry in enumerate(proj_column):
                 if entry == value:
                     entries_added = True
                     for column_key in data_dict.keys():
                         if column_key == proj_parameter:
                             continue
                         projected_dict[column_key].append(data_dict[column_key][i])
             # If the projected_dict is empty we continue
             if not entries_added:
                 continue

             # Create result_dir and do the recursive call for the
             # projected data
             # Parameter is separated via #
             proj_result_dir = result_dir + os.sep + "%s#%s" % (proj_parameter,
                                                                value)
             create_directory(proj_result_dir)
             cls._createProcesses(processes, proj_result_dir, projected_dict,
                                  remaining_parameters, metrics, False)
     if top_level == True:
         # give executing process the sign that creation is now finished
         processes.put(False)
Beispiel #8
0
 def store_state(self, result_dir, index=None):
     """ Stores the projection in the given directory *result_dir*

     If *self.store* is set, *self.projection* is pickled (protocol 2)
     into a sub-directory named after the node class. The file name
     contains the current split. *index* is not used here.
     """
     if not self.store:
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(node_dir)
     name = "%s_sp%s.pickle" % ("projection", self.current_split)
     # The context manager closes the file even if pickling fails
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps(self.projection, protocol=2))
Beispiel #9
0
    def create(cls, operation_spec, base_result_dir=None):
        """
        A factory method that calls the responsible method
        for creating an operation of the type specified in
        the operation specification dictionary (*operation_spec*).

        **Parameters**

            :operation_spec: Dictionary with at least the key "type" and
                usually "input_path". A type ending with "_operation" is
                shortened in place and a DeprecationWarning is issued.
            :base_result_dir: Passed to ``cls.get_unique_result_dir`` to
                determine the result directory.

        Returns whatever the ``create`` method of the selected operation
        class returns. If the operation module cannot be imported, a
        warning is issued and ``NodeChainOperation`` is used instead.
        """
        # Determine result directory
        result_directory = cls.get_unique_result_dir(base_result_dir)
        print("--> Results will be stored at: \n\t\t %s"%str(result_directory))
        # Check if the required directories exist
        # and create them if necessary
        create_directory(result_directory)

        # Determine all input datasets (note: they can be specified by
        # extended syntax for the glob package)
        storage = pySPACE.configuration.storage
        if "input_path" not in operation_spec:
            warnings.warn("No input path found in operation specification.")
        input_path_pattern = os.sep.join([storage,
                                          operation_spec.get("input_path", ""),
                                          "*", ""])
        # Keep only folders that contain dataset meta data
        input_paths = []
        for path in glob.glob(input_path_pattern):
            if os.path.isfile(os.sep.join([path, "metadata.yaml"])):
                input_paths.append(path)
            elif os.path.isfile(os.sep.join([path, "collection.yaml"])):
                # warning comes, when data is loaded
                input_paths.append(path)
            else:
                warnings.warn('Folder' + str(path) + ' seems not to be a pySPACE'+
                              ' dataset (no "metadata.yaml" found)! '+
                              'Skipping this folder in operation...')

        op_type = operation_spec["type"]
        if op_type.endswith("_operation"):
            op_type = op_type[:-len("_operation")]
            operation_spec["type"] = op_type
            warnings.warn("'%s_operation' has the wrong ending. Using '%s' instead."%(op_type,op_type),DeprecationWarning)
        op_class_name = ''.join([x.title() for x in op_type.split('_')])
        op_class_name += "Operation"
        # dynamic class import: from op_module import op_class_name
        try:
            op_module = __import__('pySPACE.missions.operations.%s' % op_type,
                                        fromlist=[op_class_name])
        except Exception:
            # Best effort fallback: any failure while importing the module
            # leads to the node chain operation, but no longer silently.
            msg = "Operation module %s is unknown. Trying to use node_chain." % (op_type)
            warnings.warn(msg)
            from pySPACE.missions.operations.node_chain import NodeChainOperation
            op_class = NodeChainOperation
        else:
            op_class = getattr(op_module,op_class_name)
        return op_class.create(operation_spec, result_directory,
                               input_paths=input_paths)
Beispiel #10
0
    def _get_result_dataset_dir(base_dir, input_dataset_dir,
                                   parameter_setting, hide_parameters):
        """ Determines the name of the result directory

        The name is composed of the name of the input dataset and of all
        parameters (given ones and those found in the meta data of the
        input) that are not listed in *hide_parameters*. The directory is
        created below *base_dir* and its path is returned.
        """
        # Last path component without surrounding braces; if the input is
        # already the result of an operation, only the first part counts
        stripped_dir = input_dataset_dir.strip(os.sep)
        input_name = stripped_dir.rsplit(os.sep, 1)[-1].strip("{}")
        input_name = input_name.partition("}{")[0]

        # Load the input meta data
        meta_data = BaseDataset.load_meta_data(
            os.sep.join([pySPACE.configuration.storage, input_dataset_dir]))

        # Work on a copy, since the setting is modified below and later
        # runs must not be affected
        setting = copy.deepcopy(parameter_setting)

        # Ignore pseudo parameter "__PREPARE_OPERATION__"
        setting.pop("__PREPARE_OPERATION__", None)

        # Add the parameters stored in the meta data of the input
        if "parameter_setting" in meta_data:
            setting.update(meta_data["parameter_setting"])

        # We have to remove ' characters from the parameter value since
        # Weka does ignore them
        for name, entry in setting.iteritems():
            if not isinstance(entry, basestring):
                continue
            if entry.count("'") > 1:
                setting[name] = eval(entry)

        # String between key and value is "#" instead of ":",
        # because of problems in windows and with windows file servers
        visible_parts = ["%s#%s" % (name, entry)
                         for name, entry in setting.iteritems()
                         if name not in hide_parameters]
        parameter_str = "}{".join(visible_parts)

        result_name = "{%s}" % input_name
        if parameter_str:
            result_name = result_name + "{%s}" % (parameter_str)

        # Determine the path where this result will be stored
        # and create the directory if necessary
        result_dir = base_dir + os.sep + result_name
        create_directory(result_dir)
        return result_dir
Beispiel #11
0
 def store_state(self, result_dir, index=None):
     """ Stores this node in the given directory *result_dir*.

     If *self.store* is set, the tuple ``(self.avg, self.v)`` is pickled
     (protocol 2) into a sub-directory named after the node class.
     The file name contains the current split. *index* is not used here.
     """
     if not self.store:
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(node_dir)
     # This node only stores the learned eigenvector and eigenvalues
     name = "%s_sp%s.pickle" % ("eigenmatrix", self.current_split)
     # The context manager closes the file even if pickling fails
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps((self.avg, self.v), protocol=2))
Beispiel #12
0
 def store_state(self, result_dir, index=None):
     """ Stores this node in the given directory *result_dir*.

     If *self.store* is set, the tuple ``(self.avg, self.v)`` is pickled
     (protocol 2) into a sub-directory named after the node class.
     The file name contains the current split. *index* is not used here.
     """
     if not self.store:
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(node_dir)
     # This node only stores the learned eigenvector and eigenvalues
     name = "%s_sp%s.pickle" % ("eigenmatrix", self.current_split)
     # The context manager closes the file even if pickling fails
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps((self.avg, self.v), protocol=2))
Beispiel #13
0
 def store_state(self, result_dir, index=None):
     """ Stores *scikit_alg*

     If *self.store* is set, *self.scikit_alg* is pickled (protocol 2)
     into a sub-directory of *result_dir* named after the node class.
     Afterwards the ``store_state`` of the parent class is called with
     the same *result_dir* and *index*, independent of *self.store*.
     """
     if self.store:
         node_dir = os.path.join(result_dir, self.__class__.__name__)
         create_directory(node_dir)
         name = "%s_sp%s.pickle" % ("Model", self.current_split)
         # The context manager closes the file even if pickling fails
         with open(os.path.join(node_dir, name), "wb") as result_file:
             result_file.write(cPickle.dumps(self.scikit_alg, protocol=2))
     super(ScikitPredictor, self).store_state(result_dir, index)
Beispiel #14
0
    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        The ``str`` representations of all entries of
        *self.window_definition* are written one after another (without
        separator) to "window_definitions.txt" in a sub-directory named
        after the node class. *index* is not used here.
        """
        from pySPACE.tools.filesystem import  create_directory
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        file_path = os.path.join(node_dir, "window_definitions.txt")
        # The context manager closes the file even if writing fails
        with open(file_path, "w") as result_file:
            for window_def in self.window_definition:
                result_file.write(str(window_def))
Beispiel #15
0
    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        The ``str`` representations of all entries of
        *self.window_definition* are written one after another (without
        separator) to "window_definitions.txt" in a sub-directory named
        after the node class. *index* is not used here.
        """
        from pySPACE.tools.filesystem import  create_directory
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        file_path = os.path.join(node_dir, "window_definitions.txt")
        # The context manager closes the file even if writing fails
        with open(file_path, "w") as result_file:
            for window_def in self.window_definition:
                result_file.write(str(window_def))
Beispiel #16
0
 def store_state(self, result_dir, index=None):
     """ Stores *scikit_alg*

     If *self.store* is set, *self.scikit_alg* is pickled (protocol 2)
     into a sub-directory of *result_dir* named after the node class.
     Afterwards the ``store_state`` of the parent class is called with
     the same *result_dir* and *index*, independent of *self.store*.
     """
     if self.store:
         node_dir = os.path.join(result_dir, self.__class__.__name__)
         create_directory(node_dir)
         name = "%s_sp%s.pickle" % ("Model", self.current_split)
         # The context manager closes the file even if pickling fails
         with open(os.path.join(node_dir, name), "wb") as result_file:
             result_file.write(cPickle.dumps(self.scikit_alg, protocol=2))
     super(ScikitPredictor,self).store_state(result_dir, index)
Beispiel #17
0
    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        If *self.store* is set, the ``str`` representation of
        *self.selected_channels* is written to a text file in a
        sub-directory named after the node class. The file name contains
        the current split. *index* is not used here.
        """
        if not self.store:
            return
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        # This node only stores which electrodes have been selected
        name = "%s_sp%s.txt" % ("electrode_selection", self.current_split)
        # Plain text write mode ("wi" is not a valid mode string); the
        # context manager closes the file even if writing fails
        with open(os.path.join(node_dir, name), "w") as result_file:
            result_file.write(str(self.selected_channels))
Beispiel #18
0
    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        If *self.store* is set, the ``str`` representation of
        *self.selected_channels* is written to a text file in a
        sub-directory named after the node class. The file name contains
        the current split. *index* is not used here.
        """
        if not self.store:
            return
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        # This node only stores which electrodes have been selected
        name = "%s_sp%s.txt" % ("electrode_selection", self.current_split)
        # Plain text write mode ("wi" is not a valid mode string); the
        # context manager closes the file even if writing fails
        with open(os.path.join(node_dir, name), "w") as result_file:
            result_file.write(str(self.selected_channels))
Beispiel #19
0
 def store_state(self, result_dir, index=None):
     """ Stores transformation and feature names in the given directory *result_dir*

     If *self.store* is set, the tuple
     ``(self.translation, self.mult, self.feature_names)`` is pickled
     (protocol 2) into a sub-directory named after the node class.
     Afterwards the ``store_state`` of the parent class is called,
     independent of *self.store*.
     """
     if self.store:
         node_dir = os.path.join(result_dir, self.__class__.__name__)
         create_directory(node_dir)
         name = "%s_sp%s.pickle" % ("FN", self.current_split)
         state = (self.translation, self.mult, self.feature_names)
         # The context manager closes the file even if pickling fails
         with open(os.path.join(node_dir, name), "wb") as result_file:
             result_file.write(cPickle.dumps(state, protocol=2))
     # NOTE(review): *index* is not forwarded to the parent class here
     # -- confirm whether this is intended
     super(FeatureNormalizationNode,self).store_state(result_dir)
Beispiel #20
0
    def __init__(self, processes, operation_spec, result_directory):
        """ Stores the arguments and writes the operation specification

        *result_directory* is created if necessary and *operation_spec*
        is dumped as YAML to "source_operation.yaml" inside of it.
        """
        self.processes = processes
        self.operation_spec = operation_spec
        self.result_directory = result_directory

        # Check if the required directories exist
        # and create them if necessary
        create_directory(self.result_directory)

        # Store the specification of this operation in the directory;
        # the context manager closes the file even if dumping fails
        spec_path = os.sep.join([self.result_directory, "source_operation.yaml"])
        with open(spec_path, "w") as source_operation_file:
            yaml.dump(self.operation_spec, source_operation_file)
 def store_state(self, result_dir, index=None):
     """ Stores transformation and feature names in the given directory *result_dir*

     If *self.store* is set, the tuple
     ``(self.translation, self.mult, self.feature_names)`` is pickled
     (protocol 2) into a sub-directory named after the node class.
     Afterwards the ``store_state`` of the parent class is called,
     independent of *self.store*.
     """
     if self.store:
         node_dir = os.path.join(result_dir, self.__class__.__name__)
         create_directory(node_dir)
         name = "%s_sp%s.pickle" % ("FN", self.current_split)
         state = (self.translation, self.mult, self.feature_names)
         # The context manager closes the file even if pickling fails
         with open(os.path.join(node_dir, name), "wb") as result_file:
             result_file.write(cPickle.dumps(state, protocol=2))
     # NOTE(review): *index* is not forwarded to the parent class here
     # -- confirm whether this is intended
     super(FeatureNormalizationNode,self).store_state(result_dir)
 def store_state(self, result_dir, index=None):
     """ Store this node in the given directory *result_dir*

     If *self.store* is set, ``store_state`` is first called on every
     node of *self.flow* with the same arguments. Then
     *self.search_history* is pickled (highest protocol) into a
     sub-directory named after the node class.
     """
     # ..todo ::  mapping of flow_id and parameterization?!
     if not self.store:
         return
     for node in self.flow:
         node.store_state(result_dir, index)
     class_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(class_dir)
     # Store the search history
     name = "search_history_sp%d.pickle" % self.current_split
     # The context manager closes the file even if pickling fails
     with open(os.path.join(class_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps(self.search_history,
                                         protocol=cPickle.HIGHEST_PROTOCOL))
Beispiel #23
0
    def __init__(self, processes, operation_spec, result_directory):
        """ Stores the arguments and writes the operation specification

        *result_directory* is created if necessary and *operation_spec*
        is dumped as YAML to "source_operation.yaml" inside of it.
        """
        self.processes = processes
        self.operation_spec = operation_spec
        self.result_directory = result_directory

        # Check if the required directories exist
        # and create them if necessary
        create_directory(self.result_directory)

        # Store the specification of this operation in the directory;
        # the context manager closes the file even if dumping fails
        spec_path = os.sep.join(
            [self.result_directory, "source_operation.yaml"])
        with open(spec_path, 'w') as source_operation_file:
            yaml.dump(self.operation_spec, source_operation_file)
Beispiel #24
0
 def store_state(self, result_dir, index=None):
     """ Store this node in the given directory *result_dir*

     If *self.store* is set, ``store_state`` is first called on every
     node of *self.flow* with the same arguments. Then
     *self.search_history* is pickled (highest protocol) into a
     sub-directory named after the node class.
     """
     # ..todo ::  mapping of flow_id and parameterization?!
     if not self.store:
         return
     for node in self.flow:
         node.store_state(result_dir, index)
     class_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(class_dir)
     # Store the search history
     name = "search_history_sp%d.pickle" % self.current_split
     # The context manager closes the file even if pickling fails
     with open(os.path.join(class_dir, name), "wb") as result_file:
         result_file.write(
             cPickle.dumps(self.search_history,
                           protocol=cPickle.HIGHEST_PROTOCOL))
Beispiel #25
0
 def store_state(self, result_dir, index=None):
     """ Stores this node in the given directory *result_dir*

     If *self.store* is set, *self.filters* is pickled (highest
     protocol) into a sub-directory named after the node class. If
     *self.visualize_pattern* is set, the spatial filter plots are
     stored in the same directory. *index* is not used here.
     """
     if not (self.store or self.visualize_pattern):
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(node_dir)
     if self.store:
         # This node only stores the learned CSP patterns
         name = "%s_sp%s.pickle" % ("patterns", self.current_split)
         # The context manager closes the file even if pickling fails
         with open(os.path.join(node_dir, name), "wb") as result_file:
             result_file.write(
                 cPickle.dumps(self.filters,
                               protocol=cPickle.HIGHEST_PROTOCOL))
     # Store spatial filter plots if desired
     if self.visualize_pattern:
         CSPNode._store_spatial_filter_plots(self.filters,
                                             self.channel_names, node_dir)
Beispiel #26
0
 def store_state(self, result_dir, index=None):
     """ Stores this node in the given directory *result_dir*

     If *self.store* is set, two files are written into a sub-directory
     named after the node class: *self.retained_feature_indices*
     pickled with protocol 2 and *self.feature_names* formatted as
     text. Both file names contain the current split. *index* is not
     used here.
     """
     if not self.store:
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(node_dir)
     # This node only stores the order of the selected features' indices
     name = "%s_sp%s.pickle" % ("selected_features", self.current_split)
     # The context managers close the files even if writing fails
     with open(os.path.join(node_dir, name), "wb") as result_file:
         result_file.write(cPickle.dumps(self.retained_feature_indices,
                                         protocol=2))

     # Store feature names
     name = "feature_names_sp%s.txt" % self.current_split
     with open(os.path.join(node_dir, name), "w") as result_file:
         result_file.write("%s" % self.feature_names)
Beispiel #27
0
    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        If *self.store* is set, two files are written into a
        sub-directory named after the node class:
        *self.retained_feature_indices* pickled with protocol 2 and
        *self.feature_names* formatted as text. Both file names contain
        the current split. *index* is not used here.
        """
        if not self.store:
            return
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)
        # This node only stores the order of the selected features' indices
        name = "%s_sp%s.pickle" % ("selected_features", self.current_split)
        # The context managers close the files even if writing fails
        with open(os.path.join(node_dir, name), "wb") as result_file:
            result_file.write(
                cPickle.dumps(self.retained_feature_indices, protocol=2))

        # Store feature names
        name = "feature_names_sp%s.txt" % self.current_split
        with open(os.path.join(node_dir, name), "w") as result_file:
            result_file.write("%s" % self.feature_names)
Beispiel #28
0
 def store_state(self, result_dir, index=None):
     """ Stores this node in the given directory *result_dir*

     If *self.store* is set, *self.filters* is pickled (highest
     protocol) into a sub-directory named after the node class. If
     *self.visualize_pattern* is set, the spatial filter plots are
     stored in the same directory. *index* is not used here.
     """
     if not (self.store or self.visualize_pattern):
         return
     node_dir = os.path.join(result_dir, self.__class__.__name__)
     create_directory(node_dir)
     if self.store:
         # This node only stores the learned CSP patterns
         name = "%s_sp%s.pickle" % ("patterns", self.current_split)
         # The context manager closes the file even if pickling fails
         with open(os.path.join(node_dir, name), "wb") as result_file:
             result_file.write(cPickle.dumps(self.filters,
                                             protocol=cPickle.HIGHEST_PROTOCOL))
     # Store spatial filter plots if desired
     if self.visualize_pattern:
         CSPNode._store_spatial_filter_plots(self.filters,
                                             self.channel_names,
                                             node_dir)
Beispiel #29
0
 def __init__(self, processes, operation_spec, result_directory):
     """ Stores the arguments and dumps the operation specification

     *processes*, *operation_spec* and *result_directory* are kept as
     attributes of the same name.  *result_directory* is created if
     necessary and the specification is written as YAML to
     ``source_operation.yaml`` inside it, without its ``base_file``
     entry.

     The ``base_file`` entry is removed from *operation_spec* only for
     the duration of the dump; it is put back afterwards (also when
     writing the file fails), so the caller's dictionary is not left in
     a modified state.
     """
     self.processes = processes
     self.operation_spec = operation_spec
     self.result_directory = result_directory

     # Check if the required directories exist
     # and create them if necessary
     create_directory(self.result_directory)

     # Store the specification of this operation in the directory
     # without the base_file entry
     base_file = self.operation_spec.pop("base_file", None)
     try:
         spec_path = os.sep.join([self.result_directory,
                                  "source_operation.yaml"])
         with open(spec_path, 'w') as source_operation_file:
             yaml.dump(self.operation_spec, source_operation_file)
     finally:
         # Restore the temporarily removed entry in every case
         if base_file is not None:
             self.operation_spec["base_file"] = base_file
Beispiel #30
0
    def __init__(self, processes, operation_spec, result_directory):
        """ Stores the arguments and dumps the operation specification

        *processes*, *operation_spec* and *result_directory* are kept as
        attributes of the same name.  *result_directory* is created if
        necessary and the specification is written as YAML to
        ``source_operation.yaml`` inside it, without its ``base_file``
        entry.

        The ``base_file`` entry is removed from *operation_spec* only for
        the duration of the dump; it is put back afterwards (also when
        writing the file fails), so the caller's dictionary is not left
        in a modified state.
        """
        self.processes = processes
        self.operation_spec = operation_spec
        self.result_directory = result_directory

        # Check if the required directories exist
        # and create them if necessary
        create_directory(self.result_directory)

        # Store the specification of this operation in the directory
        # without the base_file entry
        base_file = self.operation_spec.pop("base_file", None)
        try:
            spec_path = os.sep.join(
                [self.result_directory, "source_operation.yaml"])
            with open(spec_path, 'w') as source_operation_file:
                yaml.dump(self.operation_spec, source_operation_file)
        finally:
            # Restore the temporarily removed entry in every case
            if base_file is not None:
                self.operation_spec["base_file"] = base_file
    def store_state(self, result_dir, index=None):
        """ Stores all generated plots in the given directory *result_dir*

        Nothing is written unless ``self.store`` is set.  The output
        directory is ``<result_dir>/<node class name>``, extended by
        ``_<index>`` when *index* is given.  Written files:

        * ``timeseries_sp<split>.pdf`` if ``self.ts_plot`` exists
        * ``histo_sp<split>.pdf`` if ``self.histo_plot`` exists
        * per label in ``self.labeled_corr_matrix``: the matrix as
          ``Feature_Correlation_<label>_sp<split>.txt`` and the matching
          entry of ``self.feature_development_plot`` as
          ``Feature_Development_<label>_sp<split>.pdf``
        * per label in ``self.corr_plot``:
          ``Feature_Correlation_<label>_sp<split>.pdf``

        Finally all pylab figures are closed.
        """
        if not self.store:
            return

        node_dir = os.path.join(result_dir, self.__class__.__name__)
        if index is not None:
            node_dir += "_%d" % int(index)

        create_directory(node_dir)

        # Identity checks against None: an equality test would be routed
        # through the plot object's own comparison operators.
        if self.ts_plot is not None:
            name = 'timeseries_sp%s.pdf' % self.current_split
            self.ts_plot.savefig(os.path.join(node_dir, name),
                                 bbox_inches="tight")

        if self.histo_plot is not None:
            name = 'histo_sp%s.pdf' % self.current_split
            self.histo_plot.savefig(os.path.join(node_dir, name),
                                    bbox_inches="tight")

        for label, corr_matrix in self.labeled_corr_matrix.items():
            name = 'Feature_Correlation_%s_sp%s.txt' % (label,
                                                        self.current_split)
            pylab.savetxt(os.path.join(node_dir, name),
                          corr_matrix,
                          fmt='%s',
                          delimiter='  ')
            name = 'Feature_Development_%s_sp%s.pdf' % (label,
                                                        self.current_split)
            self.feature_development_plot[label].savefig(
                os.path.join(node_dir, name))

        for label, plot in self.corr_plot.items():
            name = 'Feature_Correlation_%s_sp%s.pdf' % (label,
                                                        self.current_split)
            plot.savefig(os.path.join(node_dir, name))

        pylab.close("all")
    def store_state(self, result_dir, index=None):
        """ Stores all generated plots in the given directory *result_dir*

        Nothing is written unless ``self.store`` is set.  The output
        directory is ``<result_dir>/<node class name>``, extended by
        ``_<index>`` when *index* is given.  Written files:

        * ``timeseries_sp<split>.pdf`` if ``self.ts_plot`` exists
        * ``histo_sp<split>.pdf`` if ``self.histo_plot`` exists
        * per label in ``self.labeled_corr_matrix``: the matrix as
          ``Feature_Correlation_<label>_sp<split>.txt`` and the matching
          entry of ``self.feature_development_plot`` as
          ``Feature_Development_<label>_sp<split>.pdf``
        * per label in ``self.corr_plot``:
          ``Feature_Correlation_<label>_sp<split>.pdf``

        Finally all pylab figures are closed.
        """
        if not self.store:
            return

        node_dir = os.path.join(result_dir, self.__class__.__name__)
        if index is not None:
            node_dir += "_%d" % int(index)

        create_directory(node_dir)

        # ``is not None`` instead of ``!= None``: identity is the intended
        # test and does not depend on the plot object's comparison methods.
        if self.ts_plot is not None:
            name = 'timeseries_sp%s.pdf' % self.current_split
            self.ts_plot.savefig(os.path.join(node_dir, name),
                                 bbox_inches="tight")

        if self.histo_plot is not None:
            name = 'histo_sp%s.pdf' % self.current_split
            self.histo_plot.savefig(os.path.join(node_dir, name),
                                    bbox_inches="tight")

        for label in self.labeled_corr_matrix:
            name = 'Feature_Correlation_%s_sp%s.txt' % (label,
                                                        self.current_split)
            pylab.savetxt(os.path.join(node_dir, name),
                          self.labeled_corr_matrix[label], fmt='%s',
                          delimiter='  ')
            name = 'Feature_Development_%s_sp%s.pdf' % (label,
                                                        self.current_split)
            self.feature_development_plot[label].savefig(
                os.path.join(node_dir, name))

        for label in self.corr_plot:
            name = 'Feature_Correlation_%s_sp%s.pdf' % (label,
                                                        self.current_split)
            self.corr_plot[label].savefig(os.path.join(node_dir, name))

        pylab.close("all")
Beispiel #33
0
    def __call__(self):
        """ Executes this process on the respective modality

        Every entry of ``self.input_collections`` is used once as the
        "test" collection.  For it, a target collection below
        ``self.result_directory`` is set up whose name combines the test
        collection's base name with ``self.name_pattern`` (order depends
        on ``self.reverse``).  All *other* input collections that satisfy
        ``self.collection_constraints`` contribute the data that is merged
        into the target.  Arff data is handled via ``_copy_arff_file`` /
        ``_merge_arff_files``; everything else is delegated to
        ``_copy_file`` / ``_merge_files``.

        Test collections for which no other collection qualifies are
        skipped without creating any output.
        """
        ############## Prepare benchmarking ##############
        super(MergeProcess, self).pre_benchmarking()

        # For all input collections
        for source_test_collection_path in self.input_collections:
            # Check if the input data is split,
            # e.g. only a single test file is in the source directory
            source_files = glob.glob(
                os.sep.join(
                    [source_test_collection_path, "data_run0", "*test*"]))
            splitted = len(source_files) > 1
            # Split input (more than one "*test*" file) is not supported
            assert (not splitted)
            # NOTE(review): raises IndexError when the glob matched nothing
            source_file_name = str(source_files[-1])

            # check if train sets are also present
            train_data_present = len(glob.glob(os.sep.join(
                                 [source_test_collection_path,"data_run0",\
                                  "*train*"]))) > 0

            # if training data is present -> use train and test sets separately
            if train_data_present:
                train_set_name_suffix = "train"
            else:
                train_set_name_suffix = "test"

            # We create the collection Rest_vs_Collection
            # The collection name is the second to last path component,
            # i.e. the path is expected to end with a separator.
            source_test_collection_name = \
                                   source_test_collection_path.split(os.sep)[-2]
            # Base name: first "{...}" group of the collection name
            test_base_collection_name = \
                          source_test_collection_name.strip("}{").split("}{")[0]
            if self.reverse:
                target_collection_name = source_test_collection_name.replace(
                    test_base_collection_name,
                    test_base_collection_name + "_vs_" + self.name_pattern)
                key = "train"
            else:
                target_collection_name = source_test_collection_name.replace(
                    test_base_collection_name,
                    self.name_pattern + "_vs_" + test_base_collection_name)
                key = "test"

            target_collection_path = os.sep.join(
                [self.result_directory, target_collection_name])
            # determine the parameter_settings of the test collection
            test_collection = BaseDataset.load(source_test_collection_path)
            target_collection_params = \
                                 test_collection.meta_data["parameter_setting"]
            target_collection_params["__INPUT_DATASET__"] = \
                                           {key: source_test_collection_name}

            if source_file_name.endswith("arff"):
                file_ending = "arff"
                # Copy arff file from input collection to target collection
                # NOTE(review): this file name has no "_" between "sp0" and
                # the suffix, unlike the "features_sp0_" names built further
                # down -- confirm whether that is intended.
                source_test_file_path = os.sep.join([
                    source_test_collection_path, "data_run0",
                    "features_sp0" + train_set_name_suffix + ".arff"
                ])
                target_test_file_path = os.sep.join([
                    target_collection_path, "data_run0",
                    "features_sp0_" + key + ".arff"
                ])

            else:
                # Non-arff data: work on whole collection directories
                file_ending = source_file_name.split(".")[-1]
                source_test_file_path = source_test_collection_path
                target_test_file_path = target_collection_path

            # Collect the data of all other collections that may be merged
            source_train_pathes = []
            for source_train_collection_path in self.input_collections:
                source_train_collection_name = \
                                  source_train_collection_path.split(os.sep)[-2]
                # We must not use data originating from the same input
                # collection both in train and test files
                if source_test_collection_name == source_train_collection_name:
                    continue

                # Check that all constraints are fulfilled for this pair of
                # input collections
                # NOTE(review): each constraint template is filled with the
                # two collection names and then passed to eval(); the
                # templates must therefore come from a trusted source.
                if not all(eval(constraint_template % \
                  {'source_train_collection_name': source_train_collection_name,
                   'source_test_collection_name': source_test_collection_name})
                        for constraint_template in self.collection_constraints):
                    continue

                # check if all parameters are stored in the target path
                source_collection = \
                                BaseDataset.load(source_train_collection_path)
                source_collection_params = \
                            source_collection.meta_data["parameter_setting"]
                # Parameters (key/value pairs) that the target does not have
                # yet, ignoring the bookkeeping entries listed below
                remaining_params = \
                          [param for param in source_collection_params.items() \
                            if param not in target_collection_params.items() and \
                               param[0] not in ["__INPUT_DATASET__",
                               "__RESULT_DIRECTORY__", "__OUTPUT_BUNDLE__",
                               "__INPUT_COLLECTION__" ]] # for old data
                if remaining_params != []:
                    # Encode them in the target path and adopt them.
                    # NOTE(review): target_test_file_path was derived from
                    # target_collection_path before this extension happens.
                    for k, v in remaining_params:
                        target_collection_path += "{%s#%s}" % (k, str(v))
                        target_collection_params[k] = v

                if "arff" == file_ending:
                    source_train_file_path = \
                                      os.sep.join([source_train_collection_path,
                                                "data_run0", "features_sp0_" + \
                                               train_set_name_suffix + ".arff"])
                else:
                    source_train_file_path = source_train_collection_path

                source_train_pathes.append(source_train_file_path)

            if "arff" == file_ending:
                # NOTE(review): built from the same expression as
                # target_test_file_path above (same "key") -- confirm that
                # copy and merge are meant to address the same file name.
                target_train_file_path = os.sep.join([
                    target_collection_path, "data_run0",
                    "features_sp0_" + key + ".arff"
                ])
            else:
                target_train_file_path = target_collection_path

            # Nothing to merge for this test collection -> no output at all
            if len(source_train_pathes) == 0:
                continue

            create_directory(os.sep.join([target_collection_path,
                                          "data_run0"]))

            if "arff" == file_ending:
                self._copy_arff_file(source_test_file_path,
                                     target_test_file_path,
                                     source_test_collection_name,
                                     target_collection_name)

                self._merge_arff_files(target_train_file_path,
                                       source_train_pathes,
                                       target_collection_name)
                # Copy metadata.yaml
                # TODO: Adapt to new collection
                input_meta = BaseDataset.load_meta_data(
                    source_test_collection_path)
                BaseDataset.store_meta_data(target_collection_path, input_meta)
            else:
                self._copy_file(source_test_collection_path,
                                target_collection_path, train_set_name_suffix)

                self._merge_files(target_train_file_path, source_train_pathes,
                                  train_set_name_suffix,
                                  target_collection_params)

        ############## Clean up after benchmarking ##############
        super(MergeProcess, self).post_benchmarking()
    def store_state(self, result_dir, index=None):
        """ Stores plots of score distribution and linear fit.

        Nothing happens unless ``self.store`` is set.  Otherwise one figure
        with four reliability diagrams is written to
        ``<result_dir>/<node class name>/reliable_diagrams_<split>.png``:

        1. training scores before the linear fit (with the fit drawn in)
        2. training scores after the linear fit
        3. test scores before the fit (with the clipped fit drawn in)
        4. test scores after the clipped fit

        Training data is taken from ``self.scores`` / ``self.labels``,
        test data is requested from ``self.input_node``.  Each dot is
        annotated with the number taken from the second entry of what
        ``_empirical_probability`` returns as its first result.

        *index* is accepted for interface compatibility and is not used.
        """
        if not self.store:
            return

        lower_range = self.max_range[0]
        upper_range = self.max_range[1]

        def linear_fit(score):
            """ Piecewise linear rescaling of a raw score, no clipping """
            if score < 0.0:
                return (score + lower_range) / (2.0 * lower_range)
            return (score + upper_range) / (2.0 * upper_range)

        def clipped_linear_fit(score):
            """ Like linear_fit, but saturating at 0.0 and 1.0 """
            if score < -1.0 * lower_range:
                return 0.0
            if score < 0.0:
                return (score + lower_range) / (2.0 * lower_range)
            if score < upper_range:
                return (score + upper_range) / (2.0 * upper_range)
            return 1.0

        # reliable plot of training (before linear fit)
        sort_index = numpy.argsort(self.scores)
        labels = numpy.array(self.labels)[sort_index]
        predictions = numpy.array(self.scores)[sort_index]

        plot_scores_train, l_discrete_train = \
            self._discretize(predictions, labels)
        len_list_train, plot_emp_prob_train = \
            self._empirical_probability(l_discrete_train)

        # training data after linear fit
        new_predictions = [linear_fit(score) for score in predictions]

        plot_scores_train_fit, l_discrete_train_fit = \
            self._discretize(new_predictions, labels)
        len_list_train_fit, plot_emp_prob_train_fit = \
            self._empirical_probability(l_discrete_train_fit)

        # test data before linear fit
        test_scores = []
        test_labels = []
        for data, label in self.input_node.request_data_for_testing():
            test_scores.append(data.prediction)
            test_labels.append(self.class_labels.index(label))

        sort_index = numpy.argsort(test_scores)
        labels = numpy.array(test_labels)[sort_index]
        predictions = numpy.array(test_scores)[sort_index]

        plot_scores_test, l_discrete_test = \
            self._discretize(predictions, labels)
        len_list_test, plot_emp_prob_test = \
            self._empirical_probability(l_discrete_test)

        # test data after (clipped) linear fit
        new_predictions = [clipped_linear_fit(score) for score in predictions]

        plot_scores_test_fit, l_discrete_test_fit = \
            self._discretize(new_predictions, labels)
        len_list_test_fit, plot_emp_prob_test_fit = \
            self._empirical_probability(l_discrete_test_fit)

        from pySPACE.tools.filesystem import create_directory
        import os
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        import pylab
        from matplotlib.transforms import offset_copy
        pylab.close()
        fig = pylab.figure(figsize=(10, 10))

        def draw_panel(position, scores, probabilities, counts, x_label,
                       fit_curve=None):
            """ Draw one reliability diagram into cell *position* of the
            2x2 grid: annotated dots, the diagonal and, optionally, the
            fit given as an ``(x_values, y_values)`` pair. """
            ax = pylab.subplot(2, 2, position)
            # shift the annotation slightly away from its dot
            transOffset = offset_copy(ax.transData, fig=fig,
                                      x=0.05, y=0.1, units='inches')
            for x, y, s in zip(scores, probabilities, counts):
                pylab.plot((x,), (y,), 'ro')
                pylab.text(x, y, '%d' % s, transform=transOffset)

            pylab.plot((scores[0], scores[-1]), (0, 1), '-')
            if fit_curve is not None:
                pylab.plot(fit_curve[0], fit_curve[1], '-')
            pylab.xlim(scores[0], scores[-1])
            pylab.ylim(0, 1)
            pylab.xlabel(x_label)
            pylab.ylabel("Empirical Probability")

        # The linear fit sampled on [-lower_range, upper_range)
        x1 = numpy.arange(-1.0 * lower_range, 0.0, .02)
        x2 = numpy.arange(0.0, upper_range, .02)
        y1 = (x1 + lower_range) / (2 * lower_range)
        y2 = (x2 + upper_range) / (2 * upper_range)

        draw_panel(1, plot_scores_train, plot_emp_prob_train[1],
                   len_list_train[1],
                   "SVM prediction Score (training data)",
                   fit_curve=(numpy.concatenate((x1, x2)),
                              numpy.concatenate((y1, y2))))

        draw_panel(2, plot_scores_train_fit, plot_emp_prob_train_fit[1],
                   len_list_train_fit[1],
                   "SVM Probability (training data)")

        # For the test data the fit is clipped: flat at 0 left of
        # -lower_range and flat at 1 right of upper_range.  The left flat
        # segment therefore has to end at -lower_range, which is also
        # where x1 starts and where clipped_linear_fit switches over.
        clipped_x = numpy.concatenate(
            [[plot_scores_test[0], -1.0 * lower_range],
             x1, x2,
             [upper_range, plot_scores_test[-1]]])
        clipped_y = numpy.concatenate([[0.0, 0.0], y1, y2, [1.0, 1.0]])
        draw_panel(3, plot_scores_test, plot_emp_prob_test[1],
                   len_list_test[1],
                   "SVM prediction Score (test data)",
                   fit_curve=(clipped_x, clipped_y))

        draw_panel(4, plot_scores_test_fit, plot_emp_prob_test_fit[1],
                   len_list_test_fit[1],
                   "SVM Probability (test data)")

        pylab.savefig(os.path.join(
            node_dir, "reliable_diagrams_%d.png" % self.current_split))
    def store_state(self, result_dir, index=None):
        """ Stores plots of score distribution and sigmoid fit or/and
        the calculated probabilities with the corresponding label.

        Nothing happens unless ``self.store`` is set.  Output goes to
        ``<result_dir>/<node class name>``:

        * if ``self.store_probabilities`` is set, ``self.probabilities``
          is pickled to ``probabilities_<split>.pickle``
        * if ``self.store_plots`` is set, a figure with four reliability
          diagrams (training/test scores, each before and after applying
          the sigmoid defined by ``self.A`` and ``self.B``) is saved as
          ``reliable_diagrams_<split>.png``

        *index* is accepted for interface compatibility and is not used.

        .. todo:: change plot calculations to upper if else syntax
        .. todo:: add the corresponding data point to the saved probabilities
        """
        if not self.store:
            return

        # Create the directory for the stored results
        from pySPACE.tools.filesystem import create_directory
        import os
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        # Save the probabilities in a pickle file
        if self.store_probabilities:
            import pickle
            f_name = os.path.join(
                node_dir, "probabilities_%d.pickle" % self.current_split)
            # Binary mode and a context manager: the handle is closed
            # deterministically and the pickle stream is never subject to
            # newline translation.
            with open(f_name, 'wb') as probability_file:
                pickle.dump(self.probabilities, probability_file)

        if not self.store_plots:
            return

        def sigmoid(value):
            """ 1/(1+exp(value)), evaluated without overflow

            Only the exponential of a non-positive number is ever taken.
            Multiplying an overflowed exponential by zero instead would
            yield NaN for strongly negative *value*.
            """
            if value >= 0:
                decay = numpy.exp(-value)
                return decay / (1.0 + decay)
            return 1.0 / (1.0 + numpy.exp(value))

        # reliable plot of training (before sigmoid fit)
        sort_index = numpy.argsort(self.scores)
        labels = numpy.array(self.labels)[sort_index]
        predictions = numpy.array(self.scores)[sort_index]

        plot_scores_train, l_discrete_train = \
            self._discretize(predictions, labels)
        len_list_train, plot_emp_prob_train = \
            self._empirical_probability(l_discrete_train)

        # training data after sigmoid fit
        fApB = predictions * self.A + self.B
        new_predictions = [sigmoid(value) for value in fApB]

        plot_scores_train_fit, l_discrete_train_fit = \
            self._discretize(new_predictions, labels)
        len_list_train_fit, plot_emp_prob_train_fit = \
            self._empirical_probability(l_discrete_train_fit)

        # test data before sigmoid fit
        test_scores = []
        test_labels = []
        for data, label in self.input_node.request_data_for_testing():
            test_scores.append(data.prediction)
            test_labels.append(self.class_labels.index(label))

        sort_index = numpy.argsort(test_scores)
        labels = numpy.array(test_labels)[sort_index]
        predictions = numpy.array(test_scores)[sort_index]

        plot_scores_test, l_discrete_test = \
            self._discretize(predictions, labels)
        len_list_test, plot_emp_prob_test = \
            self._empirical_probability(l_discrete_test)

        # test data after sigmoid fit
        fApB = predictions * self.A + self.B
        new_predictions = [sigmoid(value) for value in fApB]

        plot_scores_test_fit, l_discrete_test_fit = \
            self._discretize(new_predictions, labels)
        len_list_test_fit, plot_emp_prob_test_fit = \
            self._empirical_probability(l_discrete_test_fit)

        import pylab
        from matplotlib.transforms import offset_copy
        pylab.close()
        fig = pylab.figure(figsize=(10, 10))

        def draw_panel(position, scores, probabilities, counts, x_label,
                       with_sigmoid=False):
            """ Draw one reliability diagram into cell *position* of the
            2x2 grid: annotated dots, the diagonal and, if requested, the
            fitted sigmoid over the score range. """
            ax = pylab.subplot(2, 2, position)
            # shift the annotation slightly away from its dot
            transOffset = offset_copy(ax.transData, fig=fig,
                                      x=0.05, y=0.1, units='inches')
            for x, y, s in zip(scores, probabilities, counts):
                pylab.plot((x,), (y,), 'ro')
                pylab.text(x, y, '%d' % s, transform=transOffset)

            pylab.plot((scores[0], scores[-1]), (0, 1), '-')
            if with_sigmoid:
                curve_x = numpy.arange(scores[0], scores[-1], .02)
                curve_y = 1 / (1 + numpy.exp(self.A * curve_x + self.B))
                pylab.plot(curve_x, curve_y, '-')
            pylab.xlim(scores[0], scores[-1])
            pylab.ylim(0, 1)
            pylab.xlabel(x_label)
            pylab.ylabel("Empirical Probability")

        draw_panel(1, plot_scores_train, plot_emp_prob_train[1],
                   len_list_train[1],
                   "SVM prediction Score (training data)",
                   with_sigmoid=True)
        draw_panel(2, plot_scores_train_fit, plot_emp_prob_train_fit[1],
                   len_list_train_fit[1],
                   "SVM Probability (training data)")
        draw_panel(3, plot_scores_test, plot_emp_prob_test[1],
                   len_list_test[1],
                   "SVM prediction Scores (test data)",
                   with_sigmoid=True)
        draw_panel(4, plot_scores_test_fit, plot_emp_prob_test_fit[1],
                   len_list_test_fit[1],
                   "SVM Probability (test data)")

        pylab.savefig(os.path.join(
            node_dir, "reliable_diagrams_%d.png" % self.current_split))
Beispiel #36
0
    def __call__(self):
        """ Executes this process on the respective modality

        Every ordered pair of input datasets that fulfills
        ``self.dataset_constraints`` is combined into one new dataset
        below ``self.result_directory``: files of the first dataset end
        up as ``train`` files, the corresponding files of the second
        dataset as ``test`` files.  Arff files are passed through
        ``_copy_arff_file``, all other files are symlinked.  A dataset
        paired with itself is only symlinked as a whole, and only when
        its ``data_run0`` directory holds more than one entry.
        """
        ############## Prepare benchmarking ##############
        super(ShuffleProcess, self).pre_benchmarking()

        def transfer(as_arff, origin, destination, old_base, new_base):
            """ Hand arff files to _copy_arff_file, symlink the rest """
            if as_arff:
                self._copy_arff_file(origin, destination, old_base, new_base)
            else:
                os.symlink(origin, destination)

        for dataset_dir1 in self.input_datasets:
            for dataset_dir2 in self.input_datasets:
                # Dataset names are the second to last path component
                dataset_name1 = dataset_dir1.split(os.sep)[-2]
                dataset_name2 = dataset_dir2.split(os.sep)[-2]

                # More than one entry in the run directory means that
                # the input data is split
                run_entries = glob.glob(
                    os.sep.join((dataset_dir1, "data_run0", "*")))
                is_split = len(run_entries) > 1

                # Skip pairs violating any constraint.  The names used
                # inside this generator are kept as they are, since the
                # evaluated expression runs in its scope.
                constraints_met = all(
                    eval(constraint_template % {
                        'dataset_name1': dataset_name1,
                        'dataset_name2': dataset_name2
                    })
                    for constraint_template in self.dataset_constraints)
                if not constraints_met:
                    continue

                if dataset_name1 == dataset_name2:
                    # A dataset paired with itself is not mixed; split
                    # data is made available through a link
                    if is_split:
                        os.symlink(
                            dataset_dir1,
                            os.sep.join((self.result_directory,
                                         dataset_name1)))
                    continue

                # Names of the original data sets both inputs are based on
                train_base = dataset_name1.strip("}{").split("}{")[0]
                test_base = dataset_name2.strip("}{").split("}{")[0]

                # Name and location of the mixed dataset
                mixed_base = "%s_vs_%s" % (train_base, test_base)
                mixed_name = dataset_name1.replace(train_base, mixed_base)
                mixed_dir = os.sep.join((self.result_directory, mixed_name))

                create_directory(os.sep.join((mixed_dir, "data_run0")))

                if is_split:
                    # For each split: train data from dataset 1, test data
                    # from dataset 2
                    # TODO: We have $n$ train sets and $n$ test sets, we
                    #       could use all $n*n$ combinations
                    train_pattern = os.sep.join(
                        (dataset_dir1, "data_run0", "*_sp*_train.*"))
                    for train_origin in glob.glob(train_pattern):
                        as_arff = train_origin.endswith("arff")

                        train_destination = train_origin.replace(
                            dataset_dir1, mixed_dir)
                        transfer(as_arff, train_origin, train_destination,
                                 train_base, mixed_base)

                        test_origin = train_origin.replace(
                            dataset_dir1, dataset_dir2)
                        test_origin = test_origin.replace("train.", "test.")
                        test_destination = train_destination.replace(
                            "train.", "test.")
                        transfer(as_arff, test_origin, test_destination,
                                 test_base, mixed_base)
                else:
                    # Unsplit input: the test file of dataset 1 becomes the
                    # training set, the one of dataset 2 stays test data
                    test_pattern = os.sep.join(
                        (dataset_dir1, "data_run0", "*_sp*_test.*"))
                    for train_origin in glob.glob(test_pattern):
                        as_arff = train_origin.endswith("arff")

                        # Rename first, relocate second (same order as the
                        # string replacements have always been applied)
                        train_destination = train_origin.replace(
                            "test.", "train.")
                        train_destination = train_destination.replace(
                            dataset_dir1, mixed_dir)
                        transfer(as_arff, train_origin, train_destination,
                                 train_base, mixed_base)

                        test_origin = train_origin.replace(
                            dataset_dir1, dataset_dir2)
                        test_destination = train_destination.replace(
                            "train.", "test.")
                        transfer(as_arff, test_origin, test_destination,
                                 test_base, mixed_base)

                # Write metadata.yaml based on the meta data of dataset 1
                mixed_meta = dict(BaseDataset.load_meta_data(dataset_dir1))
                mixed_meta['train_test'] = True
                mixed_meta['date'] = time.strftime("%Y%m%d_%H_%M_%S")
                mixed_meta['author'] = get_author()
                BaseDataset.store_meta_data(mixed_dir, mixed_meta)

        ############## Clean up after benchmarking ##############
        super(ShuffleProcess, self).post_benchmarking()
Beispiel #37
0
 def store_state(self,
                 result_dir,     #string of results dir
                 index=None):    #None or int: number in node chain
     """ Stores the plots to the *result_dir* and is used for offline
         plotting and for plotting of average values (online and offline).
         Plots offline-data for every trial which has not been skipped.
         Optionally creates movies based on the stored images.

         Called by base_node.

         Parameters:
             result_dir:  Directory below which the node specific
                          directory is created (only if *store* is set).
             index:       Optional position of the node in the node chain.
                          If given, it is appended to the name of the
                          node directory.

         Returns:       Nothing.
     """
     if self.store:
         #set the specific directory for this particular node
         node_dir = os.path.join(result_dir, self.__class__.__name__)
         #only append the index-number if one was actually given
         #(int(None) would raise a TypeError)
         if index is not None:
             node_dir += "_%d" % int(index)
         create_directory(node_dir)
     else:
         #no specific directory
         node_dir = None

     #offline mode?
     if not self.online and (self.single_trial or self.accum_avg):
         if not hasattr(self, "_plotValues"):
             warnings.warn("VisualizationBase:: The node you are using for visualisation " \
                           "has no function _plotValues! This is most likely not what you intended!" \
                           "Plotting ignored!")
         else:
             #st_list and accum_list are indexed by *pos*, which is only
             #advanced for trials that have not been skipped
             pos = 0
             for trial_num in range(1, self.trial_counter+1):
                 if trial_num not in self.skipped_trials:
                     if self.single_trial:
                         self._plotValues(
                             values=self.st_list[pos],
                             plot_label="single_trial_no_" + str(trial_num),
                             fig_num=self.initial_fig_num+2,
                             store_dir=node_dir,
                             counter=trial_num)
                     if self.accum_avg:
                         self._plotValues(
                             values=self.accum_list[pos],
                             plot_label="accum_avg_no_"+str(trial_num),
                             fig_num=self.initial_fig_num+3,
                             store_dir=node_dir,
                             counter=trial_num)
                     pos += 1

     #plotting of the whole average or storage of the movie may also be
     #possible in online mode
     if self.online:
         #set or change the specific directory for the node to the
         #execution-path with a timestamp (see __init__)
         node_dir = self.user_dir

     #is averaging intended?
     if self.averaging:
         if not self.avg_values:
             warnings.warn("VisualizationBase:: One of your averages has no " \
                           "instances! Plotting ignored!")
         else:
             if hasattr(self, "_plotValues"):
                 self._plotValues(values=self.avg_values,
                                  plot_label="average",
                                  fig_num=self.initial_fig_num+1,
                                  store_dir=node_dir)
             else:
                 warnings.warn("VisualizationBase:: The node you are using for visualisation " \
                               "has no function _plotValues! This is most likely not what you intended!" \
                               "Plotting ignored!")

     #Finally create a movie if specified
     if self.create_movie and self.store_data:
         prefixes = []
         if self.single_trial:
             for trial in range(1, self.trial_counter+1):
                 prefixes.append("single_trial_no_" + str(trial))
         if self.accum_avg:
             for trial in range(1, self.trial_counter+1):
                 prefixes.append("accum_avg_no_" + str(trial))
         if self.averaging:
             prefixes.append('average')
         self._create_movie(prefixes=prefixes,
                            directory=node_dir)
     #close the figure windows
     pylab.close('all')
Beispiel #38
0
    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        If *store* is set, the tuple ``(filters, wi, ai)`` is pickled to
        ``patterns_sp<current_split>.pickle`` in a sub-directory named after
        the node class. If additionally *visualize_pattern* is set, one plot
        per retained signal component, the spatial filter plots and a plot
        of the summed signal are written to the same directory.

        Any exception raised while storing is printed and re-raised.
        Finally the *store_state* of the base class is called.
        """
        if self.store:
            try:
                node_dir = os.path.join(result_dir, self.__class__.__name__)
                create_directory(node_dir)
                # This node only stores the learned spatial filters
                name = "%s_sp%s.pickle" % ("patterns", self.current_split)
                # The context manager closes the file even if pickling or
                # writing fails
                with open(os.path.join(node_dir, name), "wb") as result_file:
                    result_file.write(cPickle.dumps((self.filters, self.wi,
                                                     self.ai), protocol=2))

                # Stores the signal to signal plus noise ratio resulted
                # by the spatial filter
                #fname = "SNR_sp%s.csv" % ( self.current_split)
                #numpy.savetxt(os.path.join(node_dir, fname), self.SNR,
                #    delimiter=',', fmt='%2.5e')

                # Store spatial filter plots if desired
                if self.visualize_pattern:
                    from pySPACE.missions.nodes.spatial_filtering.csp \
                        import CSPNode
                    # Compute, accumulate and analyze signal components
                    # estimated by xDAWN
                    vmin = numpy.inf
                    vmax = -numpy.inf

                    signal_components = []
                    complete_signal = numpy.zeros((self.wi.shape[1],
                                                   self.ai.shape[1]))
                    for filter_index in range(self.retained_channels):
                        signal_component = numpy.outer(
                            self.wi[filter_index, :],
                            self.ai[filter_index, :])
                        # Track the global value range so that all
                        # component plots share the same limits
                        vmin = min(signal_component.min(), vmin)
                        vmax = max(signal_component.max(), vmax)

                        signal_components.append(signal_component)
                        complete_signal += signal_component
                    # Plotting
                    import pylab
                    # NOTE: the loop variable is deliberately not called
                    # "index" to avoid shadowing the method parameter
                    for component_index, signal_component in \
                            enumerate(signal_components):
                        pylab.figure(0, figsize=(18,8))
                        pylab.gcf().clear()

                        # Plot spatial distribution
                        ax = pylab.axes([0.0, 0.0, 0.2, 0.5])
                        CSPNode._plot_spatial_values(
                            ax, self.wi[component_index, :],
                            self.channel_names,
                            'Spatial distribution')
                        # Plot spatial filter
                        ax = pylab.axes([0.0, 0.5, 0.2, 0.5])
                        CSPNode._plot_spatial_values(
                            ax, self.filters[:, component_index],
                            self.channel_names,
                            'Spatial filter')
                        # Plot signal component in electrode coordinate system
                        self._plotTimeSeriesInEC(signal_component, vmin=vmin,
                                                 vmax=vmax,
                                                 bb=(0.2, 1.0, 0.0, 1.0))

                        pylab.savefig("%s%ssignal_component%02d.png"
                                      % (node_dir, os.sep, component_index))

                    CSPNode._store_spatial_filter_plots(
                        self.filters[:, :self.retained_channels],
                        self.channel_names, node_dir)
                    # Plot entire signal
                    pylab.figure(0, figsize=(15, 8))
                    pylab.gcf().clear()
                    self._plotTimeSeriesInEC(
                        complete_signal,
                        file_name="%s%ssignal_complete.png" % (node_dir, os.sep)
                        )
                    pylab.savefig(
                        "%s%ssignal_complete.png" % (node_dir, os.sep))

            except Exception as e:
                # Make the problem visible on stdout before propagating it
                print(e)
                raise
        super(XDAWNNode, self).store_state(result_dir)
Beispiel #39
0
    def create(cls, operation_spec, base_result_dir=None):
        """
        A factory method that calls the responsible method
        for creating an operation of the type specified in
        the operation specification dictionary (*operation_spec*).

        The result directory is determined via *get_unique_result_dir* and
        created. All folders matching ``<storage>/<input_path>/*/`` that
        contain a "metadata.yaml" or "collection.yaml" are used as input
        datasets; all other folders are skipped with a warning.

        The operation class is looked up in the module
        ``pySPACE.missions.operations.<type>``. If this module cannot be
        imported, a warning is issued and *NodeChainOperation* is used.

        .. note:: A trailing "_operation" in ``operation_spec["type"]`` is
                  deprecated; it is stripped and *operation_spec* is
                  modified in place accordingly.
        """
        # Determine result directory
        result_directory = cls.get_unique_result_dir(base_result_dir)
        print("--> Results will be stored at: \n\t\t %s" %
              str(result_directory))
        # Check if the required directories exist
        # and create them if necessary
        create_directory(result_directory)

        # Determine all input datasets (note: they can be specified by
        # extended syntax for the glob package)
        storage = pySPACE.configuration.storage
        if "input_path" not in operation_spec:
            warnings.warn("No input path found in operation specification.")
        input_path_pattern = os.sep.join(
            [storage, operation_spec.get("input_path", ""), "*", ""])

        # Keep only folders which look like pySPACE datasets
        input_paths = []
        for path in glob.glob(input_path_pattern):
            has_metadata = os.path.isfile(
                os.sep.join([path, "metadata.yaml"]))
            # for "collection.yaml" a warning comes, when data is loaded
            has_collection = os.path.isfile(
                os.sep.join([path, "collection.yaml"]))
            if has_metadata or has_collection:
                input_paths.append(path)
            else:
                warnings.warn('Folder' + str(path) +
                              ' seems not to be a pySPACE' +
                              ' dataset (no "metadata.yaml" found)! ' +
                              'Skipping this folder in operation...')

        op_type = operation_spec["type"]
        if op_type.endswith("_operation"):
            op_type = op_type[:-len("_operation")]
            operation_spec["type"] = op_type
            warnings.warn(
                "'%s_operation' has the wrong ending. Using '%s' instead." %
                (op_type, op_type), DeprecationWarning)
        # e.g. "node_chain" -> "NodeChainOperation"
        op_class_name = ''.join([x.title() for x in op_type.split('_')])
        op_class_name += "Operation"
        # dynamic class import: from op_module import op_class_name
        try:
            op_module = __import__('pySPACE.missions.operations.%s' % op_type,
                                   fromlist=[op_class_name])
        except Exception:
            # Best effort fallback: tell the user about it instead of
            # silently switching the operation type
            msg = "Operation module %s is unknown. Trying to use node_chain." % (
                op_type)
            warnings.warn(msg)
            from pySPACE.missions.operations.node_chain import NodeChainOperation
            op_class = NodeChainOperation
        else:
            op_class = getattr(op_module, op_class_name)
        return op_class.create(operation_spec,
                               result_directory,
                               input_paths=input_paths)
 def __call__(self):
     """ Executes this process on the respective modality

     For every input dataset which has not been merged yet, all other
     input datasets fulfilling *dataset_constraints* are collected and
     handed over together with it to *_merge_pickle_files*. The target
     directory is created below *result_directory*.

     Datasets with more than one test file or with training data in
     "data_run0" are not supported and lead to an AssertionError.
     """
     ############## Prepare benchmarking ##############
     super(ConcatenateProcess, self).pre_benchmarking()

     # remember what has already been merged
     merged_dataset_pathes = []

     # For all input datasets
     for source_dataset_path1 in self.input_datasets:
         if source_dataset_path1 in merged_dataset_pathes:
             continue
         # At the moment split data is not supported, so there should be
         # only a single test file in the source directory
         source_files = glob.glob(os.sep.join([source_dataset_path1,
                                               "data_run0", "*test*"]))

         source_pathes = []

         is_split = len(source_files) > 1
         assert not is_split, \
             "Multiple test splits as in %s are not yet supported." \
             % str(source_files)

         # At the moment train data is not supported, so check if train sets
         # are also present
         train_data_present = len(glob.glob(os.sep.join(
             [source_dataset_path1, "data_run0", "*train*"]))) > 0

         assert not train_data_present, \
             "Using training data is not yet implemented."

         # We create the "All" dataset
         source_dataset_name1 = source_dataset_path1.split(os.sep)[-2]
         base_dataset_name = \
             source_dataset_name1.strip("}{").split("}{")[0]
         if self.name_pattern is not None:
             # NOTE(review): name_pattern is evaluated with eval(); it must
             # only come from trusted operation specifications
             target_dataset_name = source_dataset_name1.replace(
                 base_dataset_name,
                 eval(self.name_pattern %
                      {"dataset_name": base_dataset_name}))
         else:
             target_dataset_name = source_dataset_name1.replace(
                 base_dataset_name, base_dataset_name[:-1] + "_all")

         source_pathes.append(source_dataset_path1)
         target_dataset_path = os.sep.join([self.result_directory,
                                            target_dataset_name])

         for source_dataset_path2 in self.input_datasets:
             source_dataset_name2 = source_dataset_path2.split(os.sep)[-2]
             # Do not use data we have already in the source_path list
             if (source_dataset_path2 == source_dataset_path1) or \
                     (source_dataset_path2 in merged_dataset_pathes):
                 continue

             # Check that all constraints are fulfilled for this pair of
             # input datasets
             # NOTE(review): the constraint templates are evaluated with
             # eval() as well; same trust requirement as for name_pattern
             if not all(eval(constraint_template %
                             {'dataset_name1': source_dataset_name1,
                              'dataset_name2': source_dataset_name2})
                        for constraint_template in self.dataset_constraints):
                 continue

             source_pathes.append(source_dataset_path2)
             merged_dataset_pathes.append(source_dataset_path1)
             merged_dataset_pathes.append(source_dataset_path2)

         create_directory(os.sep.join([target_dataset_path, "data_run0"]))

         self._merge_pickle_files(target_dataset_path, source_pathes)

     ############## Clean up after benchmarking ##############
     super(ConcatenateProcess, self).post_benchmarking()
Beispiel #41
0
    def _get_result_dataset_dir(base_dir, input_dataset_dir, parameter_setting,
                                hide_parameters):
        """ Determines the name of the result directory

        Determines the name of the result directory based on the
        input_dataset_dir, the node_chain_name and the parameter setting.

        The directory name has the form ``{<input name>}{<key>#<value>}...``
        where parameters listed in *hide_parameters* are left out. The
        parameter setting stored in the meta data of the input dataset is
        added to the given *parameter_setting* (which itself is not modified).
        If the resulting name is too long, an md5 hash of it is used instead.

        The directory is created and its path is returned.
        """
        # Local import: the symbolic error code is needed below, since the
        # numeric value of ENAMETOOLONG differs between platforms
        import errno

        # Determine the result_directory name
        # String between Key and value changed from ":" to "#",
        # because ot problems in windows and with windows file servers
        def _get_result_dir_name(parameter_setting,
                                 hide_parameters,
                                 method=None):
            """ internal function to create result dir name in different ways

            With *method* "hash", the hash of each parameter value is used
            in the name instead of the value itself.
            """
            if not method:
                parameter_str = "}{".join(
                    ("%s#%s" % (key, value))
                    for key, value in parameter_setting.iteritems()
                    if key not in hide_parameters)
            elif method == "hash":
                parameter_str = "}{".join(
                    ("%s#%s" % (key, hash(str(value).replace(' ', ''))))
                    for key, value in parameter_setting.iteritems()
                    if key not in hide_parameters)

            # Remove characters which are not wanted in directory names
            for unwanted in ("'", " ", "[", "]", os.sep):
                parameter_str = parameter_str.replace(unwanted, "")
            result_name = "{%s}" % input_name

            if parameter_str != "":
                result_name += "{%s}" % (parameter_str)

            # Determine the path where this result will be stored
            result_dir = base_dir + os.sep + result_name

            import platform
            if platform.system() == "Windows":
                # the maximum length for a filename on Windows is 255
                max_length = 255
            else:
                max_length = os.pathconf(os.curdir, 'PC_NAME_MAX')
            # filename is to long
            # (longer than allowed including optional offsets for pyspace
            #  result csv naming conventions)
            # create a md5 hash of the result name and use that one
            if len(result_dir) > max_length - 32:
                result_name = "{" + hashlib.md5(
                    result_name).hexdigest() + "}"
                result_dir = base_dir + os.sep + result_name
            return result_dir

        input_name = input_dataset_dir.strip(os.sep).split(os.sep)[-1]
        input_name = input_name.strip("{}")
        # If the input is already the result of an operation
        if input_name.count("}{") > 0:
            input_name_parts = input_name.split("}{")
            input_name = input_name_parts[0]

        # Load the input meta data
        dataset_dir = os.sep.join(
            [pySPACE.configuration.storage, input_dataset_dir])
        dataset_md = BaseDataset.load_meta_data(dataset_dir)

        # We are going to change the parameter_setting and don't want to
        # interfere with later runs so we work on a copy
        parameter_setting = copy.deepcopy(parameter_setting)

        # Ignore pseudo parameter "__PREPARE_OPERATION__"
        if "__PREPARE_OPERATION__" in parameter_setting:
            parameter_setting.pop("__PREPARE_OPERATION__")

        # Add the input parameters meta data to the given parameter setting
        if "parameter_setting" in dataset_md:
            parameter_setting.update(dataset_md["parameter_setting"])

        # We have to remove ' characters from the parameter value since
        # Weka does ignore them
        # NOTE(review): this uses eval() on parameter values; they must
        # only come from trusted specification/meta data files
        for key, value in parameter_setting.iteritems():
            if isinstance(value, basestring) and value.count("'") > 1:
                parameter_setting[key] = eval(value)

        result_dir = _get_result_dir_name(parameter_setting, hide_parameters)
        try:
            create_directory(result_dir)
        except OSError as e:
            # Use the symbolic constant: the number is 36 on Linux but
            # differs on other platforms
            if e.errno == errno.ENAMETOOLONG:
                # filename is too long
                result_dir = _get_result_dir_name(parameter_setting,
                                                  hide_parameters, "hash")
            create_directory(result_dir)

        return result_dir
Beispiel #42
0
    def __init__(self,
                 request_training=True,
                 request_test=True,
                 separate_training_and_test=False,
                 averaging=True,
                 accum_avg=False,
                 single_trial=False,
                 time_stamps=[-1],
                 store=False,
                 store_data=False,
                 create_movie=False,
                 timeshift=0,
                 online=False,
                 user_dir='./',
                 limit2class=None,
                 physiological_arrangement=True,
                 history_index=None,
                 use_FN=True,
                 use_SF=True,
                 SF_channels=None,
                 use_transformation=False,
                 rand_initial_fig=True,
                 covariancing=False,
                 **kwargs):
        """ Set up the visualization environment.

           Called by VisualizationBase child-node.

           Besides passing the parameters on as permanent attributes:

           - *separate_training_and_test* forces both data requests,
           - *create_movie* forces *store*,
           - without *store*, *store_data* is switched off,
           - in online mode with *store*, a time-stamped folder is created
             below *user_dir* (or the current working directory).

           Parameters:    See description above.
           Returns:       Nothing.
        """
        # Separate handling of training and test data needs both of them
        if separate_training_and_test:
            request_training = request_test = True

        # The base class gets to see the (possibly modified) request flag
        self.request_training = request_training

        super(VisualizationBase, self).__init__(store=store, **kwargs)

        # First figure number: random offset or zero
        initial_fig_num = \
            int(numpy.random.rand() * 10000) if rand_initial_fig else 0

        # A movie is built from the graphics stored to the persistency
        # directory used in store_state
        if create_movie:
            store = True

        if not store:
            store_data = False

        if not (online and store):
            # either offline plotting or store=False
            user_dir = None
        else:
            # Plots are stored in online mode: use the given directory or,
            # if it was not set explicitly, the execution path
            if user_dir == './':
                user_dir = '%s/' % os.getcwd()

            # Put everything into a folder carrying a timestamp
            stamp = time.strftime("%Y%m%d_%H_%M_%S")
            user_dir = os.path.join(user_dir,
                                    stamp + '_Visualization_Plot/')

            create_directory(user_dir)

        permanent_attributes = dict(
            request_training=request_training,
            request_test=request_test,
            separate_training_and_test=separate_training_and_test,
            averaging=averaging,
            accum_avg=accum_avg,
            single_trial=single_trial,
            time_stamps=time_stamps,
            create_movie=create_movie,
            timeshift=timeshift,
            online=online,
            limit2class=limit2class,
            user_dir=user_dir,
            store_data=store_data,
            store=store,
            trial_counter=0,
            avg_values={},
            accum_list=[],
            st_list=[],
            label_counter=defaultdict(int),
            # trials which were not evaluated whenever _execute was called
            skipped_trials=[],
            current_trafo_TS=None,
            physiological_arrangement=physiological_arrangement,
            history_index=history_index,
            use_FN=use_FN,
            use_SF=use_SF,
            SF_channels=SF_channels,
            use_transformation=use_transformation,
            initial_fig_num=initial_fig_num,
            covariancing=covariancing,
        )
        self.set_permanent_attributes(**permanent_attributes)
Beispiel #43
0
    def __init__(self,
                 request_training=True,
                 request_test=True,
                 separate_training_and_test=False,
                 averaging=True,
                 accum_avg=False,
                 single_trial=False,
                 time_stamps=[-1],
                 store=False,
                 store_data=False,
                 create_movie=False,
                 timeshift=0,
                 online=False,
                 user_dir='./',
                 limit2class=None,
                 physiological_arrangement=True,
                 rand_initial_fig=True,
                 **kwargs):
        """ Used to initialize the environment.
           Called by VisualizationBase child-node.

           Side effects of parameter combinations:

           - *separate_training_and_test* forces both data requests,
           - *create_movie* forces *store*,
           - without *store*, *store_data* is switched off,
           - in online mode with *store*, a time-stamped folder is created
             below *user_dir* (or the current working directory).

           Parameters:    See description above.
           Returns:       Nothing.
        """

        #should training and test data be handled separately
        if separate_training_and_test:
            #if yes: all data has to be requested
            request_training = True
            request_test = True

        #modify request_training accordingly
        self.request_training = request_training

        super(VisualizationBase, self).__init__(store=store, **kwargs)

        if rand_initial_fig:
            initial_fig_num = int(numpy.random.rand() * 10000)
        else:
            initial_fig_num = 0

        if create_movie:
            #store the graphics to the persistency directory used in store_state
            store = True

        if not store:
            store_data = False

        #if plots are stored in online mode a directory is either specified or
        #data is stored in execution path
        if online and store:
            #is user_dir not set explicitly?
            if user_dir == './':
                #set the user directory to the execution-path
                user_dir = '%s/' % os.getcwd()

            #add a folder with a timestamp; os.path.join also works for a
            #user_dir given without trailing separator, where plain string
            #concatenation would create a sibling folder instead
            user_dir = os.path.join(
                user_dir,
                time.strftime("%Y%m%d_%H_%M_%S") + '_Visualization_Plot/')

            create_directory(user_dir)
        else:
            user_dir = None  #either offline plotting or store=False

        self.set_permanent_attributes(
            request_training=request_training,
            request_test=request_test,
            separate_training_and_test=separate_training_and_test,
            averaging=averaging,
            accum_avg=accum_avg,
            single_trial=single_trial,
            time_stamps=time_stamps,
            create_movie=create_movie,
            timeshift=timeshift,
            online=online,
            limit2class=limit2class,
            user_dir=user_dir,
            store_data=store_data,
            store=store,
            trial_counter=0,
            avg_values=dict(),
            accum_list=list(),
            st_list=list(),
            label_counter=defaultdict(int),
            skipped_trials=list(),  # list of not evaluated trials whenever
                                    # _execute was called
            physiological_arrangement=physiological_arrangement,
            initial_fig_num=initial_fig_num)
 def store_state(self, result_dir, index=None):
     """ Stores this node in the given directory *result_dir*

     If *store* is set, the metric is evaluated on the test data for every
     possible threshold position (sorted prediction values) and plotted
     together with the curve stored in *predictions_train*, the original
     classifier threshold and the optimized threshold. The plot is saved as
     "threshold_metric.pdf" in a sub-directory named after the node class.

     Plotting is best effort: if anything fails, a warning is logged and
     the base class *store_state* is still called.
     """
     if self.store:
         try:
             # Create metric function lazily since it cannot be pickled
             metric_fct = self._get_metric_fct()

             # Determine curve on test data
             # TODO: Code duplication (mostly already in train)
             predictions_test = []
             labels_test = []
             for data, label in self.input_node.request_data_for_testing():
                 predictions_test.append(data.prediction)
                 labels_test.append(self.classes.index(label))

             # Sort samples by their prediction value
             sort_index = numpy.argsort(predictions_test)
             labels_test = numpy.array(labels_test)[sort_index]
             predictions_test = numpy.array(predictions_test)[sort_index]

             # Determine orientation of hyperplane: start with all samples
             # on one side of the threshold
             if self.orientation_up:
                 TP = list(labels_test).count(1)
                 FP = list(labels_test).count(0)
                 TN = 0
                 FN = 0
             else:
                 TP = 0
                 FP = 0
                 TN = list(labels_test).count(0)
                 FN = list(labels_test).count(1)

             # Move the threshold sample by sample and update the
             # confusion matrix entries accordingly
             self.predictions_test = [[], []]
             for label, prediction_value in zip(labels_test,
                                                predictions_test):
                 if label == 0 and self.orientation_up:
                     TN += 1
                     FP -= 1
                 elif label == 0 and not self.orientation_up:
                     TN -= 1
                     FP += 1
                 elif label == 1 and self.orientation_up:
                     FN += 1
                     TP -= 1
                 elif label == 1 and not self.orientation_up:
                     FN -= 1
                     TP += 1
                 assert (TP >= 0 and FP >= 0 and TN >= 0 and FN >= 0), \
                     "TP: %s FP: %s TN: %s FN: %s" % (TP, FP, TN, FN)
                 metric_value = metric_fct(TP, FP, TN, FN)

                 self.predictions_test[0].append(prediction_value)
                 self.predictions_test[1].append(metric_value)

             ### Plot ##
             import pylab
             pylab.close()
             fig_width_pt = 307.28987*2 # Get this from LaTeX using \showthe\columnwidth
             inches_per_pt = 1.0/72.27               # Convert pt to inches
             fig_width = fig_width_pt*inches_per_pt  # width in inches
             fig_height = fig_width * 0.5            # height in inches
             fig_size = [fig_width, fig_height]
             # 'font.size' replaces the former 'text.fontsize' key, which
             # has been removed from matplotlib
             params = {'axes.labelsize': 10,
                       'font.size': 8,
                       'legend.fontsize': 8,
                       'xtick.labelsize': 10,
                       'ytick.labelsize': 10}
             pylab.rcParams.update(params)
             fig = pylab.figure(0, dpi=400, figsize=fig_size)

             # Common axis limits for training and test curve
             xmin = min(min(self.predictions_train[0]), min(self.predictions_test[0]))
             xmax = max(max(self.predictions_train[0]), max(self.predictions_test[0]))
             ymin = min(min(self.predictions_train[1]), min(self.predictions_test[1]))
             ymax = max(max(self.predictions_train[1]), max(self.predictions_test[1]))

             pylab.plot(self.predictions_train[0], self.predictions_train[1], 'b',
                        label = 'Training data')
             pylab.plot(self.predictions_test[0], self.predictions_test[1], 'g',
                        label = 'Unseen test data')
             pylab.plot([self.classifier_threshold, self.classifier_threshold],
                        [ymin, ymax], 'r', label = 'Original Threshold', lw=5)
             pylab.plot([self.threshold, self.threshold],
                        [ymin, ymax], 'c', label = 'Optimized Threshold', lw=5)
             pylab.legend(loc = 0)
             pylab.xlim((xmin, xmax))
             pylab.ylim((ymin, ymax))
             pylab.xlabel("Threshold value")
             pylab.ylabel("Metric: %s" % self.metric)

             # Store plot
             from pySPACE.tools.filesystem import  create_directory
             import os
             node_dir = os.path.join(result_dir, self.__class__.__name__)
             create_directory(node_dir)

             pylab.savefig(node_dir + os.sep + "threshold_metric.pdf")
         except Exception as e:
             # Storing the plot is optional, so only warn - but report
             # what actually went wrong
             self._log("Storing the threshold metric plot failed: %s" % e,
                       level=logging.WARNING)

     super(ThresholdOptimizationNode,self).store_state(result_dir)
Beispiel #45
0
    def store_state(self, result_dir, index=None):
        """ Stores this node in the given directory *result_dir*

        If *self.store* is set, a sub-directory named after the class of the
        node is created in *result_dir* and the tuple
        ``(self.filters, self.wi, self.ai)`` is pickled (protocol 2) into
        ``patterns_sp<current_split>.pickle``.

        If additionally *self.visualize_pattern* is set, the following plots
        are written into the same directory:

        - ``signal_componentXX.png`` for each of the first
          *self.retained_channels* filters, showing the outer product of the
          respective rows of *self.wi* and *self.ai*,
        - the plots created by ``CSPNode._store_spatial_filter_plots``,
        - ``signal_complete.png`` showing the sum of all these components.

        Exceptions occurring while storing are printed and re-raised.
        Finally, *store_state* of the base class is called with *result_dir*
        only (*index* is not forwarded).
        """
        if self.store:
            try:
                node_dir = os.path.join(result_dir, self.__class__.__name__)
                create_directory(node_dir)
                # This node only stores the learned spatial filters
                name = "%s_sp%s.pickle" % ("patterns", self.current_split)
                # The context manager guarantees that the file gets closed
                # even if pickling or writing fails
                with open(os.path.join(node_dir, name), "wb") as result_file:
                    result_file.write(
                        cPickle.dumps((self.filters, self.wi, self.ai),
                                      protocol=2))

                # Store spatial filter plots if desired
                if self.visualize_pattern:
                    from pySPACE.missions.nodes.spatial_filtering.csp \
                        import CSPNode
                    # Compute, accumulate and analyze signal components
                    # estimated by xDAWN
                    # vmin/vmax track the value range over all components
                    # such that all plots share the same color scale
                    vmin = numpy.inf
                    vmax = -numpy.inf

                    signal_components = []
                    complete_signal = numpy.zeros(
                        (self.wi.shape[1], self.ai.shape[1]))
                    for filter_index in range(self.retained_channels):
                        signal_component = numpy.outer(
                            self.wi[filter_index, :], self.ai[filter_index, :])
                        vmin = min(signal_component.min(), vmin)
                        vmax = max(signal_component.max(), vmax)

                        signal_components.append(signal_component)
                        complete_signal += signal_component
                    # Plotting
                    import pylab
                    # The loop variable must not be called *index*, since
                    # this would shadow the parameter of this method
                    for component_index, signal_component in enumerate(
                            signal_components):
                        # Figure 0 is reused (and cleared) for every plot
                        pylab.figure(0, figsize=(18, 8))
                        pylab.gcf().clear()

                        # Plot spatial distribution
                        ax = pylab.axes([0.0, 0.0, 0.2, 0.5])
                        CSPNode._plot_spatial_values(
                            ax, self.wi[component_index, :],
                            self.channel_names, 'Spatial distribution')
                        # Plot spatial filter
                        ax = pylab.axes([0.0, 0.5, 0.2, 0.5])
                        CSPNode._plot_spatial_values(
                            ax, self.filters[:, component_index],
                            self.channel_names, 'Spatial filter')
                        # Plot signal component in electrode coordinate system
                        self._plotTimeSeriesInEC(signal_component,
                                                 vmin=vmin,
                                                 vmax=vmax,
                                                 bb=(0.2, 1.0, 0.0, 1.0))

                        pylab.savefig("%s%ssignal_component%02d.png" %
                                      (node_dir, os.sep, component_index))

                    CSPNode._store_spatial_filter_plots(
                        self.filters[:, :self.retained_channels],
                        self.channel_names, node_dir)
                    # Plot entire signal
                    pylab.figure(0, figsize=(15, 8))
                    pylab.gcf().clear()
                    complete_file_name = "%s%ssignal_complete.png" % (
                        node_dir, os.sep)
                    self._plotTimeSeriesInEC(complete_signal,
                                             file_name=complete_file_name)
                    pylab.savefig(complete_file_name)

            except Exception as e:
                # Make the problem visible on the console before passing
                # it on to the caller
                print(e)
                raise
        super(XDAWNNode, self).store_state(result_dir)
    def store_state(self, result_dir, index=None):
        """ Main method which generates and stores the graphics

        Nothing is done unless *self.store* is set. All plots are saved as
        png files in a sub-directory of *result_dir*, which is named after
        the class of the node and gets the suffix ``_<index>`` if *index*
        is given. The content depends on *self.mode*:

        :FeatureVector:
            One filled contour plot (``<label>average.png``) for each label
            in *self.averages*, after multiplying the average with
            ``1.0/self.counter[label]``, and one plot (``sample<i>.png``)
            for each entry of *self.inputs*.
        :PredictionVector:
            The last entry returned by *get_previous_transformations* is
            plotted as it is (``classifier.png``) and multiplied with the
            covariance (``classifier_cov.png``), both reshaped to 28x28.
            If *self.averages* is not None, the normalized averages are
            added as contour lines.
        :nonlinear:
            For each sample in *self.inputs* (at most *self.max_samples*)
            the transformation belonging to the history entry
            ``history_index-1`` of the sample is plotted in the same two
            variants (``classifier_<i>.png``, ``classifier_cov_<i>.png``)
            with the contour of the history entry on top.

        :Parameters:
            :result_dir: Directory in which the node directory is created
            :index: Optional number, which is appended to the name of the
                    node directory

        .. note:: The averages are normalized via ``*=`` on the stored
                  entries. Calling this method a second time applies the
                  normalization again.
        """
        if self.store:
            #set the specific directory for this particular node
            node_dir = os.path.join(result_dir, self.__class__.__name__)
            #do we have an index-number?
            if not index is None:
                #add the index-number...
                node_dir += "_%d" % int(index)
            create_directory(node_dir)
            # Contour colors, selected by the position of the label in
            # *self.averages* (only four labels are supported that way)
            colors = ["white", "black", "blue", "red"]
            if self.mode == "FeatureVector":
                for label in self.averages:
                    self.averages[label] *= 1.0/self.counter[label]
                    #http://wiki.scipy.org/Cookbook/Matplotlib/Show_colormaps
                    pylab.figure(figsize=(4, 4), dpi=300)
                    pylab.contourf(self.averages[label], 50, cmap="jet",
                                   origin="image")
                    pylab.xticks(())
                    pylab.yticks(())
                    f_name = str(node_dir)+str(os.sep)+str(label)+"average"
                    pylab.savefig(f_name + ".png", bbox_inches='tight')
                # The loop variables must neither shadow the *index*
                # parameter nor the builtin *input*
                for sample_index, sample in enumerate(self.inputs):
                    pylab.figure(figsize=(4, 4), dpi=300)
                    pylab.contourf(sample, 50, cmap="binary",
                                   origin="image")
                    pylab.xticks(())
                    pylab.yticks(())
                    f_name = str(node_dir)+str(os.sep)+"sample" + \
                        str(sample_index)
                    pylab.savefig(f_name + ".png", bbox_inches='tight')

            elif self.mode == "PredictionVector":
                trafos = self.get_previous_transformations()[-1]
                trafo = trafos[0]
                # NOTE(review): the result of view() is discarded, so this
                # line does not change *trafo* - presumably an assignment
                # was intended; TODO confirm
                trafo.view(numpy.ndarray)
                covariance = trafos[1][1]
                trafo_covariance = numpy.dot(covariance, trafo.flatten())

                # covariance free picture
                number_array = trafo.reshape((28, 28))
                fig = pylab.figure(figsize=(4, 4), dpi=300)
                # symmetric color range around zero
                pylab.contourf(number_array, 50, cmap="jet", origin="image",
                               vmax=abs(number_array).max(),
                               vmin=-abs(number_array).max())
                pylab.xticks(())
                pylab.yticks(())
                if not self.averages is None:
                    # enumerate yields the same position as looking up the
                    # label in the list of keys, but without a linear search
                    # and without relying on keys() returning a list
                    for color_index, label in enumerate(self.averages):
                        self.averages[label] *= 1.0/self.counter[label]
                        pylab.contour(
                            self.averages[label],
                            levels=[50],
                            colors=colors[color_index],
                            linewidths=3,
                            origin="image")
                f_name = str(node_dir)+str(os.sep)+"classifier"
                pylab.savefig(f_name + ".png", bbox_inches='tight')
                pylab.close(fig)

                # covariance picture (similar code as before)
                # The averages have already been normalized above
                number_array = trafo_covariance.reshape((28, 28))
                fig = pylab.figure(figsize=(4, 4), dpi=300)
                pylab.contourf(number_array, 50, cmap="jet", origin="image",
                               vmax=abs(number_array).max(),
                               vmin=-abs(number_array).max())
                pylab.xticks(())
                pylab.yticks(())
                if not self.averages is None:
                    for color_index, label in enumerate(self.averages):
                        pylab.contour(
                            self.averages[label],
                            levels=[50],
                            linewidths=3,
                            colors=colors[color_index],
                            origin="image")
                f_name = str(node_dir)+str(os.sep)+"classifier_cov"
                pylab.savefig(f_name + ".png", bbox_inches='tight')
                pylab.close(fig)
            elif self.mode == "nonlinear":
                from matplotlib.backends.backend_pdf import PdfPages
                # NOTE(review): the pdf file is opened, but no page is ever
                # saved to it in this method. The plots are stored as
                # single png files instead.
                with PdfPages(str(node_dir)+str(os.sep)+'sample_vis.pdf') as pdf:
                    # Samples are counted starting with one
                    for sample_number, sample in enumerate(self.inputs, 1):
                        base_vector = sample.history[self.history_index-1]
                        trafos = self.get_previous_transformations(base_vector)[-1]
                        trafo = trafos[0]
                        # NOTE(review): result of view() is discarded (see
                        # the PredictionVector mode); TODO confirm
                        trafo.view(numpy.ndarray)
                        covariance = trafos[1][1]
                        trafo_covariance = \
                            numpy.dot(covariance, trafo.flatten())
                        covariance_array = trafo_covariance.reshape((28, 28))

                        base_array = base_vector.reshape((28, 28))
                        trafo_array = trafo.reshape((28, 28))

                        # PLOT 1: plot of the derivative
                        fig = pylab.figure(figsize=(4, 4), dpi=300)
                        pylab.contourf(trafo_array, 50, cmap="jet",
                                       origin="image",
                                       vmax=abs(trafo_array).max(),
                                       vmin=-abs(trafo_array).max())
                        pylab.xticks(())
                        pylab.yticks(())
                        pylab.contour(
                            base_array,
                            levels=[50],
                            colors=colors[1],
                            origin="image")

                        # store and clean
                        f_name = str(node_dir) + str(os.sep) + "classifier_" \
                            + str(sample_number)
                        pylab.savefig(f_name + ".png", bbox_inches='tight')
                        pylab.close(fig)

                        # PLOT 2: plot of the derivative multiplied with
                        # the covariance
                        fig = pylab.figure(figsize=(4, 4), dpi=300)
                        pylab.contourf(covariance_array, 50, cmap="jet",
                                       origin="image",
                                       vmax=abs(covariance_array).max(),
                                       vmin=-abs(covariance_array).max())
                        pylab.xticks(())
                        pylab.yticks(())
                        pylab.contour(
                            base_array,
                            levels=[50],
                            colors=colors[1],
                            origin="image")

                        # store and clean
                        f_name = str(node_dir) + str(os.sep) + \
                            "classifier_cov_" + str(sample_number)
                        pylab.savefig(f_name + ".png", bbox_inches='tight')
                        pylab.close(fig)

                        if sample_number == self.max_samples:
                            break

            # Release all figures which have not been closed explicitly
            pylab.close('all')
Beispiel #47
0
    def __call__(self):
        """ Executes this process on the respective modality

        Each collection in *self.input_collections* is used once as test
        collection. For it, a target collection is created in
        *self.result_directory*, named like the source collection but with
        the base name replaced by ``Rest_vs_<base>`` (or ``<base>_vs_Rest``
        if *self.reverse* is set). The data of the test collection is
        copied to the target and the data of all other input collections,
        which fulfill *self.collection_constraints*, is merged into it.
        If no other collection remains, nothing is created.

        Only collections with exactly one ``*test*`` file in ``data_run0``
        are supported, and this file has to end with ``arff`` or ``pickle``.

        :Raises:
            :AssertionError: if ``data_run0`` contains more than one
                             ``*test*`` file
            :NotImplementedError: for other file types
        """
        ############## Prepare benchmarking ##############
        super(MergeProcess, self).pre_benchmarking()

        # For all input collections
        for source_test_collection_path in self.input_collections:
            # Check if the input data is splitted
            # e.g. only a single test file is in the source directory
            source_files = glob.glob(os.sep.join([source_test_collection_path,
                                                  "data_run0", "*test*"]))
            splitted = len(source_files) > 1
            assert(not splitted)
            source_file_name = str(source_files[-1])

            # check if train sets are also present
            train_data_present = len(glob.glob(os.sep.join(
                [source_test_collection_path, "data_run0", "*train*"]))) > 0

            # if training data is present -> use train and test sets separately
            if train_data_present:
                train_set_name_suffix = "train"
            else:
                train_set_name_suffix = "test"

            # We create the collection Rest_vs_Collection
            source_test_collection_name = \
                source_test_collection_path.split(os.sep)[-2]
            test_base_collection_name = \
                source_test_collection_name.strip("}{").split("}{")[0]
            if self.reverse:
                target_collection_name = source_test_collection_name.replace(
                    test_base_collection_name,
                    test_base_collection_name + "_vs_Rest")
                key = "train"
            else:
                target_collection_name = source_test_collection_name.replace(
                    test_base_collection_name,
                    "Rest_vs_" + test_base_collection_name)
                key = "test"

            target_collection_path = os.sep.join([self.result_directory,
                                                  target_collection_name])
            # determine the parameter_settings of the test collection
            test_collection = BaseDataset.load(source_test_collection_path)
            target_collection_params = \
                test_collection.meta_data["parameter_setting"]
            target_collection_params["__INPUT_DATASET__"] = \
                {key: source_test_collection_name}

            if source_file_name.endswith("arff"):
                file_ending = "arff"
                # Copy arff file from input collection to target collection
                # The file name needs the underscore in front of the suffix,
                # as in all other arff file names used in this method
                source_test_file_path = os.sep.join(
                    [source_test_collection_path, "data_run0",
                     "features_sp0_" + train_set_name_suffix + ".arff"])
                target_test_file_path = os.sep.join(
                    [target_collection_path, "data_run0",
                     "features_sp0_" + key + ".arff"])

            elif source_file_name.endswith("pickle"):
                file_ending = "pickle"
                source_test_file_path = source_test_collection_path
                target_test_file_path = target_collection_path
            else:
                raise NotImplementedError("File type not supported in " \
                                                               "MergeOperation")

            source_train_pathes = []
            for source_train_collection_path in self.input_collections:
                source_train_collection_name = \
                    source_train_collection_path.split(os.sep)[-2]
                # We must not use data originating from the same input
                # collection both in train and test files
                if source_test_collection_name == source_train_collection_name:
                    continue

                # Check that all constraints are fulfilled for this pair of
                # input collections
                # NOTE: The constraints are executed with eval and must
                #       therefore come from a trusted specification only.
                if not all(eval(constraint_template % \
                  {'source_train_collection_name': source_train_collection_name,
                   'source_test_collection_name': source_test_collection_name})
                        for constraint_template in self.collection_constraints):
                    continue

                # check if all parameters are stored in the target path
                source_collection = \
                    BaseDataset.load(source_train_collection_path)
                source_collection_params = \
                    source_collection.meta_data["parameter_setting"]
                # Determined once, since the target parameters do not change
                # while the remaining parameters are collected
                known_params = target_collection_params.items()
                remaining_params = \
                    [param for param in source_collection_params.items()
                     if param not in known_params and
                        param[0] not in ["__INPUT_DATASET__",
                                         "__RESULT_DIRECTORY__",
                                         "__OUTPUT_BUNDLE__",
                                         "__INPUT_COLLECTION__"]] # for old data
                # Parameters unknown so far become part of the target
                # directory name and of the target parameter setting
                for k, v in remaining_params:
                    target_collection_path += "{%s#%s}" % (k, str(v))
                    target_collection_params[k] = v

                if "arff" == file_ending:
                    source_train_file_path = os.sep.join(
                        [source_train_collection_path, "data_run0",
                         "features_sp0_" + train_set_name_suffix + ".arff"])
                elif "pickle" == file_ending:
                    source_train_file_path = source_train_collection_path

                else:
                    raise NotImplementedError("File type not supported in " \
                                                              "MergeOperation!")

                source_train_pathes.append(source_train_file_path)

            if "arff" == file_ending:
                # NOTE(review): This is the same expression as used for
                # target_test_file_path (apart from path extensions added in
                # the loop above) - verify that the merged training data is
                # not meant to be stored in a separate file.
                target_train_file_path = os.sep.join(
                    [target_collection_path, "data_run0",
                     "features_sp0_" + key + ".arff"])
            elif "pickle" == file_ending:
                target_train_file_path = target_collection_path
            else:
                raise NotImplementedError("File type not supported in "
                                                              "MergeOperation!")

            # Without any training data, no target collection is created
            if len(source_train_pathes) == 0:
                continue

            create_directory(os.sep.join([target_collection_path,
                                          "data_run0"]))

            if "arff" == file_ending:
                self._copy_arff_file(source_test_file_path,
                                     target_test_file_path,
                                     source_test_collection_name,
                                     target_collection_name)

                self._merge_arff_files(target_train_file_path,
                                       source_train_pathes,
                                       target_collection_name)
                # Copy metadata.yaml
                # TODO: Adapt to new collection
                input_meta = BaseDataset.load_meta_data(source_test_collection_path)
                BaseDataset.store_meta_data(target_collection_path, input_meta)
            elif "pickle" == file_ending:
                self._copy_pickle_file(source_test_collection_path,
                                       target_collection_path,
                                       train_set_name_suffix)

                self._merge_pickle_files(target_train_file_path,
                                         source_train_pathes,
                                         train_set_name_suffix,
                                         target_collection_params)
            else:
                raise NotImplementedError("File type not supported in merge_operation")

        ############## Clean up after benchmarking ##############
        super(MergeProcess, self).post_benchmarking()
Beispiel #48
0
    def __call__(self):
        """ Executes this process on the respective modality

        All ordered pairs of datasets from *self.input_datasets*, which
        fulfill *self.dataset_constraints*, are processed:

        - For a dataset paired with itself, the dataset directory is
          linked into *self.result_directory* if its ``data_run0`` contains
          more than one entry. Otherwise nothing is done.
        - For two different datasets, a target dataset named
          ``<base1>_vs_<base2>`` (plus the remaining name parts of the first
          dataset) is created. It gets the training data from the first
          and the test data from the second dataset. If the first dataset
          is not split, its ``*_sp*_test.*`` files are used as training data.
          Files ending with ``arff`` are copied with *_copy_arff_file*,
          all other files are linked symbolically.
          Finally, the meta data of the first dataset is stored for the
          target with updated ``train_test``, ``date`` and ``author``.
        """
        ############## Prepare benchmarking ##############
        super(ShuffleProcess, self).pre_benchmarking()

        def transfer(source_file_name, target_file_name, is_arff,
                     base_dataset, mixed_base_dataset):
            """ Copy an arff file or link any other file to the target """
            if is_arff:
                self._copy_arff_file(source_file_name,
                                     target_file_name,
                                     base_dataset,
                                     mixed_base_dataset)
            else:
                os.symlink(source_file_name, target_file_name)

        for dataset_dir1 in self.input_datasets:
            for dataset_dir2 in self.input_datasets:
                dataset_name1 = dataset_dir1.split(os.sep)[-2]
                dataset_name2 = dataset_dir2.split(os.sep)[-2]

                # Check if the input data is split
                splitted = len(glob.glob(os.sep.join([dataset_dir1,
                                                      "data_run0",
                                                      "*"]))) > 1

                # Check that all constraints are fulfilled for this pair of
                # input datasets
                # NOTE: The constraints are executed with eval and must
                #       therefore come from a trusted specification only.
                if not all(eval(constraint_template %
                                {'dataset_name1': dataset_name1,
                                 'dataset_name2': dataset_name2})
                           for constraint_template in self.dataset_constraints):
                    continue

                if dataset_name1 == dataset_name2:
                    if splitted:
                        # Link the data instead of copying it
                        os.symlink(dataset_dir1,
                                   os.sep.join([self.result_directory,
                                                dataset_name1]))
                    continue

                # Determine names of the original data sets the input
                # datasets are based on
                base_dataset1 = dataset_name1.strip("}{").split("}{")[0]
                base_dataset2 = dataset_name2.strip("}{").split("}{")[0]

                # Determine target dataset name and create directory
                # for it
                mixed_base_dataset = "%s_vs_%s" % (base_dataset1,
                                                   base_dataset2)
                target_dataset_name = dataset_name1.replace(
                    base_dataset1, mixed_base_dataset)

                target_dataset_dir = os.sep.join([self.result_directory,
                                                  target_dataset_name])

                create_directory(os.sep.join([target_dataset_dir,
                                              "data_run0"]))

                if splitted:
                    # For each split, copy the train data from dataset 1 and
                    # the test data from dataset 2 to the target dataset
                    for source_train_file_name in glob.glob(
                            os.sep.join([dataset_dir1, "data_run0",
                                         "*_sp*_train.*"])):
                        # TODO: We have $n$ train sets and $n$ test sets, we
                        #       could use all $n*n$ combinations
                        is_arff = source_train_file_name.endswith("arff")
                        target_train_file_name = \
                            source_train_file_name.replace(
                                dataset_dir1, target_dataset_dir)
                        transfer(source_train_file_name,
                                 target_train_file_name, is_arff,
                                 base_dataset1, mixed_base_dataset)

                        # The test file has the same name as the train file
                        # except for the dataset directory and the suffix
                        source_test_file_name = \
                            source_train_file_name.replace(dataset_dir1,
                                                           dataset_dir2)
                        source_test_file_name = \
                            source_test_file_name.replace("train.", "test.")
                        target_test_file_name = \
                            target_train_file_name.replace("train.", "test.")
                        transfer(source_test_file_name,
                                 target_test_file_name, is_arff,
                                 base_dataset2, mixed_base_dataset)
                else:
                    # Use the data set from dataset 1 as training set and
                    # the data set from dataset 2 as test data
                    for source_train_file_name in glob.glob(
                            os.sep.join([dataset_dir1, "data_run0",
                                         "*_sp*_test.*"])):
                        is_arff = source_train_file_name.endswith("arff")
                        target_train_file_name = \
                            source_train_file_name.replace("test.", "train.")
                        target_train_file_name = \
                            target_train_file_name.replace(
                                dataset_dir1, target_dataset_dir)
                        transfer(source_train_file_name,
                                 target_train_file_name, is_arff,
                                 base_dataset1, mixed_base_dataset)

                        source_test_file_name = \
                            source_train_file_name.replace(dataset_dir1,
                                                           dataset_dir2)
                        target_test_file_name = \
                            target_train_file_name.replace("train.", "test.")
                        transfer(source_test_file_name,
                                 target_test_file_name, is_arff,
                                 base_dataset2, mixed_base_dataset)
                # Write metadata.yaml based on input meta data
                input_dataset1_meta = BaseDataset.load_meta_data(dataset_dir1)

                output_dataset_meta = dict(input_dataset1_meta)
                output_dataset_meta['train_test'] = True
                output_dataset_meta['date'] = time.strftime("%Y%m%d_%H_%M_%S")
                try:
                    output_dataset_meta['author'] = \
                        pwd.getpwuid(os.getuid())[4]
                except Exception:
                    # Resolving the author is optional. Interrupts and
                    # exit requests are not swallowed anymore.
                    self._log("Author could not be resolved.",
                              level=logging.WARNING)
                    output_dataset_meta['author'] = "unknown"
                BaseDataset.store_meta_data(target_dataset_dir,
                                            output_dataset_meta)

        ############## Clean up after benchmarking ##############
        super(ShuffleProcess, self).post_benchmarking()
Beispiel #49
0
    def _createProcesses(cls, processes, result_dir, data_dict, parameters,
                         dep_par, metrics, logscale, markertype, top_level):
        """Recursive function that is used to create the analysis processes

        Each process creates one plot for each numeric parameter, each pair of
        numeric parameters, and each nominal parameter based on the data
        contained in the *data_dict*. The results are stored in *result_dir*.
        The method calls itself recursively for each value of each parameter.

        :Parameters:
            :processes: Queue-like object. The created processes are handed
                        over with its *put* method.
            :result_dir: Directory for the results of the created process.
                         Sub-directories ``<parameter>#<value>`` are created
                         for the recursive calls.
            :data_dict: Mapping from column name to the list of its values,
                        where entries with the same index belong together
            :parameters: Names of the parameters to be analyzed
            :dep_par: Parameters, which are not used for splitting the data
            :metrics, logscale, markertype: Passed to *CompAnalysisProcess*
            :top_level: If True, *False* is put into *processes* after all
                        processes have been created
        """

        # Create the analysis process for the given parameters and the
        # given data and put it in the executing-queue
        process = CompAnalysisProcess(result_dir, data_dict, parameters,
                                      metrics, logscale, markertype)
        processes.put(process)
        # If we have less than two parameters it does not make sense to
        # split further
        if len(parameters) < 2 or len(parameters) == len(dep_par):
            # If we have only one parameter to visualize,
            # we don't need to create any further processes,
            # and we have to finish the creating process.
            # NOTE(review): On this path the final *False* is not put into
            # *processes*, even if *top_level* is True - verify that the
            # executing side does not wait for it in this case.
            return

        # For each parameter
        for proj_parameter in parameters:
            if proj_parameter in dep_par:
                continue
            # We split the data based on the values of this parameter
            remaining_parameters = [
                parameter for parameter in parameters
                if parameter != proj_parameter
            ]
            # The column, which defines the projection. It is used directly
            # instead of the loop variable of the list comprehension above,
            # which is only available outside of it in Python 2.
            proj_column = data_dict[proj_parameter]
            # For each value the respective projection parameter can take on
            for value in set(proj_column):
                # Project the result dict onto the rows where the respective
                # parameter takes on the given value
                projected_dict = defaultdict(list)
                entries_added = False
                for row, entry in enumerate(proj_column):
                    if entry != value:
                        continue
                    entries_added = True
                    for column_key in data_dict.keys():
                        # The projected column is constant and left out
                        if column_key == proj_parameter:
                            continue
                        projected_dict[column_key].append(
                            data_dict[column_key][row])
                # If the projected_dict is empty we continue
                if not entries_added:
                    continue

                # Create result_dir and do the recursive call for the
                # projected data
                proj_result_dir = result_dir + os.sep + "%s#%s" % (
                    proj_parameter, value)
                create_directory(proj_result_dir)
                cls._createProcesses(processes, proj_result_dir,
                                     projected_dict, remaining_parameters,
                                     dep_par, metrics, logscale, markertype,
                                     False)

        if top_level == True:
            # give executing process the sign that creation is now finished
            processes.put(False)
Beispiel #50
0
    def store_state(self, result_dir, index=None):
        """ Stores a plot of the metric over the threshold in *result_dir*

        If *self.store* is set, the predictions of the input node on the
        test data are sorted and the metric is evaluated for the confusion
        matrix obtained after each sorted test sample has been passed.
        The resulting curve is plotted together with the curve stored in
        *self.predictions_train*, the original classifier threshold and the
        optimized threshold, and saved as ``threshold_metric.pdf`` in a
        sub directory named after the class.

        Plotting is best effort: if anything in it fails, a warning with
        the reason is logged and the state of the base class is stored
        nevertheless.

        **Parameters**

            :result_dir: directory in which the node directory is created
            :index: not used by this method
        """
        if self.store:
            try:
                # Create metric function lazily since it cannot be pickled
                metric_fct = self._get_metric_fct()

                # Determine curve on test data
                # TODO: Code duplication (mostly already in train)
                predictions_test = []
                labels_test = []
                for data, label in self.input_node.request_data_for_testing():
                    predictions_test.append(data.prediction)
                    labels_test.append(self.classes.index(label))

                sort_index = numpy.argsort(predictions_test)
                labels_test = numpy.array(labels_test)[sort_index]
                predictions_test = numpy.array(predictions_test)[sort_index]

                label_list = list(labels_test)
                num_negatives = label_list.count(0)
                num_positives = label_list.count(1)

                # Determine orientation of hyperplane: the starting point is
                # the threshold below all samples, *step* is the direction
                # in which the counts move when a sample is passed
                if self.orientation_up:
                    TP, FP, TN, FN = num_positives, num_negatives, 0, 0
                    step = 1
                else:
                    TP, FP, TN, FN = 0, 0, num_negatives, num_positives
                    step = -1

                self.predictions_test = [[], []]
                for label, prediction_value in zip(labels_test,
                                                   predictions_test):
                    if label == 0:
                        TN += step
                        FP -= step
                    elif label == 1:
                        FN += step
                        TP -= step
                    assert (TP >= 0 and FP >= 0 and TN >= 0 and FN >= 0), \
                        "TP: %s FP: %s TN: %s FN: %s" % (TP, FP, TN, FN)
                    metric_value = metric_fct(TP, FP, TN, FN)

                    self.predictions_test[0].append(prediction_value)
                    self.predictions_test[1].append(metric_value)

                ### Plot ##
                import pylab
                pylab.close()
                fig_width_pt = 307.28987 * 2  # Get this from LaTeX using \showthe\columnwidth
                inches_per_pt = 1.0 / 72.27  # Convert pt to inches
                fig_width = fig_width_pt * inches_per_pt  # width in inches
                fig_height = fig_width * 0.5  # height in inches
                fig_size = [fig_width, fig_height]
                # 'font.size' is the current name of the former
                # 'text.fontsize' setting, which newer matplotlib rejects
                params = {
                    'axes.labelsize': 10,
                    'font.size': 8,
                    'legend.fontsize': 8,
                    'xtick.labelsize': 10,
                    'ytick.labelsize': 10
                }
                pylab.rcParams.update(params)
                pylab.figure(0, dpi=400, figsize=fig_size)

                # Common axis limits of training and test curve
                xmin = min(min(self.predictions_train[0]),
                           min(self.predictions_test[0]))
                xmax = max(max(self.predictions_train[0]),
                           max(self.predictions_test[0]))
                ymin = min(min(self.predictions_train[1]),
                           min(self.predictions_test[1]))
                ymax = max(max(self.predictions_train[1]),
                           max(self.predictions_test[1]))

                pylab.plot(self.predictions_train[0],
                           self.predictions_train[1],
                           'b',
                           label='Training data')
                pylab.plot(self.predictions_test[0],
                           self.predictions_test[1],
                           'g',
                           label='Unseen test data')
                # Vertical lines marking the two thresholds
                pylab.plot(
                    [self.classifier_threshold, self.classifier_threshold],
                    [ymin, ymax],
                    'r',
                    label='Original Threshold',
                    lw=5)
                pylab.plot([self.threshold, self.threshold], [ymin, ymax],
                           'c',
                           label='Optimized Threshold',
                           lw=5)
                pylab.legend(loc=0)
                pylab.xlim((xmin, xmax))
                pylab.ylim((ymin, ymax))
                pylab.xlabel("Threshold value")
                pylab.ylabel("Metric: %s" % self.metric)

                # Store plot
                from pySPACE.tools.filesystem import create_directory
                import os
                node_dir = os.path.join(result_dir, self.__class__.__name__)
                create_directory(node_dir)

                pylab.savefig(os.path.join(node_dir, "threshold_metric.pdf"))
            except Exception as e:
                # Only regular errors are swallowed here; interrupts and
                # interpreter exits are passed on to the caller
                self._log(
                    "Could not store the threshold metric plot: %s" % e,
                    level=logging.WARNING)

        super(ThresholdOptimizationNode, self).store_state(result_dir)
Beispiel #51
0
 def __call__(self):
     """ Executes this process on the respective modality

     For every input dataset that has not been merged yet, a target
     dataset name is derived, all other input datasets that fulfill every
     constraint in *self.dataset_constraints* are collected, the
     ``data_run0`` directory of the target dataset is created below
     *self.result_directory* and *self._merge_pickle_files* is called with
     the target path and the collected source paths.

     An AssertionError is raised if an input dataset contains more than one
     ``*test*`` file or any ``*train*`` file in its ``data_run0`` directory.

     .. note:: *self.name_pattern* and the constraint templates are
               evaluated with ``eval`` in the scope of this method.
               NOTE(review): they are presumably taken from the operation
               specification and have to be trusted -- confirm.
     """
     ############## Prepare benchmarking ##############
     super(ConcatenateProcess, self).pre_benchmarking()

     # remember what has already been merged
     merged_dataset_pathes = []

     # For all input datasets
     for source_dataset_path1 in self.input_datasets:
         # Datasets that were already merged into another target are skipped
         if source_dataset_path1 in merged_dataset_pathes:
             continue
         # At the moment split data is not supported, so there should be
         # only a single test file in the source directory
         source_files = glob.glob(os.sep.join([source_dataset_path1,
                                               "data_run0", "*test*"]))

         # Paths of all datasets that are merged into the current target
         source_pathes = []

         is_split = len(source_files) > 1
         assert(not is_split),"Multiple test splits as in %s \
                                 are not yet supported."%str(source_files)

         # At the moment train data is not supported, so check if train sets
         # are also present
         train_data_present = len(glob.glob(os.sep.join(
                              [source_dataset_path1,"data_run0",\
                               "*train*"]))) > 0

         assert(not train_data_present),"Using training data is not yet implemented."

         # We create the "All" dataset
         # The dataset name is the second to last path component
         # (presumably the paths end with a separator -- TODO confirm)
         source_dataset_name1 = source_dataset_path1.split(os.sep)[-2]
         # First "{...}" group of the dataset name
         base_dataset_name = \
                            source_dataset_name1.strip("}{").split("}{")[0]
         if self.name_pattern != None:
             # The pattern is filled with the base name and then evaluated
             # as a Python expression that yields the new base name
             target_dataset_name = source_dataset_name1.replace(
                                 base_dataset_name, eval(self.name_pattern % \
                                  {"dataset_name" : base_dataset_name}))
         else:
             # Default: drop the last character of the base name and
             # append "_all"
             target_dataset_name = source_dataset_name1.replace(
                      base_dataset_name, base_dataset_name[:-1]+"_all")

         source_pathes.append(source_dataset_path1)
         target_dataset_path = os.sep.join([self.result_directory,
                                               target_dataset_name])

         for source_dataset_path2 in self.input_datasets:
             source_dataset_name2 = source_dataset_path2.split(os.sep)[-2]
             # Do not use data we have already in the source_path list
             if (source_dataset_path2 == source_dataset_path1) or (source_dataset_path2 in merged_dataset_pathes):
                 continue

             # Check that all constraints are fulfilled for this pair of
             # input datasets; each template is filled with both dataset
             # names and evaluated as a Python expression
             if not all(eval(constraint_template % \
                             {'dataset_name1': source_dataset_name1,
                              'dataset_name2': source_dataset_name2})
                             for constraint_template in self.dataset_constraints):
                 continue

             source_pathes.append(source_dataset_path2)
             # Both partners are marked as merged, so neither of them is
             # used again (the first path may be appended several times)
             merged_dataset_pathes.append(source_dataset_path1)
             merged_dataset_pathes.append(source_dataset_path2)

         create_directory(os.sep.join([target_dataset_path, "data_run0"]))

         self._merge_pickle_files(target_dataset_path, source_pathes)

     ############## Clean up after benchmarking ##############
     super(ConcatenateProcess, self).post_benchmarking()
    def store_state(self, result_dir, index=None):
        """ Stores plots of score distribution and sigmoid fit or/and
        the calculated probabilities with the corresponding label.

        Nothing is done unless *self.store* is set. Otherwise a directory
        named after the class is created in *result_dir* and

            * if *self.store_probabilities* is set, *self.probabilities*
              is pickled to ``probabilities_<split>.pickle``,
            * if *self.store_plots* is set, a 2x2 figure with the reliability
              diagrams of training and test data, each before and after the
              sigmoid fit, is saved to ``reliable_diagrams_<split>.png``.

        **Parameters**

            :result_dir: directory in which the node directory is created
            :index: not used by this method

        .. todo:: add the corresponding data point to the saved probabilities
        """
        if not self.store:
            return

        # Create the directory for the stored results
        from pySPACE.tools.filesystem import create_directory
        import os
        node_dir = os.path.join(result_dir, self.__class__.__name__)
        create_directory(node_dir)

        # Save the probabilities in a pickle file
        if self.store_probabilities:
            import pickle
            f_name = os.path.join(
                node_dir, "probabilities_%d.pickle" % self.current_split)
            # Pickles are binary data and the file has to be closed reliably
            with open(f_name, 'wb') as pickle_file:
                pickle.dump(self.probabilities, pickle_file)

        if not self.store_plots:
            return

        def sorted_by_score(scores, labels):
            """ Return scores and labels as arrays, sorted by score """
            sort_index = numpy.argsort(scores)
            return (numpy.array(scores)[sort_index],
                    numpy.array(labels)[sort_index])

        def sigmoid_fit(scores):
            """ Map the scores to probabilities with the fitted sigmoid

            The two branches of the formula avoid the exponential of a
            large positive number.
            """
            fApB = scores * self.A + self.B
            return [(int(v < 0) + int(v >= 0) * numpy.exp(-v)) /
                    (1.0 + numpy.exp((-1) ** int(v >= 0) * v))
                    for v in fApB]

        def reliability_data(scores, labels):
            """ Return bin positions, empirical probabilities, bin sizes """
            plot_scores, l_discrete = self._discretize(scores, labels)
            len_list, emp_prob = self._empirical_probability(l_discrete)
            return plot_scores, emp_prob, len_list

        # training data before and after sigmoid fit
        predictions, labels = sorted_by_score(self.scores, self.labels)
        train_raw = reliability_data(predictions, labels)
        train_fit = reliability_data(sigmoid_fit(predictions), labels)

        # test data before and after sigmoid fit
        test_scores = []
        test_labels = []
        for data, label in self.input_node.request_data_for_testing():
            test_scores.append(data.prediction)
            test_labels.append(self.class_labels.index(label))
        predictions, labels = sorted_by_score(test_scores, test_labels)
        test_raw = reliability_data(predictions, labels)
        test_fit = reliability_data(sigmoid_fit(predictions), labels)

        import pylab
        from matplotlib.transforms import offset_copy
        pylab.close()
        fig = pylab.figure(figsize=(10, 10))

        def draw_panel(position, panel_data, xlabel, with_sigmoid):
            """ Draw one reliability diagram into the given subplot """
            plot_scores, emp_prob, len_list = panel_data
            ax = pylab.subplot(2, 2, position)
            # Shift the annotation slightly away from the marker
            transOffset = offset_copy(ax.transData,
                                      fig=fig,
                                      x=0.05,
                                      y=0.1,
                                      units='inches')
            for x, y, s in zip(plot_scores, emp_prob[1], len_list[1]):
                pylab.plot((x, ), (y, ), 'ro')
                pylab.text(x, y, '%d' % s, transform=transOffset)

            # Straight reference line from the first to the last position
            pylab.plot((plot_scores[0], plot_scores[-1]), (0, 1), '-')
            if with_sigmoid:
                x = numpy.arange(plot_scores[0], plot_scores[-1], .02)
                y = 1 / (1 + numpy.exp(self.A * x + self.B))
                pylab.plot(x, y, '-')
            pylab.xlim(plot_scores[0], plot_scores[-1])
            pylab.ylim(0, 1)
            pylab.xlabel(xlabel)
            pylab.ylabel("Empirical Probability")

        draw_panel(1, train_raw,
                   "SVM prediction Score (training data)", True)
        draw_panel(2, train_fit, "SVM Probability (training data)", False)
        draw_panel(3, test_raw, "SVM prediction Scores (test data)", True)
        draw_panel(4, test_fit, "SVM Probability (test data)", False)

        pylab.savefig(os.path.join(
            node_dir, "reliable_diagrams_%d.png" % self.current_split))
Beispiel #53
0
    def _get_result_dataset_dir(base_dir, input_dataset_dir, parameter_setting, hide_parameters):
        """ Determines the name of the result directory and creates it

        The name is built from the last path component of
        *input_dataset_dir* (reduced to its first ``{...}`` group) and the
        *parameter_setting*, extended by the parameter setting found in the
        meta data of the input dataset. Parameters listed in
        *hide_parameters* do not show up in the name.

        **Parameters**

            :base_dir: directory in which the result directory is created
            :input_dataset_dir:
                directory of the input dataset, relative to
                ``pySPACE.configuration.storage``
            :parameter_setting:
                mapping from parameter name to value; it is not modified
            :hide_parameters: parameter names to leave out of the name

        **Returns** the path of the created result directory.

        An OSError raised by the directory creation is passed on if the
        second attempt fails as well.
        """
        import errno
        import platform

        # Determine the result_directory name
        # String between Key and value changed from ":" to "#",
        # because of problems in windows and with windows file servers
        def _get_result_dir_name(parameter_setting, hide_parameters, method=None):
            """ internal function to create result dir name in different ways

            Without *method* the values are written out, with
            ``method="hash"`` the hash of the value string is used instead.
            """
            if not method:
                def shown(value):
                    return value
            elif method == "hash":
                # NOTE(review): hash() of a string may differ between
                # interpreter runs (hash randomization) -- confirm that
                # stable names are not required here
                def shown(value):
                    return hash(str(value).replace(" ", ""))
            else:
                raise ValueError("Unknown naming method: %s" % method)

            parameter_str = "}{".join(
                ("%s#%s" % (key, shown(value)))
                for key, value in parameter_setting.items()
                if key not in hide_parameters
            )
            for unwanted in ("'", " ", "[", "]", os.sep):
                parameter_str = parameter_str.replace(unwanted, "")

            result_name = "{%s}" % input_name
            if parameter_str != "":
                result_name += "{%s}" % (parameter_str)

            # Determine the path where this result will be stored
            result_dir = base_dir + os.sep + result_name

            # the maximum length for a filename on Windows is 255
            if platform.system() == "Windows":
                max_name_length = 255
            else:
                max_name_length = os.pathconf(os.curdir, "PC_NAME_MAX")

            # filename is too long
            # (longer than allowed including optional offsets for pyspace
            #  result csv naming conventions)
            # create a md5 hash of the result name and use that one
            if len(result_dir) > max_name_length - 32:
                name_bytes = result_name
                if not isinstance(name_bytes, bytes):
                    name_bytes = name_bytes.encode("utf-8")
                result_name = "{" + hashlib.md5(name_bytes).hexdigest() + "}"
                result_dir = base_dir + os.sep + result_name
            return result_dir

        input_name = input_dataset_dir.strip(os.sep).split(os.sep)[-1]
        # If the input is already the result of an operation, only the
        # first group of its name is used
        input_name = input_name.strip("{}").split("}{")[0]

        # Load the input meta data
        dataset_dir = os.sep.join([pySPACE.configuration.storage, input_dataset_dir])
        dataset_md = BaseDataset.load_meta_data(dataset_dir)

        # We are going to change the parameter_setting and don't want to
        # interfere with later runs so we work on a copy
        parameter_setting = copy.deepcopy(parameter_setting)

        # Ignore pseudo parameter "__PREPARE_OPERATION__"
        parameter_setting.pop("__PREPARE_OPERATION__", None)

        # Add the input parameters meta data to the given parameter setting
        if "parameter_setting" in dataset_md:
            parameter_setting.update(dataset_md["parameter_setting"])

        # We have to remove ' characters from the parameter value since
        # Weka does ignore them
        # NOTE(review): eval executes the value as Python code; the values
        # have to come from a trusted specification
        for key, value in list(parameter_setting.items()):
            if isinstance(value, basestring) and value.count("'") > 1:
                parameter_setting[key] = eval(value)

        result_dir = _get_result_dir_name(parameter_setting, hide_parameters)
        try:
            create_directory(result_dir)
        except OSError as e:
            # The symbolic constant is used since the number of this error
            # differs between platforms
            if e.errno == errno.ENAMETOOLONG:
                # filename is too long
                result_dir = _get_result_dir_name(parameter_setting, hide_parameters, "hash")
            create_directory(result_dir)

        return result_dir
Beispiel #54
0
    def store_state(
            self,
            result_dir,  # string: directory for the results
            index=None):  # None or int: position in the node chain
        """ Creates the stored plots and, if requested, a movie of them

            In offline mode one plot per trial that was not skipped is
            created for the single trial values and/or the accumulated
            average. The overall average is plotted if averaging is
            switched on, and finally the stored images may be combined
            into a movie. All figure windows are closed at the end.

            Called by base_node.

            Returns:       Nothing.
        """
        no_plot_function_text = \
            "VisualizationBase:: The node you are using for visualisation " \
            "has no function _plotValues! This is most likely not what you intended!" \
            "Plotting ignored!"

        # Directory of this particular node (only if something is stored)
        node_dir = None
        if self.store:
            node_dir = os.path.join(result_dir, self.__class__.__name__)
            if index is not None:
                # several nodes of one class are told apart by the index
                node_dir = node_dir + "_%d" % int(index)
            create_directory(node_dir)

        # Trial wise plots are only created in offline mode
        if not self.online and (self.single_trial or self.accum_avg):
            if hasattr(self, "_plotValues"):
                # The value lists have no entries for skipped trials, so
                # the list position only counts the remaining trials
                remaining_trials = (
                    number for number in range(1, self.trial_counter + 1)
                    if number not in self.skipped_trials)
                for pos, trial_num in enumerate(remaining_trials):
                    if self.single_trial:
                        self._plotValues(
                            values=self.st_list[pos],
                            plot_label="single_trial_no_" + str(trial_num),
                            fig_num=self.initial_fig_num + 2,
                            store_dir=node_dir,
                            counter=trial_num)
                    if self.accum_avg:
                        self._plotValues(
                            values=self.accum_list[pos],
                            plot_label="accum_avg_no_" + str(trial_num),
                            fig_num=self.initial_fig_num + 3,
                            store_dir=node_dir,
                            counter=trial_num)
            else:
                warnings.warn(no_plot_function_text)

        # The overall average and the movie are also possible in online
        # mode; they go to the execution path with a timestamp then
        # (see __init__)
        if self.online:
            node_dir = self.user_dir

        # Plot of the overall average
        if self.averaging:
            if not self.avg_values:
                warnings.warn("VisualizationBase:: One of your averages has no " \
                              "instances! Plotting ignored!")
            elif hasattr(self, "_plotValues"):
                self._plotValues(values=self.avg_values,
                                 plot_label="average",
                                 fig_num=self.initial_fig_num + 1,
                                 store_dir=node_dir)
            else:
                warnings.warn(no_plot_function_text)

        # Movie built from the stored images
        if self.create_movie and self.store_data:
            prefixes = []
            if self.single_trial:
                prefixes.extend(
                    "single_trial_no_" + str(trial)
                    for trial in range(1, self.trial_counter + 1))
            if self.accum_avg:
                prefixes.extend(
                    "accum_avg_no_" + str(trial)
                    for trial in range(1, self.trial_counter + 1))
            if self.averaging:
                prefixes.append('average')
            self._create_movie(prefixes=prefixes, directory=node_dir)

        # close the figure windows
        pylab.close('all')
    def store_state(self, result_dir, index=None):
        """ Stores plots of score distribution and sigmoid fit. """
        if self.store:
            # reliable plot of training (before linear fit)
            sort_index = numpy.argsort(self.scores)
            labels = numpy.array(self.labels)[sort_index]
            predictions = numpy.array(self.scores)[sort_index]

            plot_scores_train, l_discrete_train = self._discretize(
                predictions, labels)
            len_list_train, plot_emp_prob_train = self._empirical_probability(
                l_discrete_train)

            # training data after linear fit
            new_predictions = []
            for score in predictions:
                if score < 0.0:
                    new_predictions.append((score + self.max_range[0]) / \
                                                          (2.0 * self.max_range[0]))
                else:
                    new_predictions.append((score + self.max_range[1]) / \
                                                          (2.0 * self.max_range[1]))

            plot_scores_train_fit, l_discrete_train_fit = \
                                            self._discretize(new_predictions,labels)
            len_list_train_fit, plot_emp_prob_train_fit = \
                                   self._empirical_probability(l_discrete_train_fit)

            # test data before sigmoid fit
            test_scores = []
            test_labels = []
            for data, label in self.input_node.request_data_for_testing():
                test_scores.append(data.prediction)
                test_labels.append(self.class_labels.index(label))

            sort_index = numpy.argsort(test_scores)
            labels = numpy.array(test_labels)[sort_index]
            predictions = numpy.array(test_scores)[sort_index]

            plot_scores_test, l_discrete_test = self._discretize(
                predictions, labels)
            len_list_test, plot_emp_prob_test = self._empirical_probability(
                l_discrete_test)

            # test data after sigmoid fit
            new_predictions = []
            for score in predictions:
                if score < -1.0 * self.max_range[0]:
                    new_predictions.append(0.0)
                elif score < 0.0:
                    new_predictions.append((score + self.max_range[0]) / \
                                                          (2.0 * self.max_range[0]))
                elif score < self.max_range[1]:
                    new_predictions.append((score + self.max_range[1]) / \
                                                          (2.0 * self.max_range[1]))
                else:
                    new_predictions.append(1.0)

            plot_scores_test_fit, l_discrete_test_fit = \
                                            self._discretize(new_predictions,labels)
            len_list_test_fit, plot_emp_prob_test_fit = \
                                   self._empirical_probability(l_discrete_test_fit)

            from pySPACE.tools.filesystem import create_directory
            import os
            node_dir = os.path.join(result_dir, self.__class__.__name__)
            create_directory(node_dir)

            import pylab
            from matplotlib.transforms import offset_copy
            pylab.close()
            fig = pylab.figure(figsize=(10, 10))
            ax = pylab.subplot(2, 2, 1)
            transOffset = offset_copy(ax.transData,
                                      fig=fig,
                                      x=0.05,
                                      y=0.1,
                                      units='inches')
            for x, y, s in zip(plot_scores_train, plot_emp_prob_train[1],
                               len_list_train[1]):
                pylab.plot((x, ), (y, ), 'ro')
                pylab.text(x, y, '%d' % s, transform=transOffset)

            pylab.plot((plot_scores_train[0], plot_scores_train[-1]), (0, 1),
                       '-')
            x1 = numpy.arange(-1.0 * self.max_range[0], 0.0, .02)
            x2 = numpy.arange(0.0, self.max_range[1], .02)
            y1 = (x1 + self.max_range[0]) / (2 * self.max_range[0])
            y2 = (x2 + self.max_range[1]) / (2 * self.max_range[1])
            pylab.plot(numpy.concatenate((x1, x2)), numpy.concatenate(
                (y1, y2)), '-')
            pylab.xlim(plot_scores_train[0], plot_scores_train[-1])
            pylab.ylim(0, 1)
            pylab.xlabel("SVM prediction Score (training data)")
            pylab.ylabel("Empirical Probability")

            ax = pylab.subplot(2, 2, 2)
            transOffset = offset_copy(ax.transData,
                                      fig=fig,
                                      x=0.05,
                                      y=0.1,
                                      units='inches')
            for x, y, s in zip(plot_scores_train_fit,
                               plot_emp_prob_train_fit[1],
                               len_list_train_fit[1]):
                pylab.plot((x, ), (y, ), 'ro')
                pylab.text(x, y, '%d' % s, transform=transOffset)

            pylab.plot((plot_scores_train_fit[0], plot_scores_train_fit[-1]),
                       (0, 1), '-')
            pylab.xlim(plot_scores_train_fit[0], plot_scores_train_fit[-1])
            pylab.ylim(0, 1)
            pylab.xlabel("SVM Probability (training data)")
            pylab.ylabel("Empirical Probability")

            ax = pylab.subplot(2, 2, 3)
            transOffset = offset_copy(ax.transData,
                                      fig=fig,
                                      x=0.05,
                                      y=0.1,
                                      units='inches')
            for x, y, s in zip(plot_scores_test, plot_emp_prob_test[1],
                               len_list_test[1]):
                pylab.plot((x, ), (y, ), 'ro')
                pylab.text(x, y, '%d' % s, transform=transOffset)

            pylab.plot((plot_scores_test[0], plot_scores_test[-1]), (0, 1),
                       '-')
            x1 = numpy.arange(-1.0 * self.max_range[0], 0.0, .02)
            x2 = numpy.arange(0.0, self.max_range[1], .02)
            y1 = (x1 + self.max_range[0]) / (2 * self.max_range[0])
            y2 = (x2 + self.max_range[1]) / (2 * self.max_range[1])
            pylab.plot(
                numpy.concatenate([[plot_scores_test[0], self.max_range[0]],
                                   x1, x2,
                                   [self.max_range[1], plot_scores_test[-1]]]),
                numpy.concatenate([[0.0, 0.0], y1, y2, [1.0, 1.0]]), '-')
            pylab.xlim(plot_scores_test[0], plot_scores_test[-1])
            pylab.ylim(0, 1)
            pylab.xlabel("SVM prediction Score (test data)")
            pylab.ylabel("Empirical Probability")

            ax = pylab.subplot(2, 2, 4)
            transOffset = offset_copy(ax.transData,
                                      fig=fig,
                                      x=0.05,
                                      y=0.1,
                                      units='inches')
            for x, y, s in zip(plot_scores_test_fit, plot_emp_prob_test_fit[1],
                               len_list_test_fit[1]):
                pylab.plot((x, ), (y, ), 'ro')
                pylab.text(x, y, '%d' % s, transform=transOffset)

            pylab.plot((plot_scores_test_fit[0], plot_scores_test_fit[-1]),
                       (0, 1), '-')
            pylab.xlim(plot_scores_test_fit[0], plot_scores_test_fit[-1])
            pylab.ylim(0, 1)
            pylab.xlabel("SVM Probability (test data)")
            pylab.ylabel("Empirical Probability")

            pylab.savefig(node_dir +
                          "/reliable_diagrams_%d.png" % self.current_split)