import copy

from pySPACE.missions.nodes.base_node import BaseNode
from pySPACE.resources.dataset_defs.prediction_vector import PredictionVectorDataset


class PredictionVectorSinkNode(BaseNode):
    """ Collect all :class:`~pySPACE.resources.data_types.prediction_vector.PredictionVector` elements
    that are passed through it into a collection of type
    :mod:`~pySPACE.resources.dataset_defs.prediction_vector`.

    .. note::
        The code is heavily based on its counterpart for
        :class:`~pySPACE.resources.data_types.feature_vector.FeatureVector`
        elements, the
        :class:`~pySPACE.missions.nodes.sink.feature_vector_sink.FeatureVectorSinkNode`.


    **Exemplary Call**

    .. code-block:: yaml

        -
            node: PredictionVectorSink

    :input:  PredictionVector
    :output: PredictionVectorDataset
    :Author: Andrei Ignat ([email protected])
    :Created: 2014/10/15
    """
    input_types = ["PredictionVector"]

    def __init__(self, **kwargs):
        super(PredictionVectorSinkNode, self).__init__(**kwargs)
        self.set_permanent_attributes(prediction_vector_collection=None)

    def reset(self):
        """ Reset the state of the object to the clean state it had after its
        initialization
        """
        tmp = self.permanent_state
        # TODO: just a hack to get it working quickly...
        tmp["prediction_vector_collection"] = self.prediction_vector_collection
        self.__dict__ = copy.copy(tmp)
        self.permanent_state = tmp

    def is_trainable(self):
        """ Returns whether this node is trainable.

        Since the training examples should be collected (sunk) as well,
        this method returns True.
        """
        return True

    def is_supervised(self):
        """ Returns whether this node requires supervised training """
        return True

    def _train(self, data, label):
        # Nothing to train; samples are collected in process_current_split
        pass

    def _create_result_sets(self):
        """ Instantiate the :class:`~pySPACE.resources.data_types.prediction_vector.PredictionVectorDataset`
        """
        self.prediction_vector_collection = PredictionVectorDataset()

    def process_current_split(self):
        """ Compute the results of this sink node for the current split
        of the data into train and test data
        """
        # Compute the prediction vectors for the data used for training
        for prediction_vector, label in self.input_node.request_data_for_training(False):
            if prediction_vector.tag != "Discard":
                if self.prediction_vector_collection is None:
                    # create the dataset if it does not already exist
                    self._create_result_sets()

                self.prediction_vector_collection.add_sample(prediction_vector,
                                                             label,
                                                             train=True,
                                                             split=self.current_split,
                                                             run=self.run_number)

        # Compute the prediction vectors for the data used for testing
        for prediction_vector, label in self.input_node.request_data_for_testing():
            # If Prediction Vectors need to be discarded, that is done here
            if prediction_vector.tag != "Discard":
                # Do lazy initialization of the class
                # (maybe there were no training examples)
                if self.prediction_vector_collection is None:
                    self._create_result_sets()
                # Add sample
                self.prediction_vector_collection.add_sample(prediction_vector,
                                                             label,
                                                             train=False,
                                                             split=self.current_split,
                                                             run=self.run_number)

    def get_result_dataset(self):
        """ Return the result dataset """
        return self.prediction_vector_collection
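

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the stand-alone mock below reproduces the
# collection pattern used in PredictionVectorSinkNode.process_current_split()
# without requiring pySPACE. Samples tagged "Discard" are skipped, the result
# dataset is created lazily on the first kept sample, and every kept sample is
# stored together with its label, split index and run number. All names in
# this sketch (MockVector, MockDataset, collect) are hypothetical stand-ins;
# the real node relies on pySPACE's BaseNode machinery and on
# PredictionVectorDataset instead.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    class MockVector(object):
        """ Minimal stand-in for a prediction vector (prediction + tag) """
        def __init__(self, prediction, tag=""):
            self.prediction = prediction
            self.tag = tag

    class MockDataset(object):
        """ Minimal stand-in for the result dataset """
        def __init__(self):
            self.samples = []

        def add_sample(self, sample, label, train, split, run):
            self.samples.append((sample, label, train, split, run))

    def collect(pairs, train, split=0, run=0, dataset=None):
        """ Collect (vector, label) pairs, skipping vectors tagged 'Discard' """
        for vector, label in pairs:
            if vector.tag != "Discard":
                if dataset is None:
                    # lazy initialization, as in process_current_split
                    dataset = MockDataset()
                dataset.add_sample(vector, label,
                                   train=train, split=split, run=run)
        return dataset

    train_pairs = [(MockVector(0.8), "Target"),
                   (MockVector(0.1, tag="Discard"), "Standard")]
    test_pairs = [(MockVector(0.3), "Standard")]

    result = collect(train_pairs, train=True)
    result = collect(test_pairs, train=False, dataset=result)
    # two samples remain: the vector tagged "Discard" was dropped
    print(len(result.samples))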